DECLARE @t TABLE ( InvID INT IDENTITY, BatchNumber INT, InvoiceNumber INT, VisitDate DATETIME, Amount MONEY ) INSERT INTO @t(BatchNumber, InvoiceNumber, VisitDate, Amount) SELECT 10000001,20001,'2009-01-01',50.00 UNION ALL SELECT 10000001,20001,'2009-01-02',50.00 UNION ALL SELECT 10000001,20001,'2009-01-03',50.00 UNION ALL SELECT 10000001,20001,'2009-01-04',50.00 UNION ALL SELECT 10000001,20002,'2009-01-01',50.00 UNION ALL SELECT 10000001,20002,'2009-01-02',50.00 UNION ALL SELECT 10000001,20002,'2009-01-03',50.00 UNION ALL SELECT 10000001,20003,'2009-01-01',50.00 UNION ALL SELECT 10000001,20003,'2009-01-02',50.00 UNION ALL SELECT 10000001,20004,'2009-01-01',50.00 UNION ALL SELECT 10000001,20004,'2009-01-02',50.00 UNION ALL SELECT 10000001,20004,'2009-01-03',50.00 UNION ALL SELECT 10000001,20004,'2009-01-04',50.00 UNION ALL SELECT 10000001,20004,'2009-01-05',50.00 UNION ALL SELECT 10000001,20004,'2009-01-06',50.00 UNION ALL SELECT 10000001,20005,'2009-01-01',50.00 UNION ALL SELECT 10000001,20005,'2009-01-02',50.00 UNION ALL SELECT 10000001,20005,'2009-01-13',50.00 UNION ALL SELECT 10000001,20005,'2009-01-14',50.00 UNION ALL SELECT 10000001,20005,'2009-01-15',50.00 UNION ALL SELECT 10000001,20005,'2009-01-06',50.00 UNION ALL SELECT 10000001,20005,'2009-01-07',50.00 UNION ALL SELECT 10000001,20005,'2009-01-08',50.00 UNION ALL SELECT 10000001,20006,'2009-01-01',50.00 UNION ALL SELECT 10000002,20007,'2009-01-01',50.00 UNION ALL SELECT 10000002,20007,'2009-01-02',50.00 UNION ALL SELECT 10000002,20007,'2009-01-03',50.00 UNION ALL SELECT 10000002,20008,'2009-01-01',50.00 UNION ALL SELECT 10000002,20008,'2009-01-02',50.00 UNION ALL SELECT 10000002,20008,'2009-01-03',50.00 -- ASSUME: that the COUNT for this query is <= 10: -- SELECT InvoiceNumber, COUNT(*) FROM @t GROUP BY InvoiceNumber -- ASSUME: that the InvoiceNumbers are sequential even as they cross BatchNumbers, -- and there are no missing InvoiceNumbers within a given sequence (e.g., if -- we have InvoiceNumbers 1 and 5 (arbitrary numbers), then it will always be the -- case that InvoiceNumbers 2, 3, and 4 exist in the base data set. If this -- assumption is violated for any reason, then we'll have to use a surrogate -- key in place of this column. -- DO NOT ASSUME that the InvoiceNumbers are unique across BatchNumbers. We CAN -- assume it based on the sample data, but the solution does not make this -- assumption, and data received in the future need not rely on this. -- instead of processing at the row level of the source data set, we're going to process -- our results at the batch/invoice grain. This will immediately address the requirement -- that invoices should not be broken across batches. ; WITH CntByInv AS ( SELECT BatchNumber, InvoiceNumber, Cnt = COUNT(*) FROM @t GROUP BY BatchNumber, InvoiceNumber ) -- this CTE is the workhorse of the whole script. it discovers every possible batch of 10 rows or -- less of sequential InvoiceNumbers (and because the InvoiceNumbers are already grouped together -- and counted in a bunch, there's no chance of returning a sequence where an invoice is broken -- across multiple batches). This CTE is a double self-join. First it picks an arbitrary -- InvoiceNumber, then finds an InvoiceNumber with the same BatchNumber that is greater than -- the first, and then it self-joins to count the number of rows between those two. If the sum -- does not exceed 10, then the sequence is returned for possible consideration later. ,AllPossibleSets AS ( SELECT BatchNumber = C1.BatchNumber, BgnInvNo = C1.InvoiceNumber, EndInvNo = C2.InvoiceNumber, Cnt = (SELECT SUM(Cnt) FROM CntByInv C3 WHERE C3.BatchNumber = C1.BatchNumber AND C3.InvoiceNumber BETWEEN C1.InvoiceNumber AND C2.InvoiceNumber) FROM CntByInv C1 JOIN CntByInv C2 ON C2.BatchNumber = C1.BatchNumber AND C2.InvoiceNumber >= C1.InvoiceNumber WHERE (SELECT SUM(Cnt) FROM CntByInv C3 WHERE C3.BatchNumber = C1.BatchNumber AND C3.InvoiceNumber BETWEEN C1.InvoiceNumber AND C2.InvoiceNumber) <= 10 ) -- this just determines the largest sequence per BatchNumber/Beginning InvoiceNumber ,BiggestSets AS ( SELECT BatchNumber, BgnInvNo, MaxCnt = MAX(Cnt) FROM AllPossibleSets GROUP BY BatchNumber, BgnInvNo ) -- for each batch, the first set must contain the min InvoiceNumber. This root InvoiceNumber -- will become the anchor of a recursive CTE later. ,BatchRoots AS ( SELECT BatchNumber, RootInvNo = MIN(InvoiceNumber) FROM @t GROUP BY BatchNumber ) -- this CTE simply pulls the ideal set of batch/invoice rows from AllPossibleSets that start with -- the root InvoiceNumber ,BatchRoots2 AS ( SELECT AllPossibleSets.BatchNumber, AllPossibleSets.BgnInvNo, AllPossibleSets.EndInvNo, AllPossibleSets.Cnt FROM BatchRoots JOIN BiggestSets ON BiggestSets.BatchNumber = BatchRoots.BatchNumber AND BiggestSets.BgnInvNo = BatchRoots.RootInvNo JOIN AllPossibleSets ON AllPossibleSets.BatchNumber = BatchRoots.BatchNumber AND AllPossibleSets.BgnInvNo = BatchRoots.RootInvNo AND AllPossibleSets.Cnt = BiggestSets.MaxCnt ) -- this recursive CTE first pulls the root InvoiceNumber identified in BatchRoots and BatchRoots2, -- then pulls the next biggest set of data by adding 1 to the invoice number (which exposes a -- critical assumption - that the InvoiceNumbers are sequential within batches. ,BestGrouping AS ( SELECT BatchNumber, BgnInvNo, EndInvNo, Cnt FROM BatchRoots2 UNION ALL SELECT AllPossibleSets.BatchNumber, AllPossibleSets.BgnInvNo, AllPossibleSets.EndInvNo, AllPossibleSets.Cnt FROM BestGrouping JOIN AllPossibleSets ON AllPossibleSets.BatchNumber = BestGrouping.BatchNumber AND AllPossibleSets.BgnInvNo = BestGrouping.EndInvNo + 1 JOIN BiggestSets ON BiggestSets.MaxCnt = AllPossibleSets.Cnt AND BiggestSets.BatchNumber = AllPossibleSets.BatchNumber AND BiggestSets.BgnInvNo = AllPossibleSets.BgnInvNo ) -- the icing on the cake - after BestGrouping pulls together the final data set, this CTE -- adds the SetNo values, which will be appended to the original data set. The SetNo values -- are just a ranking by beginning InvoiceNumber across BatchNumbers. ,BestGrouping2 AS ( SELECT BatchNumber, BgnInvNo, EndInvNo, Cnt, SetNo = RANK() OVER (PARTITION BY BatchNumber ORDER BY BgnInvNo) FROM BestGrouping ) SELECT A.InvID, A.BatchNumber, A.InvoiceNumber, A.VisitDate, A.Amount, B.SetNo FROM @t A LEFT JOIN BestGrouping2 B ON B.BatchNumber = A.BatchNumber AND A.InvoiceNumber BETWEEN B.BgnInvNo AND B.EndInvNo -- NOTE: test for success by counting the number of SetNo values that are null. There shouldn't be -- any; if there are, then check for data that violates our assumptions. If the data checks out ok, -- then there is a problem with our solution.