TSQL Challenge 13 - Solution by Steve Howard



--  Strategy:  Use 1 basic CTE to group up items by InvoiceNumber and add a 
--  unique, sequential RowNumber for each InvoiceNumber within a BatchNumber.  
--  Then use a recursive CTE to start with RowNumber 1 in each BatchNumber, 
--  using recursion to add the next RowNumber, either to the same Set (if 
--  the total count in the set is 10 or less), or otherwise to Set + 1.  

--Sample Data
DECLARE @t TABLE (
	InvID INT IDENTITY,
	BatchNumber INT,
	InvoiceNumber INT, 
	VisitDate DATETIME,
	Amount MONEY )
INSERT INTO @t(BatchNumber, InvoiceNumber, VisitDate, Amount)
SELECT 10000001,20001,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20001,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20001,'2009-01-03',50.00 UNION ALL
SELECT 10000001,20001,'2009-01-04',50.00 UNION ALL
SELECT 10000001,20002,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20002,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20002,'2009-01-03',50.00 UNION ALL
SELECT 10000001,20003,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20003,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-03',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-04',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-05',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-06',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-13',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-14',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-15',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-06',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-07',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-08',50.00 UNION ALL
SELECT 10000001,20006,'2009-01-01',50.00 UNION ALL
SELECT 10000002,20007,'2009-01-01',50.00 UNION ALL
SELECT 10000002,20007,'2009-01-02',50.00 UNION ALL
SELECT 10000002,20007,'2009-01-03',50.00 UNION ALL
SELECT 10000002,20008,'2009-01-01',50.00 UNION ALL
SELECT 10000002,20008,'2009-01-02',50.00 UNION ALL
SELECT 10000002,20008,'2009-01-03',50.00
--End Sample Data

--Note:  This is all one query, but comments are inserted before each 
--major subsection of the query.  

--CTE#1:  Groups all items with the same InvoiceNumber together, provides a 
--count of the number of items for that InvoiceNumber, and adds a unique 
--sequential RowNum for each InvoiceNumber within a BatchNumber (RowNum 
--starts over at 1 again for each distinct BatchNumber).
;WITH InvoiceGroups (BatchNumber, InvoiceNumber, InvoiceCount, RowNum) AS 
(
		SELECT BatchNumber, InvoiceNumber, COUNT(InvoiceNumber) AS InvoiceCount, 
			ROW_NUMBER() OVER (PARTITION BY BatchNumber ORDER BY InvoiceNumber) AS RowNum
		FROM @t
		GROUP BY BatchNumber, InvoiceNumber
), 
--CTE#2:  Recursive CTE to group the InvoiceGroups into Sets of at most 10 items.  
--Begins with RowNum = 1 for all BatchNumbers from the InvoiceGroups CTE, 
--placing them in SetNum = 1 for that BatchNumber.  SetCount is used to 
--count how many items have been added to the SetNum in a BatchNumber so far.  
--Each recursion takes the next RowNum for all BatchNumbers from the 
--InvoiceGroups CTE and adds it to SetNum + 1 (if total > 10) or SetNum 
--(if total < 10), and fills SetCount accordingly.  
--The recursion will naturally end when the INNER JOIN looking for RowNum + 1
--fails to find a matching RowNum from InvoiceGroups.  
--Note that all BatchNumbers are filled simultaneously through the 
--recursion, rather than starting over for each BatchNumber.  
SetNumbers (BatchNumber, InvoiceNumber, InvoiceCount, RowNum, SetNum, SetCount) AS
(
	SELECT BatchNumber, InvoiceNumber, InvoiceCount, RowNum, 1, InvoiceCount
	FROM InvoiceGroups
	WHERE RowNum = 1
	UNION ALL
	SELECT I.BatchNumber, I.InvoiceNumber, I.InvoiceCount, I.RowNum, 
		CASE WHEN S.SetCount + I.InvoiceCount > 10 THEN S.SetNum + 1 
			 ELSE S.SetNum END AS SetNum, 
		CASE WHEN S.SetCount + I.InvoiceCount > 10 THEN I.InvoiceCount 
			 ELSE S.SetCount + I.InvoiceCount END AS SetCount
	FROM SetNumbers S
	INNER JOIN InvoiceGroups I 
		ON I.BatchNumber = S.BatchNumber AND I.RowNum = S.RowNum + 1
)
--Query:  At this point we have the SetNumbers Recursive CTE which lists the 
--SetNum for each BatchNumber and InvoiceNumber, so just a simple INNER JOIN 
--on BatchNumber and InvoiceNumber to match the SetNum to each item in the 
--original data set
SELECT T.InvID, T.BatchNumber, T.InvoiceNumber, T.VisitDate, T.Amount, S.SetNum AS [Set]
FROM @t T
INNER JOIN SetNumbers S
	ON T.BatchNumber = S.BatchNumber AND T.InvoiceNumber = S.InvoiceNumber
ORDER BY T.InvID