TSQL Challenge 14 - Solution by HODIN Matthieu



-- HODIN_Matthieu_tsqlchallenge_14.sql
DECLARE @t TABLE (Data VARCHAR(40) )

INSERT @t (Data) SELECT '9992EDC6-D117-4DEE-B410-4E5FAE46AE97'
INSERT @t (Data) SELECT '0BFC936B-BD9A-4C6A-AFB2-CF3F1752F8B1'
INSERT @t (Data) SELECT '4A73E7EB-7777-4A04-9258-F1E75097977C'
INSERT @t (Data) SELECT '5AAF477C-274D-400D-9067-035968F33B19'
INSERT @t (Data) SELECT '725DA718-30D0-44A9-B36A-89F27CDFEEDE'
INSERT @t (Data) SELECT '8083ED5A-D3B9-4694-BB04-F0B09C588888'

/*
Data                                 Char Pos  Len 
------------------------------------ ---- ---- ----
8083ED5A-D3B9-4694-BB04-F0B09C588888 B    20   2  
8083ED5A-D3B9-4694-BB04-F0B09C588888 8    32   5 
4A73E7EB-7777-4A04-9258-F1E75097977C 7    10   4
4A73E7EB-7777-4A04-9258-F1E75097977C 7    34   2
9992EDC6-D117-4DEE-B410-4E5FAE46AE97 9    1    3
9992EDC6-D117-4DEE-B410-4E5FAE46AE97 1    11   2
9992EDC6-D117-4DEE-B410-4E5FAE46AE97 E    17   2
5AAF477C-274D-400D-9067-035968F33B19 A    2    2
5AAF477C-274D-400D-9067-035968F33B19 7    6    2
5AAF477C-274D-400D-9067-035968F33B19 0    16   2
5AAF477C-274D-400D-9067-035968F33B19 3    32   2
725DA718-30D0-44A9-B36A-89F27CDFEEDE 4    15   2
725DA718-30D0-44A9-B36A-89F27CDFEEDE E    33   2

Column 
"char" shows the character having longest sequence within the string. 
"pos" shows the starting position of the sequence and the 
"len" column shows the length of the sequence.

The output should be ordered as given in the 'expected output' listing. 
The data having the longest sequence should come on top 
followed by the next longest sequence (descending order). 
Within each data, the rows should be ordered by the position at which the sequence starts. 

The reason for '5AAF477C-274D-400D-9067-035968F33B19' 
to be listed before '725DA718-30D0-44A9-B36A-89F27CDFEEDE' 
is that “5AA” has more repetitive groups (4 groups) than “725….” (2 groups)
*/


;with cte (data,lettre,pos,len) as 
(select data, lettre, min(number) as pos, max(number)-min(number)+1 as len
from ( 
select data,number , substring(data, number,1) as lettre
	,ROW_NUMBER() OVER(
		PARTITION BY 
			--begin of sequence
			case when substring(data, number,1)<> substring(data,number-1,1) then 1 else 0 end 
			--end of sequence
			,case when substring(data, number,1) <> substring(data,number+1,1) then 1 else 0 end 
		ORDER BY data,number
	) AS RowID -- this creates a unique group id for each sequence of chars in data
from master.dbo.spt_values -- i use this table as a "go forward" machine
cross join @t  
where type='P' and number < 37 and number>0 -- here we can adjust where we are looking in the data 8+4+4+4+12 (+4)=32 (+4) = 36
and case when substring(data, number,1)<> substring(data,number-1,1) then 1 else 0 end 
	<>
	case when substring(data, number,1) <> substring(data,number+1,1) then 1 else 0 end
	-- start and end of sequence are equals to 1 ; middle of a sequence as a begin with 0 and a end with 0
	-- so in order to fin only begin and end we have to find rows where begin is diffrent of end
) as beginAndEndTable
group by data,lettre, rowId
),cte2 (data,nbLignes, MaxLength) as (
select data, count(*) as nbLignes , max(len) as MaxLength 
from cte
group by data
)
select cte.*
from cte
inner join cte2 on cte.data=cte2.data
order by maxLength desc,nbLignes desc, pos asc

Did you understand how this solution work? If you find it difficult to understand, you can Request an Explanation or you can Write an explanation to help others better understand this.