DECLARE @t TABLE (Data VARCHAR(40)) INSERT @t (Data) SELECT '9992EDC6-D117-4DEE-B410-4E5FAE46AE97' INSERT @t (Data) SELECT '0BFC936B-BD9A-4C6A-AFB2-CF3F1752F8B1' INSERT @t (Data) SELECT '4A73E7EB-7777-4A04-9258-F1E75097977C' INSERT @t (Data) SELECT '5AAF477C-274D-400D-9067-035968F33B19' INSERT @t (Data) SELECT '725DA718-30D0-44A9-B36A-89F27CDFEEDE' INSERT @t (Data) SELECT '8083ED5A-D3B9-4694-BB04-F0B09C588888' -- using master.spt_values as a replacement to a tally table -- rnk (rank by longest sequence of repeating characters, 1 is most, etc.) -- cnt (count number of repeating sequences in data for ties in rank) ;with cteData (data, rnk, cnt) as (select t.data, rank() over (order by t2.n desc), row_number() over (partition by t.data order by t.data desc) from @t t, (select number n from master..spt_values where number between 1 and 2048 group by number) t1, (select number n from master..spt_values where number between 1 and 2048 group by number) t2 where t1.n < 36 and t2.n between 2 and 36 -- t2.n can get out of range but sql is faster ignoring it than my where clause and substring(t.data, t1.n, 1) = substring(t.data, t1.n + 1, 1) -- next character is the same and substring(t.data, t1.n, t2.n) = replicate(left(substring(t.data, t1.n, t2.n), 1), t2.n)) -- all characters are the same , cteSum(data, rnk, cnt) as (select data, min(rnk), max(cnt) from cteData ct group by ct.data) -- essentially generating every possible substring of data and -- eliminating those where all characters are not the same or -- where the next character is the same (and would get counted later) -- output in rnk order and use cnt for rnk ties. select t.data, substring(t.data, t1.n, 1) 'char', t1.n 'pos', t2.n 'len' from @t t, (select number n from master..spt_values where number between 1 and 2048 group by number) t1, (select number n from master..spt_values where number between 1 and 2048 group by number) t2, cteSum where t1.n < 36 and t2.n between 2 and 36 -- t1.n and t2.n can get out of range but sql is faster ignoring it than my where clause and substring(t.data, t1.n, t2.n) = replicate(left(substring(t.data, t1.n, t2.n), 1), t2.n) -- all characters are the same and substring(t.data, t1.n, 1) <> substring(t.data, t1.n + t2.n, 1) -- next character is different, so this is longest and substring(t.data, t1.n - 1, 1) <> substring(t.data, t1.n, 1) -- previous character is different so this is a start and cteSum.data = t.data order by cteSum.rnk, cteSum.cnt desc, cteSum.data, t1.n
Did you understand how this solution work? If you find it difficult to understand, you can Request an Explanation or you can Write an explanation to help others better understand this.