In [None]:
"""
Consecutive Number

Write an SQL query to find all numbers that appear at least three times consecutively.
Return the result table in any order.
"""

In [None]:
"""testcase"""
{"headers": {"Logs": ["Id", "Num"]}, "rows": {"Logs": [[1, 1], [2, 1], [3, 1], [4, 2], [5, 1], [6, 2], [7, 2]]}}

In [None]:
"""SQL table creation and data insertion"""
# Recreate the Logs table from scratch so this cell can be re-run safely.
DROP TABLE IF EXISTS Logs;

# Id orders the log entries; Num is the value whose consecutive runs are searched for.
CREATE TABLE Logs (
    Id  INT         NOT NULL,
    Num VARCHAR(32) NOT NULL,
    PRIMARY KEY (Id)
);

# Sample data: 1 appears three times in a row (Id 1-3); 2 never does.
INSERT INTO Logs (Id, Num) VALUES
(1, 1),
(2, 1),
(3, 1),
(4, 2),
(5, 1),
(6, 2),
(7, 2);

In [None]:
"""SQL solution"""
# 1
# Consecutive rows get consecutive overall row numbers (ROW_NUMBER ordered by Id).
# Within one Num, the per-Num row numbers (ROW_NUMBER partitioned by Num, ordered by Id)
# are consecutive as well.
# Inside a single run of the same Num both counters advance together, so their
# difference stays constant; between two separate runs of the same Num the overall
# counter has moved further ahead, so the difference changes.
# Grouping by (Num, diff) therefore isolates each run, and any run with 3+ rows qualifies.
# DISTINCT is needed because one Num may have several qualifying runs.

SELECT DISTINCT Num AS ConsecutiveNums
FROM (
    SELECT Num,
           ROW_NUMBER() OVER (ORDER BY Id)
             - ROW_NUMBER() OVER (PARTITION BY Num ORDER BY Id) AS diff
    FROM Logs
) AS g
GROUP BY Num, diff
HAVING COUNT(*) >= 3;

# 2
# A number appears at least three times in a row exactly when some row has the same
# Num as both of the two rows immediately before it (in Id order).
# LAG() is used instead of a running counter kept in user variables (@var := ...):
# MySQL leaves the evaluation order of user-variable assignments inside a SELECT
# undefined, and a plain scan of Logs is not guaranteed to follow Id order.
# For the first two rows LAG() yields NULL, so the comparisons below filter them out.
SELECT DISTINCT Num AS ConsecutiveNums
FROM (
    SELECT Num,
           LAG(Num, 1) OVER (ORDER BY Id) AS prev_1,
           LAG(Num, 2) OVER (ORDER BY Id) AS prev_2
    FROM Logs
) AS t
WHERE Num = prev_1
  AND Num = prev_2;

In [24]:
"""pandas dataframe creation"""
import pandas as pd

# Sample input: column names live under "headers", the [Id, Num] records under "rows".
testcase = {"headers": {"Logs": ["Id", "Num"]}, "rows": {"Logs": [[1, 1], [2, 1], [3, 1], [4, 2], [5, 1], [6, 2], [7, 2]]}}

# Build the frame straight from the row records and name the columns in the same call
# (DataFrame.from_dict is meant for dict input, not for a list of rows).
logs = pd.DataFrame(testcase['rows']['Logs'], columns=testcase['headers']['Logs'])
logs

Unnamed: 0,Id,Num
0,1,1
1,2,1
2,3,1
3,4,2
4,5,1
5,6,2
6,7,2


In [25]:
# 1
# Number the rows twice, both in Id order: once over the whole table and once
# within each Num. Inside one run of equal Nums the two counters advance together,
# so their difference is constant; it changes as soon as a new run starts.
df1 = logs.copy()
df1['Num_rank'] = df1.groupby('Num')['Id'].rank(method='first')
# Subtract from the overall row number (not the raw Id) so that gaps in Id
# do not split a run -- the same definition the ROW_NUMBER() SQL solution uses.
df1['diff'] = df1['Id'].rank(method='first') - df1['Num_rank']
df1

Unnamed: 0,Id,Num,Num_rank,diff
0,1,1,1.0,0.0
1,2,1,2.0,0.0
2,3,1,3.0,0.0
3,4,2,1.0,3.0
4,5,1,4.0,1.0
5,6,2,2.0,4.0
6,7,2,3.0,4.0


In [26]:
# Each (Num, diff) pair identifies one run; count the rows in every run as `cnt`.
run_sizes = df1.groupby(['Num', 'diff']).agg(cnt=('Id', 'count'))
df1 = run_sizes.reset_index()
df1

Unnamed: 0,Num,diff,cnt
0,1,0.0,3
1,1,1.0,1
2,2,3.0,1
3,2,4.0,2


In [27]:
# Keep the numbers whose run has at least 3 rows.
# The same number can have several qualifying runs, hence unique().
has_long_run = df1['cnt'] >= 3
df1.loc[has_long_run, 'Num'].unique()

array([1], dtype=int64)

In [28]:
# 2
# Label every row with its 0-based position inside its run of equal Nums:
# the label restarts at 0 whenever Num differs from the previous row (in Id order).
# sort_values returns a new frame, so `logs` itself is never mutated and the
# earlier cells stay re-runnable.
df2 = logs.sort_values('Id')
# A new run starts wherever Num differs from the row before it
# (always true for the first row, whose predecessor is missing).
run_id = df2['Num'].ne(df2['Num'].shift()).cumsum()
df2['label'] = df2.groupby(run_id).cumcount()
df2

Unnamed: 0,Id,Num,label
0,1,1,0
1,2,1,1
2,3,1,2
3,4,2,0
4,5,1,0
5,6,2,0
6,7,2,1


In [30]:
# A run of three or more rows must contain a row labelled 2; report each such number once.
third_in_run = df2['label'] == 2
df2.loc[third_in_run, 'Num'].unique()

array([1], dtype=int64)