In [None]:
"""
If a user has 2 or more completed order of course on C++, Java, or Python after 2025-10-15,
find the user_id, 
the date of first completed order fullfilling the requirement (first_buy_date),
the date of second completed order fullfilling the requirement (second_buy_date),
total number of courses fullfilling the requirement (cnt), 
order the result by user_id
"""

In [None]:
"""SQL table creation and data insertion"""
drop table if exists order_info;

CREATE TABLE order_info (
id int(4) NOT NULL,
user_id int(11) NOT NULL,
product_name varchar(256) NOT NULL,
status varchar(32) NOT NULL,
client_id int(4) NOT NULL,
date date NOT NULL,
PRIMARY KEY (id));

INSERT INTO order_info VALUES
(1,557336,'C++','no_completed',1,'2025-10-10'),
(2,230173543,'Python','completed',2,'2025-10-12'),
(3,57,'JS','completed',3,'2025-10-23'),
(4,57,'C++','completed',3,'2025-10-23'),
(5,557336,'Java','completed',1,'2025-10-23'),
(6,557336,'Python','no_completed',1,'2025-10-24'),
(7,557336,'C++','completed',1,'2025-10-16'),
(8,230173543,'C++','completed',1,'2025-10-16');

In [None]:
"""SQL solution"""
SELECT fbuy.user_id, fbuy.date, sbuy.date, fbuy.cnt
FROM
(SELECT f.user_id, f.date, f.cnt
FROM
(SELECT user_id, date,
ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY date) AS r,
COUNT(*) OVER(PARTITION BY user_id) AS cnt
FROM order_info
WHERE date > '2025-10-15'
AND status = 'completed'
AND product_name IN ('C++', 'Java', 'Python')) AS f
WHERE f.r = 1 AND f.cnt >= 2) AS fbuy
INNER JOIN
(SELECT g.user_id, g.date
FROM
(SELECT user_id, date,
ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY date) AS r,
COUNT(*) OVER(PARTITION BY user_id) AS cnt
FROM order_info
WHERE date > '2025-10-15'
AND status = 'completed'
AND product_name IN ('C++', 'Java', 'Python')) AS g
WHERE g.r = 2 AND g.cnt >= 2) AS sbuy
ON fbuy.user_id = sbuy.user_id
ORDER BY user_id

In [1]:
"""pandas dataframe creation"""
import pandas as pd

oi = [(1,557336,'C++','no_completed',1,'2025-10-10'),
(2,230173543,'Python','completed',2,'2025-10-12'),
(3,57,'JS','completed',3,'2025-10-23'),
(4,57,'C++','completed',3,'2025-10-23'),
(5,557336,'Java','completed',1,'2025-10-23'),
(6,57,'Java','completed',1,'2025-10-24'),
(7,557336,'C++','completed',1,'2025-10-25'),
(8,557336,'Python','completed',1,'2025-10-25')]

order_info = pd.DataFrame(oi, columns=['id', 'user_id', 'product_name', 'status', 'client_id', 'date'])
order_info.head()

Unnamed: 0,id,user_id,product_name,status,client_id,date
0,1,557336,C++,no_completed,1,2025-10-10
1,2,230173543,Python,completed,2,2025-10-12
2,3,57,JS,completed,3,2025-10-23
3,4,57,C++,completed,3,2025-10-23
4,5,557336,Java,completed,1,2025-10-23


In [2]:
order_info['date'] = pd.to_datetime(order_info['date'])
# filter the data
courses = ['C++', 'Java', 'Python']
df = order_info[(order_info['product_name'].isin(courses)) & (order_info['status'] == 'completed') & (order_info['date'] >= '2025-10-15')]
df

Unnamed: 0,id,user_id,product_name,status,client_id,date
3,4,57,C++,completed,3,2025-10-23
4,5,557336,Java,completed,1,2025-10-23
5,6,57,Java,completed,1,2025-10-24
6,7,557336,C++,completed,1,2025-10-25
7,8,557336,Python,completed,1,2025-10-25


In [5]:
# order the buy_date of the orders for each user
df['buy_date_order'] = df.groupby('user_id')['date'].rank(method='dense')
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['buy_date_order'] = df.groupby('user_id')['date'].rank(method='dense')


Unnamed: 0,id,user_id,product_name,status,client_id,date,buy_date_order
3,4,57,C++,completed,3,2025-10-23,1.0
4,5,557336,Java,completed,1,2025-10-23,1.0
5,6,57,Java,completed,1,2025-10-24,2.0
6,7,557336,C++,completed,1,2025-10-25,2.0
7,8,557336,Python,completed,1,2025-10-25,2.0


In [6]:
# find first_buy_date
df1 = df[df['buy_date_order']==1]
df1

Unnamed: 0,id,user_id,product_name,status,client_id,date,buy_date_order
3,4,57,C++,completed,3,2025-10-23,1.0
4,5,557336,Java,completed,1,2025-10-23,1.0


In [7]:
# find second_buy_date
df2 = df[df['buy_date_order']==2]
df2

Unnamed: 0,id,user_id,product_name,status,client_id,date,buy_date_order
5,6,57,Java,completed,1,2025-10-24,2.0
6,7,557336,C++,completed,1,2025-10-25,2.0
7,8,557336,Python,completed,1,2025-10-25,2.0


In [13]:
# find cnt
df3 = df.groupby('user_id')['id'].count().reset_index()
df3 = df3.rename(columns={'id':'cnt'})
df3

Unnamed: 0,user_id,cnt
0,57,2
1,557336,3


In [24]:
# merge the results
result = pd.merge(df1[['user_id', 'date']], df2[['user_id', 'date']], on='user_id', suffixes=['_1', '_2'])
result = pd.merge(result, df3, on='user_id')
result = result.rename(columns={'date_1':'first_buy_date', 'date_2': 'second_buy_date'})
result.drop_duplicates(inplace=True)
result.sort_values(by='user_id')

Unnamed: 0,user_id,first_buy_date,second_buy_date,cnt
0,57,2025-10-23,2025-10-24,2
1,557336,2025-10-23,2025-10-25,3
