In [None]:
"""
Find information about source of orders from the users with 2 or more completed order of course on C++, Java, or Python after 2025-10-15,
show the name of client, or 'GroupBuy' if it is a group buy in the 1st column (source);
show the number of orders in the second column,
order the result by source ascending
"""

In [None]:
"""SQL table creation and data insertion"""
drop table if exists order_info;
drop table if exists client;

CREATE TABLE order_info (
id int(4) NOT NULL,
user_id int(11) NOT NULL,
product_name varchar(256) NOT NULL,
status varchar(32) NOT NULL,
client_id int(4) NOT NULL,
date date NOT NULL,
is_group_buy varchar(32) NOT NULL,
PRIMARY KEY (id));

CREATE TABLE client(
id int(4) NOT NULL,
name varchar(32) NOT NULL,
PRIMARY KEY (id)
);

INSERT INTO order_info VALUES
(1,557336,'C++','no_completed',1,'2025-10-10','No'),
(2,230173543,'Python','completed',2,'2025-10-12','No'),
(3,57,'JS','completed',0,'2025-10-23','Yes'),
(4,57,'C++','completed',3,'2025-10-23','No'),
(5,557336,'Java','completed',0,'2025-10-23','Yes'),
(6,57,'Java','completed',1,'2025-10-24','No'),
(7,557336,'C++','completed',0,'2025-10-25','Yes');

INSERT INTO client VALUES
(1,'PC'),
(2,'Android'),
(3,'IOS'),
(4,'H5');

In [None]:
"""SQL solution"""
# 1
SELECT IF(ISNULL(a.sc), 'GroupBuy', a.sc) AS source, COUNT(*)
FROM
(SELECT o.id, c.name AS sc
FROM order_info AS o
LEFT JOIN client AS c
ON o.client_id = c.id
WHERE user_id IN
(SELECT user_id 
FROM order_info
WHERE date > '2025-10-15'
AND status = 'completed'
AND product_name IN ('C++', 'Java', 'Python')
GROUP BY user_id
HAVING COUNT(user_id) >= 2)
AND date > '2025-10-15'
AND status = 'completed'
AND product_name IN ('C++', 'Java', 'Python')) AS a
GROUP BY a.sc
ORDER BY source

# 2
SELECT IFNULL(a.sc, 'GroupBuy') AS source, COUNT(*)
FROM
(SELECT o.id, o.is_group_buy, c.name AS sc
FROM order_info AS o
LEFT JOIN client AS c
ON o.client_id = c.id
WHERE o.user_id IN 
(SELECT user_id 
FROM order_info
WHERE date > '2025-10-15'
AND status = 'completed'
AND product_name IN ('C++', 'Java', 'Python')
GROUP BY user_id
HAVING COUNT(*) >= 2)
AND date > '2025-10-15'
AND status = 'completed'
AND product_name IN ('C++', 'Java', 'Python')) AS a 
GROUP BY a.sc
ORDER BY source

In [3]:
"""pandas dataframe creation"""
import pandas as pd

oi = [(1,557336,'C++','no_completed',1,'2025-10-10','No'),
(2,230173543,'Python','completed',2,'2025-10-12','No'),
(3,57,'JS','completed',0,'2025-10-23','Yes'),
(4,57,'C++','completed',3,'2025-10-23','No'),
(5,557336,'Java','completed',0,'2025-10-23','Yes'),
(6,57,'Java','completed',1,'2025-10-24','No'),
(7,557336,'C++','completed',0,'2025-10-25','Yes')]

c = [(1,'PC'),
(2,'Android'),
(3,'IOS'),
(4,'H5')]

order_info = pd.DataFrame(oi, columns=['id', 'user_id', 'product_name', 'status', 'client_id', 'date', 'is_group_buy'])
client = pd.DataFrame(c, columns=['id', 'name'])

order_info['date'] = pd.to_datetime(order_info['date'])
order_info

Unnamed: 0,id,user_id,product_name,status,client_id,date,is_group_buy
0,1,557336,C++,no_completed,1,2025-10-10,No
1,2,230173543,Python,completed,2,2025-10-12,No
2,3,57,JS,completed,0,2025-10-23,Yes
3,4,57,C++,completed,3,2025-10-23,No
4,5,557336,Java,completed,0,2025-10-23,Yes
5,6,57,Java,completed,1,2025-10-24,No
6,7,557336,C++,completed,0,2025-10-25,Yes


In [2]:
client.head()

Unnamed: 0,id,name
0,1,PC
1,2,Android
2,3,IOS
3,4,H5


In [4]:
# filter the data
courses = ['C++', 'Java', 'Python']
df = order_info[(order_info['product_name'].isin(courses)) & (order_info['status'] == 'completed') & (order_info['date'] >= '2025-10-15')]
df

Unnamed: 0,id,user_id,product_name,status,client_id,date,is_group_buy
3,4,57,C++,completed,3,2025-10-23,No
4,5,557336,Java,completed,0,2025-10-23,Yes
5,6,57,Java,completed,1,2025-10-24,No
6,7,557336,C++,completed,0,2025-10-25,Yes


In [5]:
df_cnt = df.groupby('user_id')['id'].count().reset_index().rename(columns={'id': 'cnt'})
df = pd.merge(df, df_cnt, on='user_id')
df

Unnamed: 0,id,user_id,product_name,status,client_id,date,is_group_buy,cnt
0,4,57,C++,completed,3,2025-10-23,No,2
1,6,57,Java,completed,1,2025-10-24,No,2
2,5,557336,Java,completed,0,2025-10-23,Yes,2
3,7,557336,C++,completed,0,2025-10-25,Yes,2


In [15]:
df_gb = df[df['is_group_buy'] == 'Yes']
df_gb['source'] = 'GroupBuy'
df_gb

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_gb['source'] = 'GroupBuy'


Unnamed: 0,id,user_id,product_name,status,client_id,date,is_group_buy,cnt,source
2,5,557336,Java,completed,0,2025-10-23,Yes,2,GroupBuy
3,7,557336,C++,completed,0,2025-10-25,Yes,2,GroupBuy


In [13]:
df_ngb = df[df['is_group_buy'] == 'No']
df_ngb = pd.merge(df_ngb, client, left_on='client_id', right_on='id', suffixes=['', '_client'])
df_ngb = df_ngb.drop(columns='id_client')
df_ngb = df_ngb.rename(columns={'name': 'source'})
df_ngb

Unnamed: 0,id,user_id,product_name,status,client_id,date,is_group_buy,cnt,source
0,4,57,C++,completed,3,2025-10-23,No,2,IOS
1,6,57,Java,completed,1,2025-10-24,No,2,PC


In [16]:
result = pd.concat([df_gb, df_ngb])
result

Unnamed: 0,id,user_id,product_name,status,client_id,date,is_group_buy,cnt,source
2,5,557336,Java,completed,0,2025-10-23,Yes,2,GroupBuy
3,7,557336,C++,completed,0,2025-10-25,Yes,2,GroupBuy
0,4,57,C++,completed,3,2025-10-23,No,2,IOS
1,6,57,Java,completed,1,2025-10-24,No,2,PC


In [18]:
result.groupby('source')['id'].count().reset_index()

Unnamed: 0,source,id
0,GroupBuy,2
1,IOS,1
2,PC,1
