In [None]:
"""
Find each user's name and the accumulated number of problems they have passed as of each day.
Order the result by date ascending, then by name.
Do not output a row for a user who logged in on a given day but did not work on any problems that day (i.e. has no passing_number record for that date).
"""

In [None]:
"""SQL table creation and data insertion"""
# Start from a clean slate so the script can be re-run.
DROP TABLE IF EXISTS login;
DROP TABLE IF EXISTS passing_number;
DROP TABLE IF EXISTS user;
DROP TABLE IF EXISTS client;

# login: which user logged in from which client on which date.
CREATE TABLE `login` (
    `id`        int(4) NOT NULL,
    `user_id`   int(4) NOT NULL,
    `client_id` int(4) NOT NULL,
    `date`      date   NOT NULL,
    PRIMARY KEY (`id`)
);

# passing_number: number of problems a user passed on a date.
CREATE TABLE `passing_number` (
    `id`      int(4) NOT NULL,
    `user_id` int(4) NOT NULL,
    `number`  int(4) NOT NULL,
    `date`    date   NOT NULL,
    PRIMARY KEY (`id`)
);

# user: id -> name lookup.
CREATE TABLE `user` (
    `id`   int(4)      NOT NULL,
    `name` varchar(32) NOT NULL,
    PRIMARY KEY (`id`)
);

# client: id -> name lookup.
CREATE TABLE `client` (
    `id`   int(4)      NOT NULL,
    `name` varchar(32) NOT NULL,
    PRIMARY KEY (`id`)
);

# Seed data.
INSERT INTO login (`id`, `user_id`, `client_id`, `date`) VALUES
    (1, 2, 1, '2020-10-12'),
    (2, 3, 2, '2020-10-12'),
    (3, 2, 2, '2020-10-13'),
    (4, 3, 2, '2020-10-13');

INSERT INTO passing_number (`id`, `user_id`, `number`, `date`) VALUES
    (1, 2, 4, '2020-10-12'),
    (2, 3, 1, '2020-10-12'),
    (3, 2, 0, '2020-10-13'),
    (4, 3, 2, '2020-10-13');

INSERT INTO user (`id`, `name`) VALUES
    (1, 'tm'),
    (2, 'fh'),
    (3, 'wc');

# Client names are kept exactly as spelled in the original data set.
INSERT INTO client (`id`, `name`) VALUES
    (1, 'pc'),
    (2, 'ios'),
    (3, 'anroid'),
    (4, 'h5');

In [None]:
"""SQL solution"""
# 1 - self-join running total (works without window functions)
# For every passing_number row p, add up the same user's rows dated on or
# before p.date. This replaces the earlier @variable approach: MySQL does not
# define the evaluation order of user-variable assignments within a SELECT,
# and an ORDER BY inside a derived table is not guaranteed to be honoured,
# so that query could return wrong totals.
SELECT u.name,
       p.date,
       CAST(SUM(prev.number) AS SIGNED) AS ps_num
FROM passing_number AS p
JOIN passing_number AS prev
  ON prev.user_id = p.user_id
 AND prev.date <= p.date
JOIN user AS u
  ON u.id = p.user_id
GROUP BY p.id, u.name, p.date
ORDER BY p.date, u.name;

# 2 - window function (requires MySQL 8.0+)
# The per-user running SUM is ordered by date inside the window itself.
SELECT u.name,
       pn.date,
       SUM(pn.number) OVER (PARTITION BY pn.user_id ORDER BY pn.date) AS ps_num
FROM passing_number AS pn
JOIN user AS u
  ON pn.user_id = u.id
ORDER BY pn.date, u.name;

In [None]:
# expected
fh|2020-10-12|4
wc|2020-10-12|1
fh|2020-10-13|4
wc|2020-10-13|3

In [2]:
"""pandas dataframe creation"""
import pandas as pd

# Only the passing_number and user tables are needed for this problem;
# each frame is built column by column.
passing_number = pd.DataFrame({
    'id': [1, 2, 3, 4],
    'user_id': [2, 3, 2, 3],
    'number': [4, 1, 0, 2],
    'date': ['2020-10-12', '2020-10-12', '2020-10-13', '2020-10-13'],
})
user = pd.DataFrame({
    'id': [1, 2, 3],
    'name': ['tm', 'fh', 'wc'],
})
passing_number.head()

Unnamed: 0,id,user_id,number,date
0,1,2,4,2020-10-12
1,2,3,1,2020-10-12
2,3,2,0,2020-10-13
3,4,3,2,2020-10-13


In [3]:
# Preview the user lookup table (id, name).
user.head()

Unnamed: 0,id,name
0,1,tm
1,2,fh
2,3,wc


In [5]:
# Running total of problems passed per user, accumulated in date order.
# cumsum() accumulates in the frame's current row order, so sort by date first
# (stable, to keep ties in their original order). The assignment aligns on the
# index, so passing_number keeps its original row order.
passing_number['cum_number'] = (
    passing_number
    .sort_values('date', kind='stable')
    .groupby('user_id')['number']
    .cumsum()
)
passing_number

Unnamed: 0,id,user_id,number,date,cum_number
0,1,2,4,2020-10-12,4
1,2,3,1,2020-10-12,1
2,3,2,0,2020-10-13,4
3,4,3,2,2020-10-13,3


In [8]:
# Attach each user's name to their running totals (inner join on user id).
df = passing_number[['user_id', 'date', 'cum_number']].merge(
    user,
    how='inner',
    left_on='user_id',
    right_on='id',
)
# Order by date, then name, and show only the columns the problem asks for.
df.sort_values(by=['date', 'name'])[['name', 'date', 'cum_number']]

Unnamed: 0,name,date,cum_number
0,fh,2020-10-12,4
2,wc,2020-10-12,1
1,fh,2020-10-13,4
3,wc,2020-10-13,3
