In [None]:
"""
Find the number of resumes received for each job in each month of 2025.
Order the results first by month descending and then by number of resumes descending.
"""

In [None]:
"""SQL table creation and data insertion"""
-- Rebuild the sample table from scratch so this cell can be re-run safely.
DROP TABLE IF EXISTS resume_info;

-- Daily resume counts per job: `num` is the value summed by the solution queries.
-- Integer display widths (INT(4), INT(11)) are omitted: they are deprecated since
-- MySQL 8.0.17 and never affected the stored range.
CREATE TABLE resume_info (
    id   INT         NOT NULL,
    job  VARCHAR(64) NOT NULL,
    date DATE        NOT NULL,
    num  INT         NOT NULL,
    PRIMARY KEY (id)
);

-- Explicit column list keeps the insert valid if columns are later added or reordered.
-- Rows 19 and 20 fall in 2026 and must be excluded by the year filter in the solutions.
INSERT INTO resume_info (id, job, date, num) VALUES
(1,'C++','2025-01-02',53),
(2,'Python','2025-01-02',23),
(3,'Java','2025-01-02',12),
(4,'C++','2025-01-03',54),
(5,'Python','2025-01-03',43),
(6,'Java','2025-01-03',41),
(7,'Java','2025-02-03',24),
(8,'C++','2025-02-03',23),
(9,'Python','2025-02-03',34),
(10,'Java','2025-02-04',42),
(11,'C++','2025-02-04',45),
(12,'Python','2025-02-04',59),
(13,'Python','2025-03-04',54),
(14,'C++','2025-03-04',65),
(15,'Java','2025-03-04',92),
(16,'Python','2025-03-05',34),
(17,'C++','2025-03-05',34),
(18,'Java','2025-03-05',34),
(19,'Python','2026-01-04',230),
(20,'C++','2026-02-06',231);

In [None]:
"""SQL solution"""
# 1: plain GROUP BY
# The selected 'YYYY-MM' expression is aliased as `mon` and used as the grouping key.
# Grouping on MONTH(date) while selecting DATE_FORMAT(date, ...) is rejected with
# error 1055 under the default ONLY_FULL_GROUP_BY sql_mode, because the selected
# expression is not functionally dependent on the GROUP BY columns.
SELECT job, DATE_FORMAT(date, '%Y-%m') AS mon, SUM(num) AS cnt
FROM resume_info
WHERE YEAR(date) = 2025
GROUP BY job, mon
ORDER BY mon DESC, cnt DESC;

# 2: window function (requires MySQL 8.0+)
# SUM() OVER repeats the (job, month) total on every source row of that partition,
# so DISTINCT collapses the duplicates down to one row per job and month.
SELECT DISTINCT * FROM
(SELECT job,
        DATE_FORMAT(date, '%Y-%m') AS mon,
        SUM(num) OVER (PARTITION BY job, DATE_FORMAT(date, '%Y-%m')) AS cnt
 FROM resume_info
 WHERE YEAR(date) = 2025) AS a
ORDER BY mon DESC, cnt DESC;

In [4]:
"""pandas dataframe creation"""
import pandas as pd

# Sample rows mirroring the SQL table, one tuple per row: (id, job, date, num).
ri = [
    (1, 'C++', '2025-01-02', 53),
    (2, 'Python', '2025-01-02', 23),
    (3, 'Java', '2025-01-02', 12),
    (4, 'C++', '2025-01-03', 54),
    (5, 'Python', '2025-01-03', 43),
    (6, 'Java', '2025-01-03', 41),
    (7, 'Java', '2025-02-03', 24),
    (8, 'C++', '2025-02-03', 23),
    (9, 'Python', '2025-02-03', 34),
    (10, 'Java', '2025-02-04', 42),
    (11, 'C++', '2025-02-04', 45),
    (12, 'Python', '2025-02-04', 59),
    (13, 'Python', '2025-03-04', 54),
    (14, 'C++', '2025-03-04', 65),
    (15, 'Java', '2025-03-04', 92),
    (16, 'Python', '2025-03-05', 34),
    (17, 'C++', '2025-03-05', 34),
    (18, 'Java', '2025-03-05', 34),
    (19, 'Python', '2026-01-04', 230),
    (20, 'C++', '2026-02-06', 231),
]

# Label the tuple positions; the dates are still plain strings at this point.
resume_info = pd.DataFrame(data=ri, columns=['id', 'job', 'date', 'num'])
resume_info

Unnamed: 0,id,job,date,num
0,1,C++,2025-01-02,53
1,2,Python,2025-01-02,23
2,3,Java,2025-01-02,12
3,4,C++,2025-01-03,54
4,5,Python,2025-01-03,43
5,6,Java,2025-01-03,41
6,7,Java,2025-02-03,24
7,8,C++,2025-02-03,23
8,9,Python,2025-02-03,34
9,10,Java,2025-02-04,42


In [63]:
# Parse the date strings (written back onto resume_info), then keep only rows dated in 2025.
resume_info['date'] = pd.to_datetime(resume_info['date'])
df = resume_info.loc[resume_info['date'].dt.year == 2025]
df

Unnamed: 0,id,job,date,num
0,1,C++,2025-01-02,53
1,2,Python,2025-01-02,23
2,3,Java,2025-01-02,12
3,4,C++,2025-01-03,54
4,5,Python,2025-01-03,43
5,6,Java,2025-01-03,41
6,7,Java,2025-02-03,24
7,8,C++,2025-02-03,23
8,9,Python,2025-02-03,34
9,10,Java,2025-02-04,42


In [64]:
# Total resumes per job and calendar month.
# The month key is derived from the 'date' column and keeps that name, so after
# reset_index() it appears as 'date' and is renamed to 'mon' ('num' becomes 'cnt').
df = (
    df.groupby(['job', df['date'].dt.month])['num']
    .sum()
    .reset_index()
    .rename(columns={'date': 'mon', 'num': 'cnt'})
)
df

Unnamed: 0,job,mon,cnt
0,C++,1,107
1,C++,2,68
2,C++,3,99
3,Java,1,53
4,Java,2,66
5,Java,3,126
6,Python,1,66
7,Python,2,93
8,Python,3,88


In [65]:
# Turn the integer month into a two-character string, left-padded with '0' (1 -> '01').
df['mon'] = df['mon'].astype('str').str.zfill(2)
df

Unnamed: 0,job,mon,cnt
0,C++,1,107
1,C++,2,68
2,C++,3,99
3,Java,1,53
4,Java,2,66
5,Java,3,126
6,Python,1,66
7,Python,2,93
8,Python,3,88


In [66]:
# Prepend the year kept by the earlier filter so each label reads 'YYYY-MM'.
year_month = '2025-' + df['mon']
df['mon'] = year_month
df

Unnamed: 0,job,mon,cnt
0,C++,2025-01,107
1,C++,2025-02,68
2,C++,2025-03,99
3,Java,2025-01,53
4,Java,2025-02,66
5,Java,2025-03,126
6,Python,2025-01,66
7,Python,2025-02,93
8,Python,2025-03,88


In [67]:
# Most recent month first; within a month, the highest resume count first.
df.sort_values(['mon', 'cnt'], ascending=False)

Unnamed: 0,job,mon,cnt
5,Java,2025-03,126
2,C++,2025-03,99
8,Python,2025-03,88
7,Python,2025-02,93
1,C++,2025-02,68
4,Java,2025-02,66
0,C++,2025-01,107
6,Python,2025-01,66
3,Java,2025-01,53
