In [None]:
"""
Find the number of resumes received for each job in each month of 2025,
together with the number of resumes received for the same job in the same month of 2026.
Order the results by first_year_mon descending, then by job descending.
"""

In [None]:
"""SQL table creation and data insertion"""
-- Recreate the resume_info fixture from scratch so this cell can be re-run.
DROP TABLE IF EXISTS resume_info;

-- Each row records `num` resumes received for `job` on `date`.
-- Integer display widths such as INT(4) are deprecated in MySQL 8.0.17+ and
-- never affected storage or range, so plain INT is used.
CREATE TABLE resume_info (
    id   INT         NOT NULL,
    job  VARCHAR(64) NOT NULL,
    date DATE        NOT NULL,
    num  INT         NOT NULL,
    PRIMARY KEY (id)
);

-- Explicit column list: the insert no longer depends on the table's column order.
-- Row 25 (year 2027) falls outside both compared years.
INSERT INTO resume_info (id, job, date, num) VALUES
(1,'C++','2025-01-02',53),
(2,'Python','2025-01-02',23),
(3,'Java','2025-01-02',12),
(4,'C++','2025-01-03',54),
(5,'Python','2025-01-03',43),
(6,'Java','2025-01-03',41),
(7,'Java','2025-02-03',24),
(8,'C++','2025-02-03',23),
(9,'Python','2025-02-03',34),
(10,'Java','2025-02-04',42),
(11,'C++','2025-02-04',45),
(12,'Python','2025-02-04',59),
(13,'C++','2026-01-04',230),
(14,'Java','2026-01-04',764),
(15,'Python','2026-01-04',644),
(16,'C++','2026-01-06',240),
(17,'Java','2026-01-06',714),
(18,'Python','2026-01-06',624),
(19,'C++','2026-02-14',260),
(20,'Java','2026-02-14',721),
(21,'Python','2026-02-14',321),
(22,'C++','2026-02-24',134),
(23,'Java','2026-02-24',928),
(24,'Python','2026-02-24',525),
(25,'C++','2027-02-06',231);

In [None]:
"""SQL solution"""
-- For every job and calendar month, pair the 2025 resume total with the
-- 2026 total for the same job and month.
-- Output columns are aliased so each name is unique and matches the names
-- used in the task statement (first_year_mon, first_year_cnt, ...).
SELECT
    f.job,
    f.mon AS first_year_mon,
    f.cnt AS first_year_cnt,
    s.mon AS second_year_mon,
    s.cnt AS second_year_cnt
FROM (
    -- 2025 totals per job and 'YYYY-MM' month.
    -- Grouping by the select alias `mon` relies on MySQL's GROUP BY alias extension.
    SELECT job, DATE_FORMAT(date, '%Y-%m') AS mon, SUM(num) AS cnt
    FROM resume_info
    WHERE YEAR(date) = 2025
    GROUP BY job, mon
) AS f
INNER JOIN (
    -- 2026 totals per job and 'YYYY-MM' month.
    SELECT job, DATE_FORMAT(date, '%Y-%m') AS mon, SUM(num) AS cnt
    FROM resume_info
    WHERE YEAR(date) = 2026
    GROUP BY job, mon
) AS s
    ON f.job = s.job
    -- Compare only the two-digit month part of 'YYYY-MM'.
    AND RIGHT(f.mon, 2) = RIGHT(s.mon, 2)
-- `job` is qualified because both derived tables expose a column of that name.
ORDER BY first_year_mon DESC, f.job DESC;

In [1]:
"""pandas dataframe creation"""
import pandas as pd

# Fixture rows as (id, job, date, num) tuples; dates are kept as plain strings here.
ri = [
    (1, 'C++', '2025-01-02', 53),
    (2, 'Python', '2025-01-02', 23),
    (3, 'Java', '2025-01-02', 12),
    (4, 'C++', '2025-01-03', 54),
    (5, 'Python', '2025-01-03', 43),
    (6, 'Java', '2025-01-03', 41),
    (7, 'Java', '2025-02-03', 24),
    (8, 'C++', '2025-02-03', 23),
    (9, 'Python', '2025-02-03', 34),
    (10, 'Java', '2025-02-04', 42),
    (11, 'C++', '2025-02-04', 45),
    (12, 'Python', '2025-02-04', 59),
    (13, 'C++', '2026-01-04', 230),
    (14, 'Java', '2026-01-04', 764),
    (15, 'Python', '2026-01-04', 644),
    (16, 'C++', '2026-01-06', 240),
    (17, 'Java', '2026-01-06', 714),
    (18, 'Python', '2026-01-06', 624),
    (19, 'C++', '2026-02-14', 260),
    (20, 'Java', '2026-02-14', 721),
    (21, 'Python', '2026-02-14', 321),
    (22, 'C++', '2026-02-24', 134),
    (23, 'Java', '2026-02-24', 928),
    (24, 'Python', '2026-02-24', 525),
    (25, 'C++', '2027-02-06', 231),
]

# One DataFrame row per tuple, with a column name for each tuple position.
resume_info = pd.DataFrame(data=ri, columns=['id', 'job', 'date', 'num'])
resume_info.head()

Unnamed: 0,id,job,date,num
0,1,C++,2025-01-02,53
1,2,Python,2025-01-02,23
2,3,Java,2025-01-02,12
3,4,C++,2025-01-03,54
4,5,Python,2025-01-03,43


In [4]:
# Parse the 'date' column into datetimes; this overwrites the column on resume_info.
resume_info['date'] = pd.to_datetime(resume_info['date'])
# Boolean mask selecting the rows dated in 2025.
in_2025 = resume_info['date'].dt.year == 2025
df2025 = resume_info.loc[in_2025]
df2025.head()

Unnamed: 0,id,job,date,num
0,1,C++,2025-01-02,53
1,2,Python,2025-01-02,23
2,3,Java,2025-01-02,12
3,4,C++,2025-01-03,54
4,5,Python,2025-01-03,43


In [6]:
# Sum 'num' per (job, month number) for the 2025 rows.
# The month key keeps the name 'date', so the grouped column is renamed to 'mon'.
month_key = df2025['date'].dt.month
df2025 = (
    df2025.groupby(['job', month_key])['num']
    .sum()
    .reset_index()
    .rename(columns={'date': 'mon', 'num': 'cnt'})
)
df2025

Unnamed: 0,job,mon,cnt
0,C++,1,107
1,C++,2,68
2,Java,1,53
3,Java,2,66
4,Python,1,66
5,Python,2,93


In [7]:
# Turn the month number into text, then left-pad with '0' to two characters.
month_text = df2025['mon'].astype('str')
df2025['mon'] = month_text.str.zfill(2)
df2025

Unnamed: 0,job,mon,cnt
0,C++,1,107
1,C++,2,68
2,Java,1,53
3,Java,2,66
4,Python,1,66
5,Python,2,93


In [8]:
# Rows dated in 2026.
rows_2026 = resume_info.loc[pd.to_datetime(resume_info['date']).dt.year == 2026]

# Sum 'num' per (job, month number), rename the grouped columns, then store
# the month as a two-character, zero-padded string.
df2026 = (
    rows_2026.groupby(['job', pd.to_datetime(rows_2026['date']).dt.month])['num']
    .sum()
    .reset_index()
    .rename(columns={'date': 'mon', 'num': 'cnt'})
    .assign(mon=lambda frame: frame['mon'].astype('str').str.zfill(2))
)
df2026

Unnamed: 0,job,mon,cnt
0,C++,1,470
1,C++,2,394
2,Java,1,1478
3,Java,2,1649
4,Python,1,1268
5,Python,2,846


In [12]:
# Pair each (job, mon) total from df2025 with the matching total from df2026,
# label both months with their year, and sort by first-year month then job,
# both descending.
result = (
    df2025.merge(df2026, on=['job', 'mon'], how='inner', suffixes=('_2025', '_2026'))
    .rename(columns={'cnt_2025': 'first_year_cnt', 'cnt_2026': 'second_year_cnt'})
    .assign(
        first_year_mon=lambda frame: '2025-' + frame['mon'],
        second_year_mon=lambda frame: '2026-' + frame['mon'],
    )
    .sort_values(by=['first_year_mon', 'job'], ascending=[False, False])
)
# Display the columns in the order the task asks for.
output_columns = ['job', 'first_year_mon', 'first_year_cnt', 'second_year_mon', 'second_year_cnt']
result[output_columns]

Unnamed: 0,job,first_year_mon,first_year_cnt,second_year_mon,second_year_cnt
5,Python,2025-02,93,2026-02,846
3,Java,2025-02,66,2026-02,1649
1,C++,2025-02,68,2026-02,394
4,Python,2025-01,66,2026-01,1268
2,Java,2025-01,53,2026-01,1478
0,C++,2025-01,107,2026-01,470
