In [None]:
"""Find number of records of salary for each department, order by dept_no ascending"""

In [None]:
"""SQL table creation and data insertion"""
drop table if exists  `departments` ; 
drop table if exists  `dept_emp` ; 
drop table if exists  `salaries` ;

CREATE TABLE `departments` (
`dept_no` char(4) NOT NULL,
`dept_name` varchar(40) NOT NULL,
PRIMARY KEY (`dept_no`));

CREATE TABLE `dept_emp` (
`emp_no` int(11) NOT NULL,
`dept_no` char(4) NOT NULL,
`from_date` date NOT NULL,
`to_date` date NOT NULL,
PRIMARY KEY (`emp_no`,`dept_no`));

CREATE TABLE `salaries` (
`emp_no` int(11) NOT NULL,
`salary` int(11) NOT NULL,
`from_date` date NOT NULL,
`to_date` date NOT NULL,
PRIMARY KEY (`emp_no`,`from_date`));

INSERT INTO departments VALUES('d001','Marketing');
INSERT INTO departments VALUES('d002','Finance');

INSERT INTO dept_emp VALUES(10001,'d001','2001-06-22','9999-01-01');
INSERT INTO dept_emp VALUES(10002,'d001','1996-08-03','9999-01-01');
INSERT INTO dept_emp VALUES(10003,'d002','1996-08-03','9999-01-01');

INSERT INTO salaries VALUES(10001,85097,'2001-06-22','2002-06-22');
INSERT INTO salaries VALUES(10001,88958,'2002-06-22','9999-01-01');
INSERT INTO salaries VALUES(10002,72527,'1996-08-03','9999-01-01');
INSERT INTO salaries VALUES(10003,32323,'1996-08-03','9999-01-01');

In [None]:
"""SQL solution"""
# 1
SELECT d.dept_no, d.dept_name, COUNT(salary)
FROM departments AS d
INNER JOIN dept_emp AS e
ON d.dept_no = e.dept_no
INNER JOIN salaries AS s
ON e.emp_no = s.emp_no
GROUP BY d.dept_no
ORDER BY d.dept_no

# 2
SELECT d.dept_no, d.dept_name, a.cnt
FROM departments AS d
INNER JOIN
(SELECT de.dept_no, COUNT(*) AS cnt
FROM dept_emp AS de
INNER JOIN salaries AS s
ON de.emp_no = s.emp_no
GROUP BY de.dept_no) AS a
ON d.dept_no = a.dept_no
ORDER BY d.dept_no

In [1]:
"""pandas dataframe creation"""
import numpy as np
import pandas as pd

t_dept = np.array([['d001','Marketing'],
                   ['d002','Finance']])
t_dept_emp = np.array([[10001,'d001','2001-06-22','9999-01-01'],
                       [10002,'d001','1996-08-03','9999-01-01'],
                       [10003,'d002','1996-08-03','9999-01-01']])
t_sal = np.array([[10001,85097,'2001-06-22','2002-06-22'],
                  [10001,88958,'2002-06-22','9999-01-01'],
                  [10002,72527,'1996-08-03','9999-01-01'],
                  [10003,32323,'1996-08-03','9999-01-01']])

departments = pd.DataFrame(data=t_dept, columns=['dept_no', 'dept_name'])
dept_emp = pd.DataFrame(data=t_dept_emp, columns=['emp_no', 'dept_no', 'from_date', 'hire_date'])
salaries = pd.DataFrame(data=t_sal, columns=['emp_no', 'salary', 'from_date', 'hire_date'])
departments.head()

Unnamed: 0,dept_no,dept_name
0,d001,Marketing
1,d002,Finance


In [2]:
dept_emp.head()

Unnamed: 0,emp_no,dept_no,from_date,hire_date
0,10001,d001,2001-06-22,9999-01-01
1,10002,d001,1996-08-03,9999-01-01
2,10003,d002,1996-08-03,9999-01-01


In [3]:
salaries.head()

Unnamed: 0,emp_no,salary,from_date,hire_date
0,10001,85097,2001-06-22,2002-06-22
1,10001,88958,2002-06-22,9999-01-01
2,10002,72527,1996-08-03,9999-01-01
3,10003,32323,1996-08-03,9999-01-01


In [5]:
# connect salary and employees with their dept_no
df = pd.merge(dept_emp[['emp_no', 'dept_no']], salaries[['emp_no', 'salary']], on='emp_no', how='inner')
df

Unnamed: 0,emp_no,dept_no,salary
0,10001,d001,85097
1,10001,d001,88958
2,10002,d001,72527
3,10003,d002,32323


In [6]:
# connect dept_no with dept_name
df = pd.merge(df, departments, on='dept_no', how='inner')
df

Unnamed: 0,emp_no,dept_no,salary,dept_name
0,10001,d001,85097,Marketing
1,10001,d001,88958,Marketing
2,10002,d001,72527,Marketing
3,10003,d002,32323,Finance


In [24]:
# count records for each department with value_counts
pd.DataFrame(df['dept_name'].value_counts())

Unnamed: 0,dept_name
Marketing,3
Finance,1


In [25]:
pd.DataFrame(df['dept_name'].value_counts()).reset_index()

Unnamed: 0,index,dept_name
0,Marketing,3
1,Finance,1


In [26]:
pd.DataFrame(df['dept_name'].value_counts()).reset_index().rename(columns={'index': 'dept_name', 'dept_name': 'sum'})

Unnamed: 0,dept_name,sum
0,Marketing,3
1,Finance,1


In [33]:
# count records for each department with groupby + count
df.groupby('dept_name').count().reset_index()[['dept_name', 'salary']].rename(columns={'salary': 'sum'})

Unnamed: 0,dept_name,sum
0,Finance,1
1,Marketing,3
