In [None]:
"""
Department Highest Salary

The Employee table holds all employees. Every employee has an Id, a name, a salary, and the Id of the department they belong to.

+----+-------+--------+--------------+
| Id | Name  | Salary | DepartmentId |
+----+-------+--------+--------------+
| 1  | Joe   | 70000  | 1            |
| 2  | Jim   | 90000  | 1            |
| 3  | Henry | 80000  | 2            |
| 4  | Sam   | 60000  | 2            |
| 5  | Max   | 90000  | 1            |
+----+-------+--------+--------------+
The Department table holds all departments of the company.

+----+----------+
| Id | Name     |
+----+----------+
| 1  | IT       |
| 2  | Sales    |
+----+----------+
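Write a SQL query to find the employees who have the highest salary in each department, returning the columns Department, Employee, and Salary. If several employees tie for the highest salary in a department, return all of them (the order of rows does not matter).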
"""

In [None]:
"""testcase"""
{"headers": {"Employee": ["Id", "Name", "Salary", "DepartmentId"], 
             "Department": ["Id", "Name"]}, 
 "rows": {"Employee": [[1, "Joe", 70000, 1], [2, "Jim", 90000, 1], [3, "Henry", 80000, 2], [4, "Sam", 60000, 2], [5, "Max", 90000, 1]], 
          "Department": [[1, "IT"], [2, "Sales"]]}}

In [None]:
"""SQL table creation and data insertion"""
-- Rebuild both tables from scratch so this cell can be re-run safely.
DROP TABLE IF EXISTS Employee;
DROP TABLE IF EXISTS Department;

-- Column names in PRIMARY KEY (...) are written as bare identifiers.
-- A single-quoted 'Id' is a string literal, not a column reference, and
-- MySQL rejects it with a syntax error.
-- Plain INT is used instead of int(4)/int(10): the display width does not
-- limit the stored range and is deprecated in MySQL 8.
CREATE TABLE Employee (
    Id           INT         NOT NULL,
    Name         VARCHAR(50) NOT NULL,
    Salary       INT         NOT NULL,
    DepartmentId INT         NOT NULL,  -- refers to Department.Id
    PRIMARY KEY (Id)
);

CREATE TABLE Department (
    Id   INT         NOT NULL,
    Name VARCHAR(50) NOT NULL,
    PRIMARY KEY (Id)
);

-- String values use standard single quotes; double quotes denote
-- identifiers in ANSI mode and in most other SQL engines.
INSERT INTO Employee (Id, Name, Salary, DepartmentId) VALUES
    (1, 'Joe',   70000, 1),
    (2, 'Jim',   90000, 1),
    (3, 'Henry', 80000, 2),
    (4, 'Sam',   60000, 2),
    (5, 'Max',   90000, 1);

INSERT INTO Department (Id, Name) VALUES
    (1, 'IT'),
    (2, 'Sales');

In [None]:
"""SQL solution"""
-- For every department, list the employee(s) earning the top salary.
-- Salaries are ranked within each department (1 = highest). DENSE_RANK
-- gives tied salaries the same rank, so every tied top earner is kept.
WITH ranked AS (
    SELECT
        d.Name AS Department,
        e.Name AS Employee,
        e.Salary,
        DENSE_RANK() OVER (
            PARTITION BY e.DepartmentId
            ORDER BY e.Salary DESC
        ) AS rk
    FROM Employee AS e
    INNER JOIN Department AS d
        ON d.Id = e.DepartmentId
)
SELECT Department, Employee, Salary
FROM ranked
WHERE rk = 1

In [1]:
"""pandas dataframe creation"""
import pandas as pd

# Sample rows mirroring the SQL tables: one tuple per record.
emp = [
    (1, "Joe", 70000, 1),
    (2, "Jim", 90000, 1),
    (3, "Henry", 80000, 2),
    (4, "Sam", 60000, 2),
    (5, "Max", 90000, 1),
]
dept = [
    (1, "IT"),
    (2, "Sales"),
]

# Column names, in the same order as the fields of each tuple above.
employee_columns = ['Id', 'Name', 'Salary', 'DepartmentId']
department_columns = ['Id', 'Name']

Employee = pd.DataFrame(data=emp, columns=employee_columns)
Department = pd.DataFrame(data=dept, columns=department_columns)

# Preview the employee table as the cell output.
Employee.head()

Unnamed: 0,Id,Name,Salary,DepartmentId
0,1,Joe,70000,1
1,2,Jim,90000,1
2,3,Henry,80000,2
3,4,Sam,60000,2
4,5,Max,90000,1


In [3]:
# Preview the Department lookup table (first five rows at most).
Department.head(n=5)

Unnamed: 0,Id,Name
0,1,IT
1,2,Sales


In [4]:
# Attach each employee's department via an inner join on
# Employee.DepartmentId == Department.Id. Both frames have a 'Name' column,
# so the suffixes turn them into Name_emp and Name_dept.
employee_fields = Employee[['Name', 'Salary', 'DepartmentId']]
df = employee_fields.merge(
    Department,
    left_on='DepartmentId',
    right_on='Id',
    suffixes=('_emp', '_dept'),
)
df

Unnamed: 0,Name_emp,Salary,DepartmentId,Id,Name_dept
0,Joe,70000,1,1,IT
1,Jim,90000,1,1,IT
2,Max,90000,1,1,IT
3,Henry,80000,2,2,Sales
4,Sam,60000,2,2,Sales


In [7]:
# Rank salaries inside each department, highest first. The 'dense' method
# gives tied salaries the same rank with no gaps, like SQL DENSE_RANK().
# The resulting ranks are floats (1.0, 2.0, ...).
df = df.assign(
    sal_rank=df.groupby('DepartmentId')['Salary'].rank(method='dense', ascending=False)
)
df

Unnamed: 0,Name_emp,Salary,DepartmentId,Id,Name_dept,sal_rank
0,Joe,70000,1,1,IT,2.0
1,Jim,90000,1,1,IT,1.0
2,Max,90000,1,1,IT,1.0
3,Henry,80000,2,2,Sales,1.0
4,Sam,60000,2,2,Sales,2.0


In [8]:
# Keep only the rank-1 rows (the top earners of each department, ties
# included) and expose the columns under the names the problem asks for.
is_top_earner = df['sal_rank'] == 1
(
    df.loc[is_top_earner, ['Name_dept', 'Name_emp', 'Salary']]
      .rename(columns={'Name_dept': 'Department', 'Name_emp': 'Employee'})
)

Unnamed: 0,Department,Employee,Salary
1,IT,Jim,90000
2,IT,Max,90000
3,Sales,Henry,80000
