In [None]:
"""Find the emp_no, salary, last_name, first_name of the employee(s) with the second highest salary, without using order by"""

In [None]:
"""SQL table creation and data insertion"""
-- Start from a clean slate so the script can be re-run.
DROP TABLE IF EXISTS `employees`;
DROP TABLE IF EXISTS `salaries`;

-- Employee master data, one row per emp_no.
CREATE TABLE `employees` (
    `emp_no`     int(11)     NOT NULL,
    `birth_date` date        NOT NULL,
    `first_name` varchar(14) NOT NULL,
    `last_name`  varchar(16) NOT NULL,
    `gender`     char(1)     NOT NULL,
    `hire_date`  date        NOT NULL,
    PRIMARY KEY (`emp_no`)
);

-- Salary records, keyed by employee and the date the salary took effect.
CREATE TABLE `salaries` (
    `emp_no`    int(11) NOT NULL,
    `salary`    int(11) NOT NULL,
    `from_date` date    NOT NULL,
    `to_date`   date    NOT NULL,
    PRIMARY KEY (`emp_no`, `from_date`)
);

-- Sample employees.
INSERT INTO employees VALUES (10001, '1953-09-02', 'Georgi', 'Facello', 'M', '1986-06-26');
INSERT INTO employees VALUES (10002, '1964-06-02', 'Bezalel', 'Simmel', 'F', '1985-11-21');
INSERT INTO employees VALUES (10003, '1959-12-03', 'Parto', 'Bamford', 'M', '1986-08-28');
INSERT INTO employees VALUES (10004, '1954-05-01', 'Chirstian', 'Koblick', 'M', '1986-12-01');

-- One salary row per sample employee.
INSERT INTO salaries VALUES (10001, 88958, '2002-06-22', '9999-01-01');
INSERT INTO salaries VALUES (10002, 72527, '2001-08-02', '9999-01-01');
INSERT INTO salaries VALUES (10003, 43311, '2001-12-01', '9999-01-01');
INSERT INTO salaries VALUES (10004, 74057, '2001-11-27', '9999-01-01');

In [None]:
"""SQL solution"""
-- The second-highest salary is the largest salary strictly below the overall
-- maximum; every employee row carrying that salary is returned (ties included).
SELECT e.emp_no,
       s.salary,
       e.last_name,
       e.first_name
FROM employees AS e
JOIN salaries AS s
  ON e.emp_no = s.emp_no
WHERE s.salary = (
    SELECT MAX(runner_up.salary)
    FROM salaries AS runner_up
    WHERE runner_up.salary < (SELECT MAX(top.salary) FROM salaries AS top)
)

In [1]:
"""pandas dataframe creation"""
import numpy as np
import pandas as pd

# Raw rows mirroring the SQL INSERT statements above.
# NOTE: np.array() on rows that mix ints and strings coerces every element to
# a string, so the numeric columns must be cast back after building the frames.
emp = np.array([[10001,'1953-09-02','Georgi','Facello','M','1986-06-26'], 
                [10002,'1964-06-02','Bezalel','Simmel','F','1985-11-21'],
                [10003,'1959-12-03','Parto','Bamford','M','1986-08-28'],
                [10004,'1954-05-01','Chirstian','Koblick','M','1986-12-01']])
sal = np.array([[10001,88958,'2002-06-22','9999-01-01'],
                [10002,72527,'2001-08-02','9999-01-01'],
                [10003,43311,'2001-12-01','9999-01-01'],
                [10004,74057,'2001-11-27','9999-01-01']])

# Restore integer dtypes for emp_no / salary (int columns in the SQL schema).
# Without this, sorting or comparing salaries is lexicographic, not numeric.
employees = pd.DataFrame(
    data=emp,
    columns=['emp_no', 'birth_date', 'first_name', 'last_name', 'gender', 'hire_date'],
).astype({'emp_no': 'int64'})
salaries = pd.DataFrame(
    data=sal,
    columns=['emp_no', 'salary', 'from_date', 'to_date'],
).astype({'emp_no': 'int64', 'salary': 'int64'})
employees.head()

Unnamed: 0,emp_no,birth_date,first_name,last_name,gender,hire_date
0,10001,1953-09-02,Georgi,Facello,M,1986-06-26
1,10002,1964-06-02,Bezalel,Simmel,F,1985-11-21
2,10003,1959-12-03,Parto,Bamford,M,1986-08-28
3,10004,1954-05-01,Chirstian,Koblick,M,1986-12-01


In [2]:
# Preview the first rows of the salaries frame.
salaries.head()

Unnamed: 0,emp_no,salary,from_date,to_date
0,10001,88958,2002-06-22,9999-01-01
1,10002,72527,2001-08-02,9999-01-01
2,10003,43311,2001-12-01,9999-01-01
3,10004,74057,2001-11-27,9999-01-01


In [5]:
# Inner-join employees to their salary rows on emp_no, then keep only the
# columns the problem asks for.
wanted_cols = ['emp_no', 'salary', 'last_name', 'first_name']
df = employees.merge(salaries, how='inner', on='emp_no')[wanted_cols]
df

Unnamed: 0,emp_no,salary,last_name,first_name
0,10001,88958,Facello,Georgi
1,10002,72527,Simmel,Bezalel
2,10003,43311,Bamford,Parto
3,10004,74057,Koblick,Chirstian


In [6]:
# Add a salary_rank column (dense rank, highest salary = 1) without rank().
# Salary is cast to int first so ordering and comparison are numeric rather
# than lexicographic if the column holds strings.
df1 = (
    df.assign(salary=df['salary'].astype('int'))
      .sort_values('salary', ascending=False)
)
# In descending order, a new rank starts whenever the salary drops below the
# previous row's salary. shift() gives NaN for the first row, so that
# comparison is False and the first row keeps rank 1 (no wrap-around to the
# last row, unlike indexing with i-1 at i == 0).
salary_dropped = df1['salary'] < df1['salary'].shift()
df1['salary_rank'] = salary_dropped.cumsum() + 1
df1

Unnamed: 0,emp_no,salary,last_name,first_name,salary_rank
0,10001,88958,Facello,Georgi,1
3,10004,74057,Koblick,Chirstian,2
1,10002,72527,Simmel,Bezalel,3
2,10003,43311,Bamford,Parto,4


In [11]:
# Keep the rows whose salary_rank is 2 and show only the first four columns.
is_second = df1['salary_rank'] == 2
df1.loc[is_second, df1.columns[:4]]

Unnamed: 0,emp_no,salary,last_name,first_name
3,10004,74057,Koblick,Chirstian


In [20]:
# Same result via pandas' built-in dense ranking.
# Salary is converted to int before ranking; the converted column and the
# rank are written back onto df.
salary_as_int = df['salary'].astype('int')
df['salary'] = salary_as_int
df['salary_rank'] = salary_as_int.rank(method='dense', ascending=False)

second_highest = df['salary_rank'] == 2
df.loc[second_highest, ['emp_no', 'salary', 'last_name', 'first_name']]

Unnamed: 0,emp_no,salary,last_name,first_name
3,10004,74057,Koblick,Chirstian
