In [None]:
"""Rank all the employees by their salaries, order by emp_no ascending for the same salary"""

In [None]:
"""SQL table creation and data insertion"""
drop table if exists  `salaries` ; 
CREATE TABLE `salaries` (
`emp_no` int(11) NOT NULL,
`salary` int(11) NOT NULL,
`from_date` date NOT NULL,
`to_date` date NOT NULL,
PRIMARY KEY (`emp_no`,`from_date`));
INSERT INTO salaries VALUES(10001,88958,'2002-06-22','9999-01-01');
INSERT INTO salaries VALUES(10002,72527,'2001-08-02','9999-01-01');
INSERT INTO salaries VALUES(10003,43311,'2001-12-01','9999-01-01');
INSERT INTO salaries VALUES(10004,72527,'2001-12-01','9999-01-01');

In [None]:
"""SQL solution"""
# 1
SELECT a.emp_no, a.salary, a.t_rank
FROM
(SELECT emp_no, salary, @rank:=IF(@prev = salary, @rank, @rank+1) AS t_rank, @prev:=salary
FROM salaries, (SELECT @rank:=0, @prev:=NULL) AS r
ORDER BY salary DESC, emp_no

# 2
SELECT emp_no, salary, @rank:= IF(@prev != (@prev:=salary), @rank+1, @rank)
FROM salaries, (SELECT @rank:=0, @prev:=-1) AS r
ORDER BY salary DESC, emp_no
 
# 3
SELECT emp_no, salary, DENSE_RANK() OVER(ORDER BY salary DESC) AS t_rank
FROM salaries
ORDER BY salary DESC, t_rank

In [1]:
"""pandas dataframe creation"""
import numpy as np
import pandas as pd

sal = np.array([[10001,88958,'2002-06-22','9999-01-01'],
                [10002,72527,'2001-08-02','9999-01-01'],
                [10003,43311,'2001-12-01','9999-01-01'],
                [10004,72527,'2001-12-01','9999-01-01']])
salaries = pd.DataFrame(data=sal, columns=['emp_no', 'salary', 'from_date', 'to_date'])
salaries.head()

Unnamed: 0,emp_no,salary,from_date,to_date
0,10001,88958,2002-06-22,9999-01-01
1,10002,72527,2001-08-02,9999-01-01
2,10003,43311,2001-12-01,9999-01-01
3,10004,72527,2001-12-01,9999-01-01


In [7]:
# sort bby values first, then compare row by row to decide the rank
df1 = salaries[['emp_no', 'salary']]
df1 = df1.sort_values(by='salary', ascending=False)
df1['salary_rank'] = 1
for i in range(1, len(df1)):
    df1.iloc[i, 2] = df1.iloc[i-1, 2] + (df1.iloc[i, 1] < df1.iloc[i-1, 1])
df1

Unnamed: 0,emp_no,salary,salary_rank
0,10001,88958,1
1,10002,72527,2
3,10004,72527,2
2,10003,43311,3


In [13]:
# use rank function
df2 = salaries[['emp_no', 'salary']]
df2['salary_rank'] = df2['salary'].rank(method='dense', ascending=False)
df2['salary_rank'] = df2['salary_rank'].astype('int')
df2.sort_values(by='salary_rank')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['salary_rank'] = df2['salary'].rank(method='dense', ascending=False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['salary_rank'] = df2['salary_rank'].astype('int')


Unnamed: 0,emp_no,salary,salary_rank
0,10001,88958,1
1,10002,72527,2
3,10004,72527,2
2,10003,43311,3
