In [59]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

![join](./img/joins.png)

## 1. Inner Join

![innerJoin.png](img/inner_join.png)

In [60]:
left_table = {'Date': ['01-01-2020', '02-01-2020', '03-01-2020', '04-01-2020' ],
              'CountryID': [1,1,3,2],
              'Units': [40, 25, 30, 35]}

In [61]:
left_table_df = pd.DataFrame(left_table)
left_table_df

Unnamed: 0,Date,CountryID,Units
0,01-01-2020,1,40
1,02-01-2020,1,25
2,03-01-2020,3,30
3,04-01-2020,2,35


In [62]:
right_table = {'CountryID': [3,4],
              'Country': ['Pandama', 'Spain']}

In [63]:
right_table_df = pd.DataFrame(right_table)
right_table_df

Unnamed: 0,CountryID,Country
0,3,Pandama
1,4,Spain


In [64]:
inner_join = left_table_df.merge(right_table_df, on='CountryID') # by default merge function works as inner join
inner_join

Unnamed: 0,Date,CountryID,Units,Country
0,03-01-2020,3,30,Pandama


In [65]:
right_table = {'ID': [3,4],
              'Country': ['Pandama', 'Spain']}

In [66]:
right_table_df = pd.DataFrame(right_table)
right_table_df

Unnamed: 0,ID,Country
0,3,Pandama
1,4,Spain


In [67]:
left_table_df

Unnamed: 0,Date,CountryID,Units
0,01-01-2020,1,40
1,02-01-2020,1,25
2,03-01-2020,3,30
3,04-01-2020,2,35


In [68]:
inner_join = left_table_df.merge(right_table_df, left_on='CountryID', right_on='ID')
inner_join

Unnamed: 0,Date,CountryID,Units,ID,Country
0,03-01-2020,3,30,3,Pandama


In [69]:
inner_join = left_table_df.merge(right_table_df, left_on='CountryID', right_on='ID').drop('ID',axis=1)
inner_join

Unnamed: 0,Date,CountryID,Units,Country
0,03-01-2020,3,30,Pandama


In [70]:
left_table['Country'] = ['PAK', 'IND', 'IRAN', 'USA']

In [71]:
left_table_df = pd.DataFrame(left_table)
left_table_df

Unnamed: 0,Date,CountryID,Units,Country
0,01-01-2020,1,40,PAK
1,02-01-2020,1,25,IND
2,03-01-2020,3,30,IRAN
3,04-01-2020,2,35,USA


In [72]:
inner_join = left_table_df.merge(right_table_df, left_on='CountryID', right_on='ID', suffixes=['_left', '_right']).drop('ID',axis=1)
inner_join

Unnamed: 0,Date,CountryID,Units,Country_left,Country_right
0,03-01-2020,3,30,IRAN,Pandama


## 2. LEFT JOIN

![left-join.png](img/left_join.png)

In [73]:
left_table = {'Date': ['01-01-2020', '02-01-2020', '03-01-2020', '04-01-2020' ],
              'CountryID': [1,1,3,2],
              'Units': [40, 25, 30, 35]}

left_table_df = pd.DataFrame(left_table)
left_table_df

Unnamed: 0,Date,CountryID,Units
0,01-01-2020,1,40
1,02-01-2020,1,25
2,03-01-2020,3,30
3,04-01-2020,2,35


In [74]:
right_table = {'ID': [3,4],
              'Country': ['Pandama', 'Spain']}

right_table_df = pd.DataFrame(right_table)
right_table_df

Unnamed: 0,ID,Country
0,3,Pandama
1,4,Spain


In [75]:
left_join = left_table_df.merge(right_table_df, left_on='CountryID', right_on='ID', how='left').drop('ID', axis=1)
left_join

Unnamed: 0,Date,CountryID,Units,Country
0,01-01-2020,1,40,
1,02-01-2020,1,25,
2,03-01-2020,3,30,Pandama
3,04-01-2020,2,35,


In [76]:
right_table = {'ID': [3,4],
              'Country': ['Pandama', 'Spain']}

right_table_df = pd.DataFrame(right_table)
right_table_df

Unnamed: 0,ID,Country
0,3,Pandama
1,4,Spain


In [77]:
full_outer_join = left_table_df.merge(right_table_df, left_on='CountryID', right_on='ID', how='outer')
full_outer_join

Unnamed: 0,Date,CountryID,Units,ID,Country
0,01-01-2020,1.0,40.0,,
1,02-01-2020,1.0,25.0,,
2,04-01-2020,2.0,35.0,,
3,03-01-2020,3.0,30.0,3.0,Pandama
4,,,,4.0,Spain


## 3. RIGHT JOIN

![right-join.png](img/right_join.png)

In [78]:
left_table = {'Date': ['01-01-2020', '02-01-2020', '03-01-2020', '04-01-2020' ],
              'CountryID': [1,1,3,2],
              'Units': [40, 25, 30, 35]}

left_table_df = pd.DataFrame(left_table)
left_table_df

Unnamed: 0,Date,CountryID,Units
0,01-01-2020,1,40
1,02-01-2020,1,25
2,03-01-2020,3,30
3,04-01-2020,2,35


In [79]:
right_table = {'ID': [3,4],
              'Country': ['Pandama', 'Spain']}

right_table_df = pd.DataFrame(right_table)
right_table_df

Unnamed: 0,ID,Country
0,3,Pandama
1,4,Spain


In [80]:
right_join = left_table_df.merge(right_table_df, left_on='CountryID', right_on='ID', how='right')
right_join

Unnamed: 0,Date,CountryID,Units,ID,Country
0,03-01-2020,3.0,30.0,3,Pandama
1,,,,4,Spain


## 4. FULL OUTER JOIN

![fullterOuter-join.png](img/full_outer_join.png)

In [81]:
left_table = {'Date': ['01-01-2020', '02-01-2020', '03-01-2020', '04-01-2020' ],
              'CountryID': [1,1,3,2],
              'Units': [40, 25, 30, 35]}

left_table_df = pd.DataFrame(left_table)
left_table_df

Unnamed: 0,Date,CountryID,Units
0,01-01-2020,1,40
1,02-01-2020,1,25
2,03-01-2020,3,30
3,04-01-2020,2,35


## 5. LEFT ANTI

![leftAnti-join.png](img/left_anti_join.png)

In [82]:
left_table = {'Date': ['01-01-2020', '02-01-2020', '03-01-2020', '04-01-2020' ],
              'CountryID': [1,1,3,2],
              'Units': [40, 25, 30, 35]}

In [83]:
left_table_df = pd.DataFrame(left_table)
left_table_df

Unnamed: 0,Date,CountryID,Units
0,01-01-2020,1,40
1,02-01-2020,1,25
2,03-01-2020,3,30
3,04-01-2020,2,35


In [84]:
right_table = {'ID': [3,4],
              'Country': ['Pandama', 'Spain']}

In [85]:
right_table_df = pd.DataFrame(right_table)
right_table_df

Unnamed: 0,ID,Country
0,3,Pandama
1,4,Spain


In [86]:
left_table_df

Unnamed: 0,Date,CountryID,Units
0,01-01-2020,1,40
1,02-01-2020,1,25
2,03-01-2020,3,30
3,04-01-2020,2,35


In [87]:
right_table_df

Unnamed: 0,ID,Country
0,3,Pandama
1,4,Spain


In [88]:
left_anti = left_table_df.merge(right_table_df, left_on='CountryID', right_on='ID', how='left')
left_anti

Unnamed: 0,Date,CountryID,Units,ID,Country
0,01-01-2020,1,40,,
1,02-01-2020,1,25,,
2,03-01-2020,3,30,3.0,Pandama
3,04-01-2020,2,35,,


In [89]:
# method 1
left_anti = left_table_df.merge(right_table_df, left_on='CountryID', right_on='ID', how='left')
left_anti[left_anti['Country'].isna()][['Date', 'CountryID', 'Units']]

Unnamed: 0,Date,CountryID,Units
0,01-01-2020,1,40
1,02-01-2020,1,25
3,04-01-2020,2,35


In [90]:
# method 2
left_join_df = left_table_df.merge(right_table_df,
                                   left_on="CountryID",
                                   right_on='ID',
                                   how='left', indicator=True).drop('ID', axis=1)
left_join_df

Unnamed: 0,Date,CountryID,Units,Country,_merge
0,01-01-2020,1,40,,left_only
1,02-01-2020,1,25,,left_only
2,03-01-2020,3,30,Pandama,both
3,04-01-2020,2,35,,left_only


In [91]:
left_join_df.loc[left_join_df['_merge'] == 'left_only']

Unnamed: 0,Date,CountryID,Units,Country,_merge
0,01-01-2020,1,40,,left_only
1,02-01-2020,1,25,,left_only
3,04-01-2020,2,35,,left_only


### 6. RIGHT ANTI

![rightAnti-join.png](img/right_anti_join.png)

In [92]:
left_table = {'Date': ['01-01-2020', '02-01-2020', '03-01-2020', '04-01-2020' ],
              'CountryID': [1,1,3,2],
              'Units': [40, 25, 30, 35]}
left_table_df = pd.DataFrame(left_table)
left_table_df

Unnamed: 0,Date,CountryID,Units
0,01-01-2020,1,40
1,02-01-2020,1,25
2,03-01-2020,3,30
3,04-01-2020,2,35


In [93]:
right_table = {'ID': [3,4],
              'Country': ['Pandama', 'Spain']}
right_table_df = pd.DataFrame(right_table)
right_table_df

Unnamed: 0,ID,Country
0,3,Pandama
1,4,Spain


In [94]:
right_anti = left_table_df.merge(right_table_df, left_on='CountryID', right_on='ID', how='right')
right_anti

Unnamed: 0,Date,CountryID,Units,ID,Country
0,03-01-2020,3.0,30.0,3,Pandama
1,,,,4,Spain


In [95]:
# method 1
right_anti[right_anti['CountryID'].isna()][['Date', 'CountryID', 'Units','Country']]

Unnamed: 0,Date,CountryID,Units,Country
1,,,,Spain


In [96]:
# method 2
right_anti = left_table_df.merge(right_table_df, left_on='CountryID', right_on='ID', how='right', indicator=True)
right_anti

Unnamed: 0,Date,CountryID,Units,ID,Country,_merge
0,03-01-2020,3.0,30.0,3,Pandama,both
1,,,,4,Spain,right_only


In [97]:
right_anti.loc[right_anti['_merge'] == 'right_only']

Unnamed: 0,Date,CountryID,Units,ID,Country,_merge
1,,,,4,Spain,right_only


In [98]:
right_anti.loc[right_anti['_merge'] == 'right_only'].drop(['_merge', 'ID'], axis=1)

Unnamed: 0,Date,CountryID,Units,Country
1,,,,Spain


### TASK

In [99]:
employees = {
    'Emp_ID': range(1001, 1021),
    'Name': [
        'Ali', 'Sara', 'Umer', 'Hina', 'Bilal', 'Ayesha', 'Raza', 'Mona', 'Danish', 'Tuba',
        'Kashif', 'Iqra', 'Ahmed', 'Nida', 'Owais', 'Farah', 'Zain', 'Mehak', 'Haris', 'Amna'
    ],
    'Dept_ID': [101,102,103,104,105,101,102,103,104,105,101,102,103,104,105,101,102,103,104,105],
    'Salary': [
        70000, 82000, 60000, 90000, 75000, 71000, 88000, 64000, 94000, 77000,
        73000, 85000, 62000, 91000, 76000, 70500, 89500, 65000, 93000, 78000
    ],
    'Join_Year': [
        2018,2019,2020,2018,2017,2019,2018,2021,2022,2020,
        2017,2018,2019,2021,2022,2023,2019,2020,2021,2022
    ]
}

emp_df = pd.DataFrame(employees)
emp_df

Unnamed: 0,Emp_ID,Name,Dept_ID,Salary,Join_Year
0,1001,Ali,101,70000,2018
1,1002,Sara,102,82000,2019
2,1003,Umer,103,60000,2020
3,1004,Hina,104,90000,2018
4,1005,Bilal,105,75000,2017
5,1006,Ayesha,101,71000,2019
6,1007,Raza,102,88000,2018
7,1008,Mona,103,64000,2021
8,1009,Danish,104,94000,2022
9,1010,Tuba,105,77000,2020


In [100]:
departments = {
    'Dept_ID': [101,102,103,104,105,106],
    'Dept_Name': ['HR','IT','Finance','Marketing','Sales','Operations'],
    'Manager': ['Faisal','Asma','Kamran','Tariq','Sadia','Hassan'],
    'Location': ['Lahore','Karachi','Islamabad','Lahore','Karachi','Faisalabad']
}

dep_df = pd.DataFrame(departments)
dep_df

Unnamed: 0,Dept_ID,Dept_Name,Manager,Location
0,101,HR,Faisal,Lahore
1,102,IT,Asma,Karachi
2,103,Finance,Kamran,Islamabad
3,104,Marketing,Tariq,Lahore
4,105,Sales,Sadia,Karachi
5,106,Operations,Hassan,Faisalabad


## 1. INNER JOIN 


In [101]:
inner_join = emp_df.merge(dep_df, left_on='Dept_ID', right_on='Dept_ID')
inner_join

Unnamed: 0,Emp_ID,Name,Dept_ID,Salary,Join_Year,Dept_Name,Manager,Location
0,1001,Ali,101,70000,2018,HR,Faisal,Lahore
1,1002,Sara,102,82000,2019,IT,Asma,Karachi
2,1003,Umer,103,60000,2020,Finance,Kamran,Islamabad
3,1004,Hina,104,90000,2018,Marketing,Tariq,Lahore
4,1005,Bilal,105,75000,2017,Sales,Sadia,Karachi
5,1006,Ayesha,101,71000,2019,HR,Faisal,Lahore
6,1007,Raza,102,88000,2018,IT,Asma,Karachi
7,1008,Mona,103,64000,2021,Finance,Kamran,Islamabad
8,1009,Danish,104,94000,2022,Marketing,Tariq,Lahore
9,1010,Tuba,105,77000,2020,Sales,Sadia,Karachi


## 2. LEFT JOIN

In [102]:
left_join = emp_df.merge(dep_df, left_on='Dept_ID', right_on='Dept_ID', how='left')
left_join

Unnamed: 0,Emp_ID,Name,Dept_ID,Salary,Join_Year,Dept_Name,Manager,Location
0,1001,Ali,101,70000,2018,HR,Faisal,Lahore
1,1002,Sara,102,82000,2019,IT,Asma,Karachi
2,1003,Umer,103,60000,2020,Finance,Kamran,Islamabad
3,1004,Hina,104,90000,2018,Marketing,Tariq,Lahore
4,1005,Bilal,105,75000,2017,Sales,Sadia,Karachi
5,1006,Ayesha,101,71000,2019,HR,Faisal,Lahore
6,1007,Raza,102,88000,2018,IT,Asma,Karachi
7,1008,Mona,103,64000,2021,Finance,Kamran,Islamabad
8,1009,Danish,104,94000,2022,Marketing,Tariq,Lahore
9,1010,Tuba,105,77000,2020,Sales,Sadia,Karachi


## 3. RIGHT JOIN

In [103]:
right_join = emp_df.merge(dep_df, left_on='Dept_ID', right_on='Dept_ID', how='right')
right_join

Unnamed: 0,Emp_ID,Name,Dept_ID,Salary,Join_Year,Dept_Name,Manager,Location
0,1001.0,Ali,101,70000.0,2018.0,HR,Faisal,Lahore
1,1006.0,Ayesha,101,71000.0,2019.0,HR,Faisal,Lahore
2,1011.0,Kashif,101,73000.0,2017.0,HR,Faisal,Lahore
3,1016.0,Farah,101,70500.0,2023.0,HR,Faisal,Lahore
4,1002.0,Sara,102,82000.0,2019.0,IT,Asma,Karachi
5,1007.0,Raza,102,88000.0,2018.0,IT,Asma,Karachi
6,1012.0,Iqra,102,85000.0,2018.0,IT,Asma,Karachi
7,1017.0,Zain,102,89500.0,2019.0,IT,Asma,Karachi
8,1003.0,Umer,103,60000.0,2020.0,Finance,Kamran,Islamabad
9,1008.0,Mona,103,64000.0,2021.0,Finance,Kamran,Islamabad


## 4. FULL OUTER JOIN

In [104]:
full_outer_join = emp_df.merge(dep_df, left_on='Dept_ID', right_on='Dept_ID', how='outer')
full_outer_join

Unnamed: 0,Emp_ID,Name,Dept_ID,Salary,Join_Year,Dept_Name,Manager,Location
0,1001.0,Ali,101,70000.0,2018.0,HR,Faisal,Lahore
1,1006.0,Ayesha,101,71000.0,2019.0,HR,Faisal,Lahore
2,1011.0,Kashif,101,73000.0,2017.0,HR,Faisal,Lahore
3,1016.0,Farah,101,70500.0,2023.0,HR,Faisal,Lahore
4,1002.0,Sara,102,82000.0,2019.0,IT,Asma,Karachi
5,1007.0,Raza,102,88000.0,2018.0,IT,Asma,Karachi
6,1012.0,Iqra,102,85000.0,2018.0,IT,Asma,Karachi
7,1017.0,Zain,102,89500.0,2019.0,IT,Asma,Karachi
8,1003.0,Umer,103,60000.0,2020.0,Finance,Kamran,Islamabad
9,1008.0,Mona,103,64000.0,2021.0,Finance,Kamran,Islamabad


## 5. LEFT ANTI JOIN

In [105]:
left_anti_join = emp_df.merge(dep_df, left_on='Dept_ID', right_on='Dept_ID', how='left')
left_anti_join[left_anti_join['Dept_ID'].isna()][['Emp_ID', 'Name', 'Dept_ID',	'Salary', 'Join_Year', 'Dept_Name', 'Manager', 'Location']]

Unnamed: 0,Emp_ID,Name,Dept_ID,Salary,Join_Year,Dept_Name,Manager,Location


In [112]:
left_anti_join = emp_df.merge(dep_df, left_on='Dept_ID', right_on='Dept_ID', how='left', indicator=True)
left_anti_join[left_anti_join['_merge'] == 'left_only']


Unnamed: 0,Emp_ID,Name,Dept_ID,Salary,Join_Year,Dept_Name,Manager,Location,_merge


## 6. RIGHT ANTI

In [110]:
right_anti_join = emp_df.merge(dep_df, left_on='Dept_ID', right_on='Dept_ID', indicator=True)
right_anti_join[right_anti_join['_merge'] == 'right_only']

Unnamed: 0,Emp_ID,Name,Dept_ID,Salary,Join_Year,Dept_Name,Manager,Location,_merge


In [111]:
right_anti_join = emp_df.merge(dep_df, left_on='Dept_ID', right_on='Dept_ID', how='right')
right_anti_join[right_anti_join['Dept_ID'].isna()]

Unnamed: 0,Emp_ID,Name,Dept_ID,Salary,Join_Year,Dept_Name,Manager,Location
