# DataFrames Merge (== SQL Join)

<img src="./images/PandasMergeDF.png" alt="PandasMergeDF.png" style="height:10em">

In [1]:
import pandas as pd 

## Load data 

In [2]:
# Load the developers table from a semicolon-delimited CSV file.
devs = pd.read_csv(
    filepath_or_buffer="../../datasets/developers/developers.csv",
    sep=";",
)
devs

Unnamed: 0,did,dname
0,1,Ivan
1,2,Asen
2,3,Maria
3,4,Stoyan
4,5,Aleks
5,6,Svetlin


In [3]:
# Load the developer -> language table from a semicolon-delimited CSV file.
# NOTE(review): the rendered `language` values keep their literal double
# quotes (e.g. "C++") — confirm whether that is intended or whether the
# file's quoting is not being recognised by the parser.
langs = pd.read_csv(
    filepath_or_buffer="../../datasets/developers/languages.csv",
    sep=";",
)
langs

Unnamed: 0,did,language
0,2,"""C++"""
1,3,"""Python"""
2,3,"""R"""
3,6,"""Java"""


## Inner Join

![SQL inner join diagram](./images/sql-inner-join.png)

In [4]:
# Inner join on `did`: only developers that have at least one matching
# row in `langs` appear in the result.
dev_langs_inner = devs.merge(langs, how="inner", on="did")
dev_langs_inner

Unnamed: 0,did,dname,language
0,2,Asen,"""C++"""
1,3,Maria,"""Python"""
2,3,Maria,"""R"""
3,6,Svetlin,"""Java"""


## Outer joins

![SQL outer joins diagram](./images/sql-outer-joins.png)

### Full Outer Join

In [5]:

# Full outer join on `did`: rows from both frames are kept; developers
# without a language get a missing value in `language`.
dev_langs_outer = devs.merge(langs, how="outer", on="did")
dev_langs_outer

Unnamed: 0,did,dname,language
0,1,Ivan,
1,2,Asen,"""C++"""
2,3,Maria,"""Python"""
3,3,Maria,"""R"""
4,4,Stoyan,
5,5,Aleks,
6,6,Svetlin,"""Java"""


### Left outer join

In [6]:
# Left outer join on `did`: every row of `devs` is kept; `language` is
# missing where `langs` has no matching `did`.
dev_langs_left_outer = devs.merge(langs, how="left", on="did")
dev_langs_left_outer

Unnamed: 0,did,dname,language
0,1,Ivan,
1,2,Asen,"""C++"""
2,3,Maria,"""Python"""
3,3,Maria,"""R"""
4,4,Stoyan,
5,5,Aleks,
6,6,Svetlin,"""Java"""


### Right outer join

In [7]:
# Right outer join on `did`: every row of `langs` is kept and is paired
# with the matching developer from `devs`.
dev_langs_right_outer = devs.merge(langs, how="right", on="did")
dev_langs_right_outer

Unnamed: 0,did,dname,language
0,2,Asen,"""C++"""
1,3,Maria,"""Python"""
2,3,Maria,"""R"""
3,6,Svetlin,"""Java"""


## Example: Join DataFrames and Sum Columns

### Create DataFrames

In [8]:
# First salary table, written row by row as (Name, ID, Salary1) records.
df1 = pd.DataFrame(
    [
        ("John", 3, 10),
        ("Peter", 5, 10),
        ("Maria", 4, 5),
        ("Aron", 2, 8),
        ("Andrea", 1, 20),
    ],
    columns=["Name", "ID", "Salary1"],
)
df1.head(3)

Unnamed: 0,Name,ID,Salary1
0,John,3,10
1,Peter,5,10
2,Maria,4,5


In [9]:
# Second salary table, written row by row as (Name, ID, Salary2) records.
df2 = pd.DataFrame(
    [
        ("Peter", 5, 5),
        ("John", 3, 6),
        ("Maria", 4, 5),
        ("Ivo", 9, 10),
    ],
    columns=["Name", "ID", "Salary2"],
)
df2.head()

Unnamed: 0,Name,ID,Salary2
0,Peter,5,5
1,John,3,6
2,Maria,4,5
3,Ivo,9,10


### Join the DataFrames


In [10]:
# Full outer join on the composite key (Name, ID): people present in only
# one of the two frames are kept, with the other salary left missing.
join_keys = ["Name", "ID"]
merged = df1.merge(df2, how="outer", on=join_keys)
merged

Unnamed: 0,Name,ID,Salary1,Salary2
0,John,3,10.0,6.0
1,Peter,5,10.0,5.0
2,Maria,4,5.0,5.0
3,Aron,2,8.0,
4,Andrea,1,20.0,
5,Ivo,9,,10.0


### Sum columns

In [11]:
# Total salary per person across both sources.
# Sum the two salary columns explicitly: summing "every non-key column"
# would fold an existing 'SalSum' back into the total if this cell were
# run a second time, making the cell non-idempotent.
# skipna=True lets a person with only one salary still get a total.
merged['SalSum'] = merged[['Salary1', 'Salary2']].sum(axis=1, skipna=True)

# Display the result ordered by name; `merged` itself is not reordered.
merged.sort_values(by='Name', axis=0)

Unnamed: 0,Name,ID,Salary1,Salary2,SalSum
4,Andrea,1,20.0,,20.0
3,Aron,2,8.0,,8.0
5,Ivo,9,,10.0,10.0
0,John,3,10.0,6.0,16.0
2,Maria,4,5.0,5.0,10.0
1,Peter,5,10.0,5.0,15.0
