# DataFrames Merge (== SQL Join)

<img src="./images/PandasMergeDF.png" alt="PandasMergeDF.png" style="height:10em">

In [1]:
import pandas as pd 

## Load data 

In [4]:
# Developers table; `did` is the key column shared with `langs` in the join examples.
devs = pd.DataFrame(
    data={
        "did": list(range(1, 7)),
        "dname": ["Ivan", "Asen", "Maria", "Stoyan", "Aleks", "Svetlin"],
    }
)
devs

Unnamed: 0,did,dname
0,1,Ivan
1,2,Asen
2,3,Maria
3,4,Stoyan
4,5,Aleks
5,6,Svetlin


In [5]:
# Languages table keyed by `did`; did=0 has no counterpart in `devs`,
# and devs with did 4-6 have no row here, so the join types differ visibly.
langs = pd.DataFrame(
    data={
        "did": list(range(4)),
        "languages": ["C++", "Python", "R", "Java"],
    }
)
langs

Unnamed: 0,did,languages
0,0,C++
1,1,Python
2,2,R
3,3,Java


## Inner Join

![SQL inner join diagram](./images/sql-inner-join.png)

In [6]:
# Inner join: keep only the rows whose `did` is present in both frames.
dev_langs_inner = devs.merge(langs, how="inner", on="did")
dev_langs_inner

Unnamed: 0,did,dname,languages
0,1,Ivan,Python
1,2,Asen,R
2,3,Maria,Java


## Outer joins

![SQL outer joins diagram](./images/sql-outer-joins.png)

### Full outer join

In [7]:

# Full outer join: keep every `did` from either frame; values missing on one side become NaN.
dev_langs_outer = devs.merge(langs, how="outer", on="did")
dev_langs_outer

Unnamed: 0,did,dname,languages
0,0,,C++
1,1,Ivan,Python
2,2,Asen,R
3,3,Maria,Java
4,4,Stoyan,
5,5,Aleks,
6,6,Svetlin,


### Left outer join

In [8]:
# Left outer join: keep every row of `devs`; developers without a language get NaN.
dev_langs_left_outer = devs.merge(langs, how="left", on="did")
dev_langs_left_outer

Unnamed: 0,did,dname,languages
0,1,Ivan,Python
1,2,Asen,R
2,3,Maria,Java
3,4,Stoyan,
4,5,Aleks,
5,6,Svetlin,


### Right outer join

In [9]:
# Right outer join: keep every row of `langs`; languages without a developer get NaN in `dname`.
dev_langs_right_outer = devs.merge(langs, how="right", on="did")
dev_langs_right_outer

Unnamed: 0,did,dname,languages
0,0,,C++
1,1,Ivan,Python
2,2,Asen,R
3,3,Maria,Java


## Example: Join DataFrames and Sum Columns

### Create DataFrames

In [10]:
# First salary table: one row per person, keyed by `ID`.
df1 = pd.DataFrame(
    dict(
        Name=["John", "Peter", "Maria", "Aron", "Andrea"],
        ID=[3, 5, 4, 2, 1],
        Salary1=[10, 10, 5, 8, 20],
    )
)
# Preview only the first three rows.
df1.head(n=3)

Unnamed: 0,Name,ID,Salary1
0,John,3,10
1,Peter,5,10
2,Maria,4,5


In [11]:
# Second salary table: overlaps `df1` on IDs 3, 4 and 5, and adds ID 9.
df2 = pd.DataFrame(
    dict(
        Name=["Peter", "John", "Maria", "Ivo"],
        ID=[5, 3, 4, 9],
        Salary2=[5, 6, 5, 10],
    )
)
# Preview up to the first five rows (the frame has four).
df2.head(n=5)

Unnamed: 0,Name,ID,Salary2
0,Peter,5,5
1,John,3,6
2,Maria,4,5
3,Ivo,9,10


### Join the DataFrames


In [12]:
# Full outer join on `ID` only. `Name` exists in both frames but is not a join key,
# so pandas keeps both copies and suffixes them as `Name_x` (df1) and `Name_y` (df2).
merged = df1.merge(df2, how="outer", on="ID")
merged

Unnamed: 0,Name_x,ID,Salary1,Name_y,Salary2
0,Andrea,1,20.0,,
1,Aron,2,8.0,,
2,John,3,10.0,John,6.0
3,Maria,4,5.0,Maria,5.0
4,Peter,5,10.0,Peter,5.0
5,,9,,Ivo,10.0


### Sum columns