# Combining & Merging Datasets in Pandas

![image.png](attachment:9927026e-7279-49eb-8feb-5a23763041f9.png)

In [3]:
import pandas as pd
import numpy as np

## Joining DataFrames with `pd.merge`

In [4]:
# Two small frames that share a 'key' column; they are merged on it below.
data1 = {
    "key": ["K0", "K1", "K2", "K3"],
    "Name": ["Jai", "Princi", "Gaurav", "Anuj"],
    "Age": [27, 24, 22, 32],
}

data2 = {
    "key": ["K0", "K1", "K2", "K3"],
    "Address": ["Nagpur", "Kanpur", "Allahabad", "Kannuaj"],
    "Qualification": ["Btech", "B.A", "Bcom", "B.hons"],
}

# df carries name/age, df1 carries address/qualification for the same keys.
df = pd.DataFrame(data=data1)
df1 = pd.DataFrame(data=data2)

In [5]:
# Display the first frame (columns: key, Name, Age).
df

Unnamed: 0,key,Name,Age
0,K0,Jai,27
1,K1,Princi,24
2,K2,Gaurav,22
3,K3,Anuj,32


In [6]:
# Display the second frame (columns: key, Address, Qualification).
df1

Unnamed: 0,key,Address,Qualification
0,K0,Nagpur,Btech
1,K1,Kanpur,B.A
2,K2,Allahabad,Bcom
3,K3,Kannuaj,B.hons


In [33]:
# Join the two frames on their shared 'key' column (the default join type is inner).
res = df.merge(df1, on='key')

res

Unnamed: 0,key,Name,Age,Address,Qualification
0,K0,Jai,27,Nagpur,Btech
1,K1,Princi,24,Kanpur,B.A
2,K2,Gaurav,22,Allahabad,Bcom
3,K3,Anuj,32,Kannuaj,B.hons


In [29]:
# Same people as before, now with a second key column 'key1'.
# 'key' matches row for row in both frames, but 'key1' only agrees for K0 and K2.
data1 = {
    "key": ["K0", "K1", "K2", "K3"],
    "key1": ["K0", "K1", "K0", "K1"],
    "Name": ["Jai", "Princi", "Gaurav", "Anuj"],
    "Age": [27, 24, 22, 32],
}

data2 = {
    "key": ["K0", "K1", "K2", "K3"],
    "key1": ["K0", "K0", "K0", "K0"],
    "Address": ["Nagpur", "Kanpur", "Allahabad", "Kannuaj"],
    "Qualification": ["Btech", "B.A", "Bcom", "B.hons"],
}

df = pd.DataFrame(data=data1)
df1 = pd.DataFrame(data=data2)

In [30]:
# Merge on both key columns; with the default inner join only rows whose
# ('key', 'key1') pair occurs in both frames are kept.
res1 = df.merge(df1, on=['key', 'key1'])

res1

Unnamed: 0,key,key1,Name,Age,Address,Qualification
0,K0,K0,Jai,27,Nagpur,Btech
1,K2,K0,Gaurav,22,Allahabad,Bcom


![image.png](attachment:42684336-d9a9-4159-bc37-c5d9b1be62a6.png)

In [34]:
# Re-create the two-key frames (same values as in the previous example) so the
# join-type cells below start from a known state.
data1 = {
    "key": ["K0", "K1", "K2", "K3"],
    "key1": ["K0", "K1", "K0", "K1"],
    "Name": ["Jai", "Princi", "Gaurav", "Anuj"],
    "Age": [27, 24, 22, 32],
}

data2 = {
    "key": ["K0", "K1", "K2", "K3"],
    "key1": ["K0", "K0", "K0", "K0"],
    "Address": ["Nagpur", "Kanpur", "Allahabad", "Kannuaj"],
    "Qualification": ["Btech", "B.A", "Bcom", "B.hons"],
}

df = pd.DataFrame(data=data1)
df1 = pd.DataFrame(data=data2)


In [35]:
# Left join: every row of df is kept; rows with no matching ('key', 'key1')
# pair in df1 get missing values in df1's columns.
res = df.merge(df1, how='left', on=['key', 'key1'])

res

Unnamed: 0,key,key1,Name,Age,Address,Qualification
0,K0,K0,Jai,27,Nagpur,Btech
1,K1,K1,Princi,24,,
2,K2,K0,Gaurav,22,Allahabad,Bcom
3,K3,K1,Anuj,32,,


In [36]:
# Outer join: the union of key pairs from both frames; whichever side has no
# match is filled with missing values (which is why Age is shown as float).
res2 = df.merge(df1, how='outer', on=['key', 'key1'])

res2

Unnamed: 0,key,key1,Name,Age,Address,Qualification
0,K0,K0,Jai,27.0,Nagpur,Btech
1,K1,K0,,,Kanpur,B.A
2,K1,K1,Princi,24.0,,
3,K2,K0,Gaurav,22.0,Allahabad,Bcom
4,K3,K0,,,Kannuaj,B.hons
5,K3,K1,Anuj,32.0,,


In [37]:
# Inner join, stated explicitly: only key pairs present in both frames survive.
res3 = df.merge(df1, how='inner', on=['key', 'key1'])

res3

Unnamed: 0,key,key1,Name,Age,Address,Qualification
0,K0,K0,Jai,27,Nagpur,Btech
1,K2,K0,Gaurav,22,Allahabad,Bcom


## Concatenating DataFrames with `pd.concat`

In [19]:
import pandas as pd

# Two frames with identical columns but different people.
data1 = {
    "Name": ["Jai", "Princi", "Gaurav", "Anuj"],
    "Age": [27, 24, 22, 32],
    "Address": ["Nagpur", "Kanpur", "Allahabad", "Kannuaj"],
    "Qualification": ["Msc", "MA", "MCA", "Phd"],
}

data2 = {
    "Name": ["Abhi", "Ayushi", "Dhiraj", "Hitesh"],
    "Age": [17, 14, 12, 52],
    "Address": ["Nagpur", "Kanpur", "Allahabad", "Kannuaj"],
    "Qualification": ["Btech", "B.A", "Bcom", "B.hons"],
}

# The row labels are disjoint (0-3 and 4-7), so stacking the frames yields unique labels.
df = pd.DataFrame(data=data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data=data2, index=[4, 5, 6, 7])



In [20]:
# Display the second frame (row labels 4-7).
df1

Unnamed: 0,Name,Age,Address,Qualification
4,Abhi,17,Nagpur,Btech
5,Ayushi,14,Kanpur,B.A
6,Dhiraj,12,Allahabad,Bcom
7,Hitesh,52,Kannuaj,B.hons


In [21]:
# Display the first frame (row labels 0-3).
df

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Nagpur,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannuaj,Phd


In [16]:
# Stack the two frames on top of each other (row-wise); the original row
# labels 0-3 and 4-7 are carried over unchanged.
frames = [df, df1]
res1 = pd.concat(frames, axis=0)

res1

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Nagpur,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannuaj,Phd
4,Abhi,17,Nagpur,Btech
5,Ayushi,14,Kanpur,B.A
6,Dhiraj,12,Allahabad,Bcom
7,Hitesh,52,Kannuaj,B.hons


In [8]:
import pandas as pd

# Frames whose columns only partly overlap: data1 has 'Mobile No', data2 has 'Salary'.
data1 = {
    "Name": ["Jai", "Princi", "Gaurav", "Anuj"],
    "Age": [27, 24, 22, 32],
    "Address": ["Nagpur", "Kanpur", "Allahabad", "Kannuaj"],
    "Qualification": ["Msc", "MA", "MCA", "Phd"],
    "Mobile No": [97, 91, 58, 76],
}

data2 = {
    "Name": ["Gaurav", "Anuj", "Dhiraj", "Hitesh"],
    "Age": [22, 32, 12, 52],
    "Address": ["Allahabad", "Kannuaj", "Allahabad", "Kannuaj"],
    "Qualification": ["MCA", "Phd", "Bcom", "B.hons"],
    "Salary": [1000, 2000, 3000, 4000],
}

# Row labels 2 and 3 occur in both frames; 0, 1, 6 and 7 occur in only one.
df = pd.DataFrame(data=data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data=data2, index=[2, 3, 6, 7])



In [9]:
# Display the first frame (row labels 0-3, includes 'Mobile No').
df

Unnamed: 0,Name,Age,Address,Qualification,Mobile No
0,Jai,27,Nagpur,Msc,97
1,Princi,24,Kanpur,MA,91
2,Gaurav,22,Allahabad,MCA,58
3,Anuj,32,Kannuaj,Phd,76


In [10]:
# Display the second frame (row labels 2, 3, 6, 7, includes 'Salary').
df1

Unnamed: 0,Name,Age,Address,Qualification,Salary
2,Gaurav,22,Allahabad,MCA,1000
3,Anuj,32,Kannuaj,Phd,2000
6,Dhiraj,12,Allahabad,Bcom,3000
7,Hitesh,52,Kannuaj,B.hons,4000


In [11]:
# Place the frames side by side, keeping only row labels present in both (2 and 3).
# Column names shared by both frames (Name, Age, ...) appear twice in the result.
res2 = pd.concat([df, df1], axis='columns', join='inner')
res2

Unnamed: 0,Name,Age,Address,Qualification,Mobile No,Name.1,Age.1,Address.1,Qualification.1,Salary
2,Gaurav,22,Allahabad,MCA,58,Gaurav,22,Allahabad,MCA,1000
3,Anuj,32,Kannuaj,Phd,76,Anuj,32,Kannuaj,Phd,2000


In [12]:
# Place the frames side by side, keeping the union of row labels; a label that
# is missing from one frame gets missing values in that frame's columns.
res2 = pd.concat([df, df1], axis='columns', join='outer')
res2

Unnamed: 0,Name,Age,Address,Qualification,Mobile No,Name.1,Age.1,Address.1,Qualification.1,Salary
0,Jai,27.0,Nagpur,Msc,97.0,,,,,
1,Princi,24.0,Kanpur,MA,91.0,,,,,
2,Gaurav,22.0,Allahabad,MCA,58.0,Gaurav,22.0,Allahabad,MCA,1000.0
3,Anuj,32.0,Kannuaj,Phd,76.0,Anuj,32.0,Kannuaj,Phd,2000.0
6,,,,,,Dhiraj,12.0,Allahabad,Bcom,3000.0
7,,,,,,Hitesh,52.0,Kannuaj,B.hons,4000.0


In [None]:
# Concatenating a DataFrame and a Series along the column axis (axis=1)

In [26]:
# One frame plus a named Series that will become an extra column.
data1 = {
    "Name": ["Jai", "Princi", "Gaurav", "Anuj"],
    "Age": [27, 24, 22, 32],
    "Address": ["Nagpur", "Kanpur", "Allahabad", "Kannuaj"],
    "Qualification": ["Msc", "MA", "MCA", "Phd"],
}

df = pd.DataFrame(data=data1, index=[0, 1, 2, 3])

# No index is passed, so the Series gets the default labels 0-3, which match df's row labels.
s1 = pd.Series(data=[1000, 2000, 3000, 4000], name='Salary')



In [27]:
# Attach the Series as a new column; rows are aligned by label and the
# Series name ('Salary') becomes the column name.
res = pd.concat([df, s1], axis='columns')

res

Unnamed: 0,Name,Age,Address,Qualification,Salary
0,Jai,27,Nagpur,Msc,1000
1,Princi,24,Kanpur,MA,2000
2,Gaurav,22,Allahabad,MCA,3000
3,Anuj,32,Kannuaj,Phd,4000
