# Joining DataFrames: `concat`, `merge`, and `join`

In [1]:
import pandas as pd

In [2]:
# First frame for the concat examples: four rows labelled A-D,
# with a Salary column that the second frame does not have.
df1 = pd.DataFrame(
    data=dict(
        Names=["Akash", "Samay", "Gaurav", "Sham"],
        Age=[30, 29, 25, 35],
        Location=["Pune", "Mumbai", "Delhi", "Chennai"],
        Salary=[30000, 50000, 60000, 40000],
    ),
    index=["A", "B", "C", "D"],
)
df1

Unnamed: 0,Names,Age,Location,Salary
A,Akash,30,Pune,30000
B,Samay,29,Mumbai,50000
C,Gaurav,25,Delhi,60000
D,Sham,35,Chennai,40000


In [10]:
# Second frame for the concat examples: four rows labelled C-F,
# with a Designation column instead of Salary.
df2 = pd.DataFrame(
    data=dict(
        Names=["Akshay", "Suresh", "Ajinkya", "Sham"],
        Age=[25, 27, 85, 35],
        Location=["Pune", "Mumbai", "Delhi", "Chennai"],
        Designation=[
            "Python Developer",
            "Data Scientist",
            "Java Developer",
            "Software Engineer",
        ],
    ),
    index=["C", "D", "E", "F"],
)
df2

Unnamed: 0,Names,Age,Location,Designation
C,Akshay,25,Pune,Python Developer
D,Suresh,27,Mumbai,Data Scientist
E,Ajinkya,85,Delhi,Java Developer
F,Sham,35,Chennai,Software Engineer


In [11]:
# With no axis/join given, concat stacks the rows of both frames and keeps
# every column; values a frame does not supply come out as NaN.
pd.concat(objs=[df1, df2])

Unnamed: 0,Names,Age,Location,Salary,Designation
A,Akash,30,Pune,30000.0,
B,Samay,29,Mumbai,50000.0,
C,Gaurav,25,Delhi,60000.0,
D,Sham,35,Chennai,40000.0,
C,Akshay,25,Pune,,Python Developer
D,Suresh,27,Mumbai,,Data Scientist
E,Ajinkya,85,Delhi,,Java Developer
F,Sham,35,Chennai,,Software Engineer


In [12]:
# Place the frames side by side, aligning rows on the index labels;
# a label present in only one frame gets NaN for the other frame's columns.
pd.concat(objs=[df1, df2], axis="columns")

Unnamed: 0,Names,Age,Location,Salary,Names.1,Age.1,Location.1,Designation
A,Akash,30.0,Pune,30000.0,,,,
B,Samay,29.0,Mumbai,50000.0,,,,
C,Gaurav,25.0,Delhi,60000.0,Akshay,25.0,Pune,Python Developer
D,Sham,35.0,Chennai,40000.0,Suresh,27.0,Mumbai,Data Scientist
E,,,,,Ajinkya,85.0,Delhi,Java Developer
F,,,,,Sham,35.0,Chennai,Software Engineer


In [13]:
# Side by side with the join spelled out: "outer" keeps the union of the
# index labels (A-F).
pd.concat(objs=[df1, df2], axis="columns", join="outer")

Unnamed: 0,Names,Age,Location,Salary,Names.1,Age.1,Location.1,Designation
A,Akash,30.0,Pune,30000.0,,,,
B,Samay,29.0,Mumbai,50000.0,,,,
C,Gaurav,25.0,Delhi,60000.0,Akshay,25.0,Pune,Python Developer
D,Sham,35.0,Chennai,40000.0,Suresh,27.0,Mumbai,Data Scientist
E,,,,,Ajinkya,85.0,Delhi,Java Developer
F,,,,,Sham,35.0,Chennai,Software Engineer


In [14]:
# Side by side, keeping only the index labels found in both frames (C and D).
pd.concat(objs=[df1, df2], axis="columns", join="inner")

Unnamed: 0,Names,Age,Location,Salary,Names.1,Age.1,Location.1,Designation
C,Gaurav,25,Delhi,60000,Akshay,25,Pune,Python Developer
D,Sham,35,Chennai,40000,Suresh,27,Mumbai,Data Scientist


In [15]:
# Stack the rows, keeping only the columns both frames share
# (Names, Age, Location).
pd.concat(objs=[df1, df2], axis="index", join="inner")

Unnamed: 0,Names,Age,Location
A,Akash,30,Pune
B,Samay,29,Mumbai
C,Gaurav,25,Delhi
D,Sham,35,Chennai
C,Akshay,25,Pune
D,Suresh,27,Mumbai
E,Ajinkya,85,Delhi
F,Sham,35,Chennai


In [16]:
# Stack the rows, keeping the union of the columns; Salary and Designation
# are NaN for the rows whose source frame lacks them.
pd.concat(objs=[df1, df2], axis="index", join="outer")

Unnamed: 0,Names,Age,Location,Salary,Designation
A,Akash,30,Pune,30000.0,
B,Samay,29,Mumbai,50000.0,
C,Gaurav,25,Delhi,60000.0,
D,Sham,35,Chennai,40000.0,
C,Akshay,25,Pune,,Python Developer
D,Suresh,27,Mumbai,,Data Scientist
E,Ajinkya,85,Delhi,,Java Developer
F,Sham,35,Chennai,,Software Engineer


In [18]:
# Repeats the earlier axis=0 / join="inner" concat: rows stacked,
# only the shared columns kept.
pd.concat(objs=[df1, df2], axis="index", join="inner")

Unnamed: 0,Names,Age,Location
A,Akash,30,Pune
B,Samay,29,Mumbai
C,Gaurav,25,Delhi
D,Sham,35,Chennai
C,Akshay,25,Pune
D,Suresh,27,Mumbai
E,Ajinkya,85,Delhi
F,Sham,35,Chennai


### pd.merge(left_df, right_df, how="inner")

In [19]:
# Left frame for the merge examples: four rows labelled A-D with a Salary column.
df1 = pd.DataFrame(
    data=dict(
        Names=["Akash", "Samay", "Gaurav", "Sham"],
        Age=[30, 29, 25, 35],
        Location=["Pune", "Mumbai", "Delhi", "Chennai"],
        Salary=[30000, 50000, 60000, 40000],
    ),
    index=["A", "B", "C", "D"],
)
df1

Unnamed: 0,Names,Age,Location,Salary
A,Akash,30,Pune,30000
B,Samay,29,Mumbai,50000
C,Gaurav,25,Delhi,60000
D,Sham,35,Chennai,40000


In [22]:
# Right frame for the merge examples: its Akash and Sham rows carry the same
# Names, Age and Location values as rows in df1; Suresh and Ajinkya do not
# appear in df1.
df2 = pd.DataFrame(
    data=dict(
        Names=["Akash", "Suresh", "Ajinkya", "Sham"],
        Age=[30, 27, 85, 35],
        Location=["Pune", "Mumbai", "Delhi", "Chennai"],
        Designation=[
            "Python Developer",
            "Data Scientist",
            "Java Developer",
            "Software Engineer",
        ],
    ),
    index=["C", "D", "E", "F"],
)
df2

Unnamed: 0,Names,Age,Location,Designation
C,Akash,30,Pune,Python Developer
D,Suresh,27,Mumbai,Data Scientist
E,Ajinkya,85,Delhi,Java Developer
F,Sham,35,Chennai,Software Engineer


In [23]:
# No `on` is given, so the key is every column the frames share
# (Names, Age, Location). "inner" keeps only rows that match in both frames.
pd.merge(left=df1, right=df2, how="inner")

Unnamed: 0,Names,Age,Location,Salary,Designation
0,Akash,30,Pune,30000,Python Developer
1,Sham,35,Chennai,40000,Software Engineer


In [24]:
# "left" keeps every row of df1; Designation is NaN where df2 has no
# matching row on the shared columns.
pd.merge(left=df1, right=df2, how="left")

Unnamed: 0,Names,Age,Location,Salary,Designation
0,Akash,30,Pune,30000,Python Developer
1,Samay,29,Mumbai,50000,
2,Gaurav,25,Delhi,60000,
3,Sham,35,Chennai,40000,Software Engineer


In [25]:
# "right" keeps every row of df2; Salary is NaN where df1 has no
# matching row on the shared columns.
pd.merge(left=df1, right=df2, how="right")

Unnamed: 0,Names,Age,Location,Salary,Designation
0,Akash,30,Pune,30000.0,Python Developer
1,Suresh,27,Mumbai,,Data Scientist
2,Ajinkya,85,Delhi,,Java Developer
3,Sham,35,Chennai,40000.0,Software Engineer


In [26]:
# Match on Names only. The other shared columns (Age, Location) are kept from
# both sides and told apart by the _x / _y suffixes. "outer" keeps the rows
# of both frames.
pd.merge(left=df1, right=df2, how="outer", on="Names")

Unnamed: 0,Names,Age_x,Location_x,Salary,Age_y,Location_y,Designation
0,Akash,30.0,Pune,30000.0,30.0,Pune,Python Developer
1,Samay,29.0,Mumbai,50000.0,,,
2,Gaurav,25.0,Delhi,60000.0,,,
3,Sham,35.0,Chennai,40000.0,35.0,Chennai,Software Engineer
4,Suresh,,,,27.0,Mumbai,Data Scientist
5,Ajinkya,,,,85.0,Delhi,Java Developer


In [27]:
# Match on Names only and keep just the names present in both frames;
# Age and Location again appear once per side with _x / _y suffixes.
pd.merge(left=df1, right=df2, how="inner", on="Names")

Unnamed: 0,Names,Age_x,Location_x,Salary,Age_y,Location_y,Designation
0,Akash,30,Pune,30000,30,Pune,Python Developer
1,Sham,35,Chennai,40000,35,Chennai,Software Engineer


### df1.join(df2, how="left")

In [28]:
# Left frame for the join examples: two numeric columns, rows labelled A-C.
df1 = pd.DataFrame(
    data=dict(
        Col1=[1, 2, 3],
        Col2=[10, 20, 30],
    ),
    index=["A", "B", "C"],
)
df1

Unnamed: 0,Col1,Col2
A,1,10
B,2,20
C,3,30


In [30]:
# Right frame for the join examples: different column names, rows labelled
# A, C and D.
df2 = pd.DataFrame(
    data=dict(
        Col3=[100, 200, 300],
        Col4=[1000, 2000, 3000],
    ),
    index=["A", "C", "D"],
)
df2

Unnamed: 0,Col3,Col4
A,100,1000
C,200,2000
D,300,3000


In [31]:
# join aligns the two frames on their index labels; "inner" keeps only the
# labels present in both (A and C).
df1.join(other=df2, how="inner")

Unnamed: 0,Col1,Col2,Col3,Col4
A,1,10,100,1000
C,3,30,200,2000


In [32]:
# "outer" keeps the union of the index labels (A-D); columns from the frame
# that lacks a label are NaN on that row.
df1.join(other=df2, how="outer")

Unnamed: 0,Col1,Col2,Col3,Col4
A,1.0,10.0,100.0,1000.0
B,2.0,20.0,,
C,3.0,30.0,200.0,2000.0
D,,,300.0,3000.0


In [33]:
# "left" keeps every label of df1 (A, B, C); Col3/Col4 are NaN for B,
# which df2 does not contain.
df1.join(other=df2, how="left")

Unnamed: 0,Col1,Col2,Col3,Col4
A,1,10,100.0,1000.0
B,2,20,,
C,3,30,200.0,2000.0


In [34]:
# "right" keeps every label of df2 (A, C, D); Col1/Col2 are NaN for D,
# which df1 does not contain.
df1.join(other=df2, how="right")

Unnamed: 0,Col1,Col2,Col3,Col4
A,1.0,10.0,100,1000
C,3.0,30.0,200,2000
D,,,300,3000


In [35]:
# Frame with repeated rows for the drop_duplicates examples:
# labels C, D, E and F all hold the same (3, 30) pair.
df1 = pd.DataFrame(
    data=dict(
        Col1=[1, 2] + [3] * 4,
        Col2=[10, 20] + [30] * 4,
    ),
    index=["A", "B", "C", "D", "E", "F"],
)
df1

Unnamed: 0,Col1,Col2
A,1,10
B,2,20
C,3,30
D,3,30
E,3,30
F,3,30


In [36]:
# Returns a new frame with the repeated rows removed, keeping the first
# occurrence of each; the surviving rows keep their original labels (A, B, C).
df1.drop_duplicates(subset=None, keep="first")

Unnamed: 0,Col1,Col2
A,1,10
B,2,20
C,3,30


In [38]:
# Drop the repeated rows, keeping the first occurrence of each, and renumber
# the index 0..n-1 (ignore_index=True). The name is rebound to the returned
# frame instead of using inplace=True, so the call can be chained and never
# silently yields None.
df1 = df1.drop_duplicates(keep="first", ignore_index=True)

In [39]:
# Show df1 after the previous cell removed the duplicate rows and reset the
# index to 0..n-1.
df1

Unnamed: 0,Col1,Col2
0,1,10
1,2,20
2,3,30
