https://realpython.com/pandas-merge-join-and-concat/

In [1]:
# what do we do if library doesn't exist??
!pip install faker



In [2]:
from faker import Faker
import pandas as pd
import random

In [3]:
f = Faker()
# Seed Faker's shared random generator so the fake names and e-mails are the
# same on every Restart & Run All (for a given Faker version).
Faker.seed(0)

# Student roster: 20 rows with ids 'st_0' .. 'st_19', each with a generated
# name and e-mail address.
df1 = pd.DataFrame({
    'id': [f'st_{i}' for i in range(20)],
    'name': [f.name() for _ in range(20)],
    'email': [f.email() for _ in range(20)],
})

In [4]:
# Display the student roster (columns: id, name, email).
df1

Unnamed: 0,id,name,email
0,st_0,Joseph Thompson,priscilla84@hotmail.com
1,st_1,Nancy Lee,reneemorris@hotmail.com
2,st_2,Brian Owen,trevorcontreras@gmail.com
3,st_3,Craig Parker,bonniewood@richardson.info
4,st_4,Scott Hill,vdudley@yahoo.com
5,st_5,Zachary Jones,cmccoy@hotmail.com
6,st_6,John Cline,sandra49@daugherty.org
7,st_7,Sarah Callahan,shannonpotter@brown.com
8,st_8,Anthony Horton,amckenzie@gmail.com
9,st_9,Glenda Henderson,csmith@gmail.com


In [5]:
# Seed the standard-library generator so the random grades are identical on
# every Restart & Run All.
random.seed(42)

# Exclusive upper bound of the student numbers that receive grades.
count = 25

# Grades for students 'st_5' .. 'st_24'; each grade is a random integer in
# [50, 100]. df1 holds 'st_0' .. 'st_19', so the two frames only partially
# overlap -- which is what makes the different join types below interesting.
df2 = pd.DataFrame({
    'studentId': [f'st_{i}' for i in range(5, count)],
    'grade1': [random.randint(50, 100) for _ in range(5, count)],
    'grade2': [random.randint(50, 100) for _ in range(5, count)],
    'grade3': [random.randint(50, 100) for _ in range(5, count)],
})

In [6]:
# Display the grades table (columns: studentId, grade1, grade2, grade3).
df2

Unnamed: 0,studentId,grade1,grade2,grade3
0,st_5,95,92,59
1,st_6,84,72,97
2,st_7,57,87,66
3,st_8,87,86,87
4,st_9,78,66,79
5,st_10,67,56,85
6,st_11,76,60,83
7,st_12,88,85,67
8,st_13,100,85,55
9,st_14,91,72,65


## Merge `df1` and `df2`: keep only the students that exist in both data frames (inner join)

In [9]:
# Inner join: a row is produced only when df1's `id` equals some df2
# `studentId`. Both key columns are kept in the result.
df_merged = df1.merge(
    df2,
    left_on='id',
    right_on='studentId',
    how='inner',
)
df_merged

Unnamed: 0,id,name,email,studentId,grade1,grade2,grade3
0,st_5,Zachary Jones,cmccoy@hotmail.com,st_5,95,92,59
1,st_6,John Cline,sandra49@daugherty.org,st_6,84,72,97
2,st_7,Sarah Callahan,shannonpotter@brown.com,st_7,57,87,66
3,st_8,Anthony Horton,amckenzie@gmail.com,st_8,87,86,87
4,st_9,Glenda Henderson,csmith@gmail.com,st_9,78,66,79
5,st_10,Mark Smith,zclark@kim-silva.com,st_10,67,56,85
6,st_11,Lori Morse,smcclure@yahoo.com,st_11,76,60,83
7,st_12,Deborah Brown,michaelcox@thomas.info,st_12,88,85,67
8,st_13,Jennifer Bean,johnsonkristen@chapman.com,st_13,100,85,55
9,st_14,Eric Collins,jamesadams@robinson.com,st_14,91,72,65


## Merge `df1` and `df2`: keep all students from `df1`, adding the `df2` columns where the student also exists in `df2` (left join)

In [10]:
# Left join: every df1 row is kept. Students with no matching `studentId`
# get NaN in the df2 columns, which is why the grades display as floats.
df_merged = df1.merge(
    df2,
    left_on='id',
    right_on='studentId',
    how='left',
)
df_merged

Unnamed: 0,id,name,email,studentId,grade1,grade2,grade3
0,st_0,Joseph Thompson,priscilla84@hotmail.com,,,,
1,st_1,Nancy Lee,reneemorris@hotmail.com,,,,
2,st_2,Brian Owen,trevorcontreras@gmail.com,,,,
3,st_3,Craig Parker,bonniewood@richardson.info,,,,
4,st_4,Scott Hill,vdudley@yahoo.com,,,,
5,st_5,Zachary Jones,cmccoy@hotmail.com,st_5,95.0,92.0,59.0
6,st_6,John Cline,sandra49@daugherty.org,st_6,84.0,72.0,97.0
7,st_7,Sarah Callahan,shannonpotter@brown.com,st_7,57.0,87.0,66.0
8,st_8,Anthony Horton,amckenzie@gmail.com,st_8,87.0,86.0,87.0
9,st_9,Glenda Henderson,csmith@gmail.com,st_9,78.0,66.0,79.0


## Merge `df1` and `df2`: keep all students from `df2`, adding the `df1` columns where the student also exists in `df1` (right join)

In [11]:
# Right join: every df2 row is kept. A `studentId` with no matching `id`
# in df1 gets NaN in the df1 columns (id, name, email).
df_merged = df1.merge(
    df2,
    left_on='id',
    right_on='studentId',
    how='right',
)
df_merged

Unnamed: 0,id,name,email,studentId,grade1,grade2,grade3
0,st_5,Zachary Jones,cmccoy@hotmail.com,st_5,95,92,59
1,st_6,John Cline,sandra49@daugherty.org,st_6,84,72,97
2,st_7,Sarah Callahan,shannonpotter@brown.com,st_7,57,87,66
3,st_8,Anthony Horton,amckenzie@gmail.com,st_8,87,86,87
4,st_9,Glenda Henderson,csmith@gmail.com,st_9,78,66,79
5,st_10,Mark Smith,zclark@kim-silva.com,st_10,67,56,85
6,st_11,Lori Morse,smcclure@yahoo.com,st_11,76,60,83
7,st_12,Deborah Brown,michaelcox@thomas.info,st_12,88,85,67
8,st_13,Jennifer Bean,johnsonkristen@chapman.com,st_13,100,85,55
9,st_14,Eric Collins,jamesadams@robinson.com,st_14,91,72,65


## Merge `df1` and `df2`: keep all students from both data frames (outer join)

In [12]:
# Outer join: the union of both frames. Rows present on only one side are
# kept, with NaN filling the columns that come from the other frame.
df_merged = df1.merge(
    df2,
    left_on='id',
    right_on='studentId',
    how='outer',
)
df_merged

Unnamed: 0,id,name,email,studentId,grade1,grade2,grade3
0,st_0,Joseph Thompson,priscilla84@hotmail.com,,,,
1,st_1,Nancy Lee,reneemorris@hotmail.com,,,,
2,st_2,Brian Owen,trevorcontreras@gmail.com,,,,
3,st_3,Craig Parker,bonniewood@richardson.info,,,,
4,st_4,Scott Hill,vdudley@yahoo.com,,,,
5,st_5,Zachary Jones,cmccoy@hotmail.com,st_5,95.0,92.0,59.0
6,st_6,John Cline,sandra49@daugherty.org,st_6,84.0,72.0,97.0
7,st_7,Sarah Callahan,shannonpotter@brown.com,st_7,57.0,87.0,66.0
8,st_8,Anthony Horton,amckenzie@gmail.com,st_8,87.0,86.0,87.0
9,st_9,Glenda Henderson,csmith@gmail.com,st_9,78.0,66.0,79.0
