In [10]:
import pandas as pd
import numpy as np

# Concat

In [11]:
# Two frames with the same columns A-D and the default 0-3 index.
# df1 holds the values suffixed 0-3, df2 the values suffixed 4-7.
df1 = pd.DataFrame(
    {col: [f'{col}{n}' for n in range(4)] for col in 'ABCD'}
)

df2 = pd.DataFrame(
    {col: [f'{col}{n}' for n in range(4, 8)] for col in 'ABCD'}
)

In [12]:
# Row-wise stack. Each frame keeps its own 0-3 labels, so the resulting
# index contains every label twice (the "bad indexing" shown below).
pd.concat([df1, df2], axis=0)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


In [13]:
# Same row-wise stack, but discard the source labels so the result is
# renumbered 0-7.
pd.concat(
    [df1, df2],
    axis=0,
    ignore_index=True,
)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [14]:
# Column-wise concat: df2's columns are placed to the right of df1's,
# with rows matched on the shared 0-3 index.
pd.concat([df1, df2], axis='columns')

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
3,A3,B3,C3,D3,A7,B7,C7,D7


# Merge


In [15]:
# Two frames sharing a 'key' column (K0-K3):
# `left` contributes columns A/B, `right` contributes columns C/D.
left = pd.DataFrame({
    'key': [f'K{n}' for n in range(4)],
    'A': [f'A{n}' for n in range(4)],
    'B': [f'B{n}' for n in range(4)],
})

right = pd.DataFrame({
    'key': [f'K{n}' for n in range(4)],
    'C': [f'C{n}' for n in range(4)],
    'D': [f'D{n}' for n in range(4)],
})

In [16]:
# Match rows of both frames on their common 'key' column.
pd.merge(left, right, on='key')

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


In [17]:
# Same data as before, but the key column now has a different name in each
# frame: 'key_1' in `left`, 'key_2' in `right`.
left = pd.DataFrame({
    'key_1': [f'K{n}' for n in range(4)],
    'A': [f'A{n}' for n in range(4)],
    'B': [f'B{n}' for n in range(4)],
})

right = pd.DataFrame({
    'key_2': [f'K{n}' for n in range(4)],
    'C': [f'C{n}' for n in range(4)],
    'D': [f'D{n}' for n in range(4)],
})

In [18]:
# `on='key'` needs a column called 'key' in both frames, but here the key
# columns are named 'key_1' and 'key_2', so pandas raises KeyError.
# The error is the point of this cell: catch it and show the message so the
# notebook still completes under "Restart & Run All".
try:
    left.merge(right, on='key')
except KeyError as exc:
    print(f"KeyError: {exc}")

KeyError: 'key'

In [None]:
# Fix for the KeyError: name the key column of each frame separately.
# Both key columns are kept in the result.
pd.merge(left, right, left_on='key_1', right_on='key_2')

Unnamed: 0,key_1,A,B,key_2,C,D
0,K0,A0,B0,K0,C0,D0
1,K1,A1,B1,K1,C1,D1
2,K2,A2,B2,K2,C2,D2
3,K3,A3,B3,K3,C3,D3


In [None]:
# `left` now has a missing value (NaN) as its last key, so its fourth row has
# no counterpart in `right`; `right` still carries the full K0-K3 key set.
left = pd.DataFrame({
    'key_1': [f'K{n}' for n in range(3)] + [np.nan],
    'A': [f'A{n}' for n in range(4)],
    'B': [f'B{n}' for n in range(4)],
})

right = pd.DataFrame({
    'key_2': [f'K{n}' for n in range(4)],
    'C': [f'C{n}' for n in range(4)],
    'D': [f'D{n}' for n in range(4)],
})

In [None]:
# What happens to the NaN key? merge defaults to an inner join, so the left
# row whose key is NaN and the right row 'K3' (no partner on the left) are
# both dropped, leaving only K0-K2.
pd.merge(
    left,
    right,
    how='inner',
    left_on='key_1',
    right_on='key_2',
)

Unnamed: 0,key_1,A,B,key_2,C,D
0,K0,A0,B0,K0,C0,D0
1,K1,A1,B1,K1,C1,D1
2,K2,A2,B2,K2,C2,D2


In [None]:
# Right join: keep every row of `right`. 'K3' has no match in `left`, so its
# left-hand columns (key_1, A, B) come back empty (NaN).
pd.merge(
    left,
    right,
    how='right',
    left_on='key_1',
    right_on='key_2',
)

Unnamed: 0,key_1,A,B,key_2,C,D
0,K0,A0,B0,K0,C0,D0
1,K1,A1,B1,K1,C1,D1
2,K2,A2,B2,K2,C2,D2
3,,,,K3,C3,D3


# Join

In [19]:
# Frames for the join examples: the keys live in the index, not in a column.
# The two indexes overlap only on 'k0' and 'k2'.
left = pd.DataFrame(
    data={name: [f'{name}{n}' for n in range(3)] for name in ('A', 'B')},
    index=['k0', 'k1', 'k2'],
)

right = pd.DataFrame(
    data={name: [f'{name}{n}' for n in range(3)] for name in ('C', 'D')},
    index=['k0', 'k2', 'k3'],
)

In [20]:
# LEFT JOIN (the default for .join): keep every index label of `left`;
# 'k1' has no row in `right`, so its C/D cells are empty.
left.join(right, how='left')

Unnamed: 0,A,B,C,D
k0,A0,B0,C0,D0
k1,A1,B1,,
k2,A2,B2,C1,D1


In [21]:
# INNER JOIN: keep only the index labels present in both frames ('k0', 'k2').
left.join(other=right, how='inner')

Unnamed: 0,A,B,C,D
k0,A0,B0,C0,D0
k2,A2,B2,C1,D1


In [22]:
# OUTER JOIN: keep the union of both indexes; cells with no source row
# ('k1' on the right side, 'k3' on the left side) are left empty.
left.join(other=right, how='outer')

Unnamed: 0,A,B,C,D
k0,A0,B0,C0,D0
k1,A1,B1,,
k2,A2,B2,C1,D1
k3,,,C2,D2


In [24]:
# RIGHT JOIN: keep every index label of `right`; 'k3' has no row in `left`,
# so its A/B cells are empty.
left.join(other=right, how='right')

Unnamed: 0,A,B,C,D
k0,A0,B0,C0,D0
k2,A2,B2,C1,D1
k3,,,C2,D2
