# 3.06 Combining Datasets: Concat and Append

In [2]:
import pandas as pd
import numpy as np

def make_df(cols, ind):
    """Build a small demonstration DataFrame with self-describing cells.

    Parameters
    ----------
    cols : iterable
        Column labels; iterating a string such as ``'ABC'`` yields one
        column per character.
    ind : iterable
        Row labels, used both to build the cell text and as the index.

    Returns
    -------
    pandas.DataFrame
        A frame in which the cell at column ``c`` and row ``i`` holds
        ``str(c) + str(i)`` (for example ``'A0'``).
    """
    cells = {}
    for name in cols:
        # One list of "<column><row>" strings per column label.
        cells[name] = [str(name) + str(row) for row in ind]
    return pd.DataFrame(cells, index=ind)

# example DataFrame
make_df('ABC', range(3))

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [5]:
# Hand-written 3x3 frame: lowercase cell text, integer row labels 0..2
# and uppercase column labels.
df = pd.DataFrame(
    data=[
        ['a0', 'b0', 'c0'],
        ['a1', 'b1', 'c1'],
        ['a2', 'b2', 'c2'],
    ],
    index=[0, 1, 2],
    columns=['A', 'B', 'C'],
)

df

Unnamed: 0,A,B,C
0,a0,b0,c0
1,a1,b1,c1
2,a2,b2,c2


## Recall: Concatenation of NumPy Arrays

In [6]:
x = [1, 2, 3]
y = [4, 5, 6]
z = [7, 8, 9]
np.concatenate([x, y, z])

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [7]:

x = [[1, 2],
     [3, 4]]
np.concatenate([x, x], axis=1)

array([[1, 2, 1, 2],
       [3, 4, 3, 4]])

In [48]:
x = [[1, 2],
     [3, 4]]
np.concatenate([x, x])

array([[1, 2],
       [3, 4],
       [1, 2],
       [3, 4]])

### Simple Concatenation with pd.concat

In [8]:

ser1 = pd.Series(['A', 'B', 'C'], index=[1, 2, 3])
ser2 = pd.Series(['D', 'E', 'F'], index=[4, 5, 6])
pd.concat([ser1, ser2])

1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

In [11]:
def make_df(cols, ind):
    """Return a DataFrame whose cells spell out their column and row labels.

    NOTE(review): this repeats the ``make_df`` defined in the first code
    cell of the notebook; a single definition would be enough.

    Parameters
    ----------
    cols : iterable
        Column labels (a string is iterated character by character).
    ind : iterable
        Row labels; also passed to the DataFrame as its index.

    Returns
    -------
    pandas.DataFrame
        Cell ``(i, c)`` contains ``str(c) + str(i)``.
    """
    def column_values(label):
        # Text for every row of one column, e.g. ['A0', 'A1', ...].
        return [str(label) + str(row) for row in ind]

    return pd.DataFrame({label: column_values(label) for label in cols},
                        index=ind)

# example DataFrame
make_df('ABC', range(3))

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [13]:
# Two frames with identical columns and disjoint row labels, used as the
# inputs for the row-wise concatenation example.
df1 = make_df('AB', [1, 2])
df2 = make_df('AB', [3, 4])

# The former last line, "df1 df2 pd.concat([df1, df2])", was not valid
# Python and stopped this cell from running. df1, df2 and their
# concatenation are each displayed in the cells that follow.


In [14]:
df1

Unnamed: 0,A,B
1,A1,B1
2,A2,B2


In [15]:
df2

Unnamed: 0,A,B
3,A3,B3
4,A4,B4


In [16]:
pd.concat([df1,df2])

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [18]:

df3 = make_df('AB', [0, 1])
df4 = make_df('CD', [0, 1])
df3

Unnamed: 0,A,B
0,A0,B0
1,A1,B1


In [19]:
df4

Unnamed: 0,C,D
0,C0,D0
1,C1,D1


In [21]:
pd.concat([df3,df4])

Unnamed: 0,A,B,C,D
0,A0,B0,,
1,A1,B1,,
0,,,C0,D0
1,,,C1,D1


In [24]:
pd.concat([df3, df4], axis=1)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1


In [26]:
pd.concat([df3, df4], axis=0)

Unnamed: 0,A,B,C,D
0,A0,B0,,
1,A1,B1,,
0,,,C0,D0
1,,,C1,D1


### Duplicate indices

In [28]:
x = make_df('AB', [0, 1])
y = make_df('AB', [2, 3])
# Replace y's row labels with x's: y keeps its own cell values but now
# carries the same index labels as x.
y.index = x.index  # make duplicate indices!
x

Unnamed: 0,A,B
0,A0,B0
1,A1,B1


In [29]:
y

Unnamed: 0,A,B
0,A2,B2
1,A3,B3


In [30]:
pd.concat([x,y])

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
0,A2,B2
1,A3,B3


In [32]:
pd.concat([x, y], ignore_index=True)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [33]:
x

Unnamed: 0,A,B
0,A0,B0
1,A1,B1


In [34]:
y

Unnamed: 0,A,B
0,A2,B2
1,A3,B3


In [35]:
pd.concat([x, y], keys=['x', 'y'])

Unnamed: 0,Unnamed: 1,A,B
x,0,A0,B0
x,1,A1,B1
y,0,A2,B2
y,1,A3,B3


### Concatenation with joins

In [38]:
df5 = make_df('ABC', [1, 2])
df6 = make_df('BCD', [3, 4])

df5

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2


In [39]:
df6

Unnamed: 0,B,C,D
3,B3,C3,D3
4,B4,C4,D4


In [40]:
pd.concat([df5, df6])

Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


In [42]:
pd.concat([df5, df6], join='inner')

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


In [43]:
# The `join_axes` argument was removed from pd.concat in pandas 1.0.
# Reindexing the concatenated result to df5's columns keeps exactly
# those columns, leaving missing values where df6 has no such column.
pd.concat([df5, df6]).reindex(columns=df5.columns)

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,,B3,C3
4,,B4,C4


### The append() method

In [45]:
df1

Unnamed: 0,A,B
1,A1,B1
2,A2,B2


In [46]:
df2

Unnamed: 0,A,B
3,A3,B3
4,A4,B4


In [47]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat([df1, df2]) returns the same new frame and leaves df1 and
# df2 unchanged.
pd.concat([df1, df2])

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4
