# ⛓️‍💥 Pandas DataFrame Concatenation

#### » Import the NumPy and pandas libraries

In [1]:
import numpy as np
import pandas as pd

#### » Create a 5×3 matrix of random integers in the range [5, 30)

In [2]:
# Draw a 5x3 array of random integers; `low` is inclusive and `high` is exclusive.
m = np.random.randint(low=5, high=30, size=(5, 3))

#### » Create a DataFrame from the matrix and column names

In [3]:
# Wrap the matrix in a DataFrame and label its three columns var1..var3.
df = pd.DataFrame(data=m, columns=["var1", "var2", "var3"])
df

Unnamed: 0,var1,var2,var3
0,11,10,8
1,12,29,17
2,14,21,25
3,29,5,27
4,16,27,23


#### » Create a second DataFrame by adding 9 to every element of the first

In [4]:
# Shift every value up by 9 to get a second frame with the same shape and labels.
df2 = df.add(9)
df2

Unnamed: 0,var1,var2,var3
0,20,19,17
1,21,38,26
2,23,30,34
3,38,14,36
4,25,36,32


#### » Concatenate two dataframes vertically (default axis=0)

In [5]:
# Stack df2's rows beneath df's; each frame keeps its own 0-4 row labels,
# so the labels repeat in the result.
pd.concat([df, df2])

Unnamed: 0,var1,var2,var3
0,11,10,8
1,12,29,17
2,14,21,25
3,29,5,27
4,16,27,23
0,20,19,17
1,21,38,26
2,23,30,34
3,38,14,36
4,25,36,32


#### » Concatenate vertically and reset the index (`ignore_index=True`)

In [8]:
# ignore_index=True discards the original row labels and numbers the
# combined rows 0-9.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,var1,var2,var3
0,11,10,8
1,12,29,17
2,14,21,25
3,29,5,27
4,16,27,23
5,20,19,17
6,21,38,26
7,23,30,34
8,38,14,36
9,25,36,32


In [6]:
# Inspect df's column labels before changing df2's.
df.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [7]:
# Overwrite df2's column labels in place: the third column becomes "var4",
# so df and df2 now share only var1 and var2.
df2.columns = ["var1", "var2", "var4"]
df2

Unnamed: 0,var1,var2,var4
0,20,19,17
1,21,38,26
2,23,30,34
3,38,14,36
4,25,36,32


#### » Concatenate two dataframes with mismatched columns
⚠️ Columns that exist in only one DataFrame are filled with NaN for the other DataFrame's rows, which also turns those columns into floats

In [8]:
# Same call as before, but the frames no longer have identical columns: the
# result has the union of both column sets (var1-var4), with NaN wherever a
# frame lacks a column.
pd.concat([df, df2])

Unnamed: 0,var1,var2,var3,var4
0,11,10,8.0,
1,12,29,17.0,
2,14,21,25.0,
3,29,5,27.0,
4,16,27,23.0,
0,20,19,,17.0
1,21,38,,26.0
2,23,30,,34.0
3,38,14,,36.0
4,25,36,,32.0


#### » Use 'join="inner"' to only keep common columns

In [9]:
# join="inner" keeps only the columns present in both frames (var1 and var2).
pd.concat([df, df2], join="inner")

Unnamed: 0,var1,var2
0,11,10
1,12,29
2,14,21
3,29,5
4,16,27
0,20,19
1,21,38
2,23,30
3,38,14
4,25,36


#### » Concatenate along columns (axis=1)

In [10]:
# axis=1 places df2's columns to the right of df's, matching rows by index label.
pd.concat([df, df2], axis=1)

Unnamed: 0,var1,var2,var3,var1,var2,var4
0,11,10,8,20,19,17
1,12,29,17,21,38,26
2,14,21,25,23,30,34
3,29,5,27,38,14,36
4,16,27,23,25,36,32


#### » Concatenate along rows (axis=0)

In [13]:
# Passing axis=0 explicitly gives the same row-wise stacking as the default call.
pd.concat([df, df2], axis=0)

Unnamed: 0,var1,var2,var3,var4
0,11,10,8.0,
1,12,29,17.0,
2,14,21,25.0,
3,29,5,27.0,
4,16,27,23.0,
0,20,19,,17.0
1,21,38,,26.0
2,23,30,,34.0
3,38,14,,36.0
4,25,36,,32.0


#### » Concatenate two DataFrames vertically with hierarchical indexing using `keys`

In [14]:
# keys= adds an outer index level ("original" / "modified") that records
# which input frame each row came from.
pd.concat([df, df2], keys=["original", "modified"])

Unnamed: 0,Unnamed: 1,var1,var2,var3,var4
original,0,11,10,8.0,
original,1,12,29,17.0,
original,2,14,21,25.0,
original,3,29,5,27.0,
original,4,16,27,23.0,
modified,0,20,19,,17.0
modified,1,21,38,,26.0
modified,2,23,30,,34.0
modified,3,38,14,,36.0
modified,4,25,36,,32.0


#### » Concatenate a Series to a DataFrame horizontally (axis=1) to add a new column; rows with no matching Series index get NaN

In [15]:
# The Series has only three values (index 0-2), so rows 3 and 4 get NaN in
# new_col and the column is shown as float.
pd.concat([df, pd.Series([1, 2, 3], name="new_col")], axis=1)

Unnamed: 0,var1,var2,var3,new_col
0,11,10,8,1.0
1,12,29,17,2.0
2,14,21,25,3.0
3,29,5,27,
4,16,27,23,
