# ⛓️‍💥 Pandas DataFrame Concatenation

#### » Importing the pandas and NumPy libraries

In [1]:
import numpy as np
import pandas as pd

#### » Creating a 5×3 matrix of random integers in the range [5, 30)

In [2]:
# Draw a 5x3 array of random integers from the half-open range [5, 30).
m = np.random.randint(low=5, high=30, size=(5, 3))

#### » Creating a DataFrame from the matrix and column names

In [3]:
# Wrap the matrix in a DataFrame, labelling its three columns.
df = pd.DataFrame(
    data=m,
    columns=["var1", "var2", "var3"],
)

In [4]:
# Display the DataFrame that was built from the random matrix.
df

Unnamed: 0,var1,var2,var3
0,14,24,14
1,11,9,19
2,5,16,24
3,25,26,19
4,28,9,21


#### » Creating a second DataFrame by adding 9 to every element of the first

In [5]:
# Element-wise addition: every cell of df is shifted up by 9.
df2 = df.add(9)

In [6]:
# Display the derived DataFrame (each value is the matching df value plus 9).
df2

Unnamed: 0,var1,var2,var3
0,23,33,23
1,20,18,28
2,14,25,33
3,34,35,28
4,37,18,30


#### » Concatenating two dataframes vertically (default axis=0)

In [7]:
# Stack df2 underneath df; each frame keeps its own 0..4 row labels.
pd.concat(objs=[df, df2])

Unnamed: 0,var1,var2,var3
0,14,24,14
1,11,9,19
2,5,16,24
3,25,26,19
4,28,9,21
0,23,33,23
1,20,18,28
2,14,25,33
3,34,35,28
4,37,18,30


#### » Concatenating vertically and resetting the index

In [8]:
# Stack the frames and discard the original row labels in favour of a
# fresh 0..n-1 index.
pd.concat(
    objs=(df, df2),
    ignore_index=True,
)

Unnamed: 0,var1,var2,var3
0,14,24,14
1,11,9,19
2,5,16,24
3,25,26,19
4,28,9,21
5,23,33,23
6,20,18,28
7,14,25,33
8,34,35,28
9,37,18,30


In [9]:
# Inspect the column labels of df.
df.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [10]:
# Relabel df2's columns in place so that its last column ("var4") no longer
# matches df's last column ("var3").
df2.columns = ["var1", "var2", "var4"]

In [11]:
# Display df2 to confirm the new column labels.
df2

Unnamed: 0,var1,var2,var4
0,23,33,23
1,20,18,28
2,14,25,33
3,34,35,28
4,37,18,30


#### » Concatenating two dataframes with mismatched columns
⚠️ Columns present in only one DataFrame are filled with NaN for the rows that come from the other DataFrame

In [12]:
# Outer join (the default) keeps the union of the columns; cells that have
# no source value become NaN.
pd.concat(objs=[df, df2], join="outer")

Unnamed: 0,var1,var2,var3,var4
0,14,24,14.0,
1,11,9,19.0,
2,5,16,24.0,
3,25,26,19.0,
4,28,9,21.0,
0,23,33,,23.0
1,20,18,,28.0
2,14,25,,33.0
3,34,35,,28.0
4,37,18,,30.0


#### » Using 'join="inner"' to only keep common columns

In [13]:
# Inner join keeps only the columns that both frames share.
pd.concat(
    objs=(df, df2),
    join="inner",
)

Unnamed: 0,var1,var2
0,14,24
1,11,9
2,5,16
3,25,26
4,28,9
0,23,33
1,20,18
2,14,25
3,34,35
4,37,18


In [14]:
?pd.concat

Signature:
pd.concat(
    objs: 'Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame]',
    *,
    axis: 'Axis' = 0,
    join: 'str' = 'outer',
    ignore_index: 'bool' = False,
    keys: 'Iterable[Hashable] | None' = None,
    levels=None,
    names: 'list[HashableT] | None' = None,
    verify_integrity… (output truncated)

#### » Concatenating along columns (axis=1)

In [15]:
# Place the frames side by side, aligning rows on the index.
pd.concat([df, df2], axis="columns")

Unnamed: 0,var1,var2,var3,var1.1,var2.1,var4
0,14,24,14,23,33,23
1,11,9,19,20,18,28
2,5,16,24,14,25,33
3,25,26,19,34,35,28
4,28,9,21,37,18,30


In [16]:
# Stack the frames row-wise; spelling out the axis gives the same result as
# leaving it at its default.
pd.concat([df, df2], axis="index")

Unnamed: 0,var1,var2,var3,var4
0,14,24,14.0,
1,11,9,19.0,
2,5,16,24.0,
3,25,26,19.0,
4,28,9,21.0,
0,23,33,,23.0
1,20,18,,28.0
2,14,25,,33.0
3,34,35,,28.0
4,37,18,,30.0


#### » Concatenating two DataFrames vertically with hierarchical indexing using `keys`

In [20]:
# Passing a mapping labels each frame with its dict key, producing an outer
# index level ("original" / "modified") above the existing row labels.
pd.concat({"original": df, "modified": df2})

Unnamed: 0,Unnamed: 1,var1,var2,var3,var4
original,0,9,26,26.0,
original,1,9,17,23.0,
original,2,10,25,8.0,
original,3,8,15,11.0,
original,4,5,27,8.0,
modified,0,18,35,,35.0
modified,1,18,26,,32.0
modified,2,19,34,,17.0
modified,3,17,24,,20.0
modified,4,14,36,,17.0


#### » Concatenating a Series to a DataFrame horizontally (axis=1) to add a new column

In [19]:
# Attach a named Series as an extra column. The Series has only three
# values, so the remaining rows of new_col are filled with NaN.
pd.concat(
    [df, pd.Series(data=[1, 2, 3], name="new_col")],
    axis="columns",
)

Unnamed: 0,var1,var2,var3,new_col
0,14,24,14,1.0
1,11,9,19,2.0
2,5,16,24,3.0
3,25,26,19,
4,28,9,21,
