# 데이터 프레임 결합

- 상하 결합
- 좌우 결합

### 1. 상하 결합

In [1]:
import pandas as pd

# Two small frames with identical columns (A, B, C) and three rows each,
# used as inputs for the row-wise concatenation examples.
df1 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [11, 12, 13],
        'C': [21, 22, 23],
    }
)
df2 = pd.DataFrame(
    {
        'A': [4, 5, 6],
        'B': [14, 15, 16],
        'C': [24, 25, 26],
    }
)

In [3]:
df1

Unnamed: 0,A,B,C
0,1,11,21
1,2,12,22
2,3,13,23


In [4]:
df2

Unnamed: 0,A,B,C
0,4,14,24
1,5,15,25
2,6,16,26


In [2]:
# Stack df1 on top of df2 (row-wise). Each frame keeps its own index labels,
# so the labels 0-2 appear twice in the result.
pd.concat([df1, df2])

Unnamed: 0,A,B,C
0,1,11,21
1,2,12,22
2,3,13,23
0,4,14,24
1,5,15,25
2,6,16,26


In [5]:
# Rows follow the order of the list passed in: df2's rows come first here.
pd.concat([df2, df1])

Unnamed: 0,A,B,C
0,4,14,24
1,5,15,25
2,6,16,26
0,1,11,21
1,2,12,22
2,3,13,23


결합한 결과의 index를 0부터 다시 매기려면 `ignore_index=True` 옵션을 지정한다.

In [6]:
# ignore_index=True discards the original index labels and renumbers the
# combined rows 0..5.
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,A,B,C
0,1,11,21
1,2,12,22
2,3,13,23
3,4,14,24
4,5,15,25
5,6,16,26


In [7]:
# Case: the column order differs.
# df1 and df2 share the columns A, B, C, but df2 declares them as B, A, C.

df1 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [11, 12, 13],
        'C': [21, 22, 23],
    }
)
df2 = pd.DataFrame(
    {
        'B': [4, 5, 6],
        'A': [14, 15, 16],
        'C': [24, 25, 26],
    }
)

In [9]:
# Columns are matched by name, not by position, so df2's values land under
# the correct A/B/C columns even though df2 declares them in a different order.
pd.concat([df1, df2])

Unnamed: 0,A,B,C
0,1,11,21
1,2,12,22
2,3,13,23
0,14,4,24
1,15,5,25
2,16,6,26


서로 다른 필드로 구성되어 있는 데이터 프레임의 결합

In [10]:
# Frames that share A, B, C but each have one column the other lacks
# (D only in df1, E only in df2).
df1 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [11, 12, 13],
        'C': [21, 22, 23],
        'D': [31, 32, 33],
    }
)
df2 = pd.DataFrame(
    {
        'A': [4, 5, 6],
        'B': [14, 15, 16],
        'C': [24, 25, 26],
        'E': [41, 42, 43],
    }
)

In [11]:
df1

Unnamed: 0,A,B,C,D
0,1,11,21,31
1,2,12,22,32
2,3,13,23,33


In [12]:
df2

Unnamed: 0,A,B,C,E
0,4,14,24,41
1,5,15,25,42
2,6,16,26,43


In [13]:
# With differing columns the result has the union of columns (A-E). Cells with
# no source value are left missing, and D / E are shown as floats as a result.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D,E
0,1,11,21,31.0,
1,2,12,22,32.0,
2,3,13,23,33.0,
0,4,14,24,,41.0
1,5,15,25,,42.0
2,6,16,26,,43.0


In [15]:
# join='outer' keeps the union of columns; the output is identical to calling
# pd.concat([df1, df2]) without the join argument.
pd.concat([df1, df2], join='outer')

Unnamed: 0,A,B,C,D,E
0,1,11,21,31.0,
1,2,12,22,32.0,
2,3,13,23,33.0,
0,4,14,24,,41.0
1,5,15,25,,42.0
2,6,16,26,,43.0


In [17]:
# join='inner' keeps only the columns present in both frames (A, B, C);
# D and E are dropped.
pd.concat([df1, df2], join='inner')

Unnamed: 0,A,B,C
0,1,11,21
1,2,12,22
2,3,13,23
0,4,14,24
1,5,15,25
2,6,16,26


### 2. 좌우 결합

In [18]:
import pandas as pd

# Frames with completely disjoint column sets (A-D vs. E-H) and the same
# three index labels, used for the side-by-side concatenation example.
df1 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [11, 12, 13],
        'C': [21, 22, 23],
        'D': [31, 32, 33],
    }
)
df2 = pd.DataFrame(
    {
        'E': [4, 5, 6],
        'F': [14, 15, 16],
        'G': [24, 25, 26],
        'H': [41, 42, 43],
    }
)

In [19]:
df1

Unnamed: 0,A,B,C,D
0,1,11,21,31
1,2,12,22,32
2,3,13,23,33


In [20]:
df2

Unnamed: 0,E,F,G,H
0,4,14,24,41
1,5,15,25,42
2,6,16,26,43


In [21]:
# axis=1 places the frames side by side: df1's columns A-D are followed by
# df2's columns E-H on the same rows.
pd.concat([df1, df2], axis=1)

Unnamed: 0,A,B,C,D,E,F,G,H
0,1,11,21,31,4,14,24,41
1,2,12,22,32,5,15,25,42
2,3,13,23,33,6,16,26,43


In [23]:
# Two frames describing the same three IDs in the same row order:
# df1 holds the columns '성별' and '나이', df2 holds '키' and '몸무게'.
df1 = pd.DataFrame(
    {
        'ID': [1, 2, 3],
        '성별': ['F', 'M', 'F'],
        '나이': [20, 30, 40],
    }
)
df2 = pd.DataFrame(
    {
        'ID': [1, 2, 3],
        '키': [160.5, 170.3, 180.1],
        '몸무게': [45.1, 50.3, 72.1],
    }
)

In [24]:
df1

Unnamed: 0,ID,성별,나이
0,1,F,20
1,2,M,30
2,3,F,40


In [25]:
df2

Unnamed: 0,ID,키,몸무게
0,1,160.5,45.1
1,2,170.3,50.3
2,3,180.1,72.1


In [26]:
# Side-by-side concat does not merge the shared key: the result carries two ID
# columns (rendered as ID and ID.1 in the output below).
pd.concat([df1, df2], axis=1)

Unnamed: 0,ID,성별,나이,ID.1,키,몸무게
0,1,F,20,1,160.5,45.1
1,2,M,30,2,170.3,50.3
2,3,F,40,3,180.1,72.1


다음 두 데이터 프레임을 ID 기준으로 결합

In [31]:
# Frames whose ID sets only partly overlap: df1 covers IDs 1-5, df2 covers
# IDs 3-7, so only IDs 3, 4, 5 appear in both.
df1 = pd.DataFrame(
    {
        'ID': [1, 2, 3, 4, 5],
        '성별': ['F', 'M', 'F', 'M', 'F'],
        '나이': [20, 30, 40, 25, 42],
    }
)
df2 = pd.DataFrame(
    {
        'ID': [3, 4, 5, 6, 7],
        '키': [160.5, 170.3, 180.1, 142.3, 153.7],
        '몸무게': [45.1, 50.3, 72.1, 38, 42],
    }
)

In [32]:
df1

Unnamed: 0,ID,성별,나이
0,1,F,20
1,2,M,30
2,3,F,40
3,4,M,25
4,5,F,42


In [33]:
df2

Unnamed: 0,ID,키,몸무게
0,3,160.5,45.1
1,4,170.3,50.3
2,5,180.1,72.1
3,6,142.3,38.0
4,7,153.7,42.0


In [35]:
# Pitfall: concat pairs rows by index label, not by the ID values, so each
# output row mixes two different IDs (e.g. ID 1 sits next to ID 3).
pd.concat([df1, df2], axis=1)

Unnamed: 0,ID,성별,나이,ID.1,키,몸무게
0,1,F,20,3,160.5,45.1
1,2,M,30,4,170.3,50.3
2,3,F,40,5,180.1,72.1
3,4,M,25,6,142.3,38.0
4,5,F,42,7,153.7,42.0


df1 기준으로 결합

In [42]:
# Left join on ID: every df1 row is kept; IDs missing from df2 (1 and 2)
# get empty values in df2's columns.
pd.merge(df1, df2, how='left', on=['ID'])

Unnamed: 0,ID,성별,나이,키,몸무게
0,1,F,20,,
1,2,M,30,,
2,3,F,40,160.5,45.1
3,4,M,25,170.3,50.3
4,5,F,42,180.1,72.1


df2 기준으로 결합

In [43]:
# Right join on ID: every df2 row is kept; IDs missing from df1 (6 and 7)
# get empty values in df1's columns.
pd.merge(df1, df2, how='right', on=['ID'])

Unnamed: 0,ID,성별,나이,키,몸무게
0,3,F,40.0,160.5,45.1
1,4,M,25.0,170.3,50.3
2,5,F,42.0,180.1,72.1
3,6,,,142.3,38.0
4,7,,,153.7,42.0


df1, df2에 모두 있는 유저만 결합

In [44]:
# Inner join on ID: only IDs present in both frames (3, 4, 5) are kept.
pd.merge(df1, df2, how='inner', on=['ID'])

Unnamed: 0,ID,성별,나이,키,몸무게
0,3,F,40,160.5,45.1
1,4,M,25,170.3,50.3
2,5,F,42,180.1,72.1


모든 유저의 정보를 결합

In [45]:
# Outer join on ID: all IDs from either frame (1-7) are kept; columns from the
# frame that lacks an ID are left empty for that row.
pd.merge(df1, df2, how='outer', on=["ID"])

Unnamed: 0,ID,성별,나이,키,몸무게
0,1,F,20.0,,
1,2,M,30.0,,
2,3,F,40.0,160.5,45.1
3,4,M,25.0,170.3,50.3
4,5,F,42.0,180.1,72.1
5,6,,,142.3,38.0
6,7,,,153.7,42.0


컬럼 이름이 다른 경우에 대한 결합

In [48]:
# Frames whose key columns carry different names: the key is USER_ID in df1
# and ID in df2.
df1 = pd.DataFrame(
    {
        'USER_ID': [1, 2, 3, 4, 5],  # named USER_ID here, not ID
        '성별': ['F', 'M', 'F', 'M', 'F'],
        '나이': [20, 30, 40, 25, 42],
    }
)
df2 = pd.DataFrame(
    {
        'ID': [3, 4, 5, 6, 7],
        '키': [160.5, 170.3, 180.1, 142.3, 153.7],
        '몸무게': [45.1, 50.3, 72.1, 38, 42],
    }
)

In [49]:
# When the key columns have different names, pass them via left_on / right_on.
# Both key columns (USER_ID and ID) are kept in the result.
pd.merge(df1, df2, how='outer', left_on='USER_ID', right_on='ID')

Unnamed: 0,USER_ID,성별,나이,ID,키,몸무게
0,1.0,F,20.0,,,
1,2.0,M,30.0,,,
2,3.0,F,40.0,3.0,160.5,45.1
3,4.0,M,25.0,4.0,170.3,50.3
4,5.0,F,42.0,5.0,180.1,72.1
5,,,,6.0,142.3,38.0
6,,,,7.0,153.7,42.0


아래 두 dataframe을 ID 기준으로 결합한다.

- df1 = 회원 정보
- df2 = 회원의 구매 내역

In [50]:
# df1: member information, one row per ID.
# df2: purchase history; an ID may appear several times (ID 1 has three
# purchases) or not at all (IDs 3 and 5).
df1 = pd.DataFrame(
    {
        'ID': [1, 2, 3, 4, 5],
        '가입일': ['2021-01-02', '2021-01-04', '2021-01-10', '2021-02-10', '2021-02-24'],
        '성별': ['F', 'M', 'F', 'M', 'M'],
    }
)
df2 = pd.DataFrame(
    {
        '구매순서': [1, 2, 3, 4, 5],
        'ID': [1, 1, 2, 4, 1],
        '구매월': [1, 1, 2, 2, 3],
        '금액': [1000, 1500, 2000, 3000, 4000],
    }
)

In [51]:
df1

Unnamed: 0,ID,가입일,성별
0,1,2021-01-02,F
1,2,2021-01-04,M
2,3,2021-01-10,F
3,4,2021-02-10,M
4,5,2021-02-24,M


In [52]:
df2

Unnamed: 0,구매순서,ID,구매월,금액
0,1,1,1,1000
1,2,1,1,1500
2,3,2,2,2000
3,4,4,2,3000
4,5,1,3,4000


In [55]:
# Plain left merge on ID: a member with several purchases is repeated once per
# purchase (ID 1 yields three rows), and members with no purchases (IDs 3, 5)
# keep a single row with empty purchase columns.
pd.merge(df1, df2, how='left', on='ID')

Unnamed: 0,ID,가입일,성별,구매순서,구매월,금액
0,1,2021-01-02,F,1.0,1.0,1000.0
1,1,2021-01-02,F,2.0,1.0,1500.0
2,1,2021-01-02,F,5.0,3.0,4000.0
3,2,2021-01-04,M,3.0,2.0,2000.0
4,3,2021-01-10,F,,,
5,4,2021-02-10,M,4.0,2.0,3000.0
6,5,2021-02-24,M,,,
