# 6. Renaming and Combining

In [1]:
import pandas as pd
from sklearn.datasets import load_iris

# Sample DataFrame built from scikit-learn's iris dataset.
iris = load_iris()  # load once and reuse for both the column labels and the values

# Strip the last 5 characters of each feature name (the trailing unit suffix,
# e.g. ' (cm)') and replace the remaining spaces with underscores.
columns = [name[:-5].replace(' ', '_') for name in iris.feature_names]

df = pd.DataFrame(data=iris.data, columns=columns)
df.head(10)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,4.9,3.1,1.5,0.1


## Renaming

In [3]:
# rename(): change a column label (sepal_length -> sepal_length_renamed)
column_renames = {'sepal_length': 'sepal_length_renamed'}
df.rename(columns=column_renames, inplace=True)
df

Unnamed: 0,sepal_length_renamed,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [4]:
# rename(): change an index label (row label 0 -> 'first_entry')
index_renames = {0: 'first_entry'}
df.rename(index=index_renames, inplace=True)
df

Unnamed: 0,sepal_length_renamed,sepal_width,petal_length,petal_width
first_entry,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [8]:
# rename_axis(): set the name of each axis itself (not its labels) --
# the row axis is named 'flowers' and the column axis 'categories'.
df.rename_axis(index='flowers', columns='categories', inplace=True)
df

categories,sepal_length_renamed,sepal_width,petal_length,petal_width
flowers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
first_entry,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


## Combining
- concat()
- join()
- merge(): merge로 할 수 있는 작업의 대부분은 join으로 더 쉽게 수행할 수 있음

In [22]:
# concat: useful for stacking DataFrames that share the same columns
columns = ['a', 'b', 'c']
v1 = [[1, 2, 3], [10, 11, 12]]
v2 = [[4, 5, 6], [21, 22, 23]]

df1 = pd.DataFrame(data=v1, columns=columns)
df2 = pd.DataFrame(data=v2, columns=columns)

# By default concat keeps each input's own index (0, 1, 0, 1 here);
# ignore_index=True drops them and numbers the result 0..n-1 in one step.
result = pd.concat([df1, df2], ignore_index=True)
result

Unnamed: 0,a,b,c
0,1,2,3
1,10,11,12
2,4,5,6
3,21,22,23


In [23]:
# join: combine two DataFrames side by side, matching rows on the index
columns = ['a', 'b', 'c']
v1 = [[1, 2, 3], [10, 11, 12]]
v2 = [[4, 5, 6], [21, 22, 23]]

df1 = pd.DataFrame(v1, columns=columns)
df2 = pd.DataFrame(v2, columns=columns)

# Both frames use the same column names, so suffixes tell the two sides apart.
df1.join(other=df2, lsuffix='_df1', rsuffix='_df2')

Unnamed: 0,a_df1,b_df1,c_df1,a_df2,b_df2,c_df2
0,1,2,3,4,5,6
1,10,11,12,21,22,23


In [28]:
# join on a column: make the shared key column 'a' the index of both
# DataFrames, then join on that index
columns = ['a', 'b', 'c']
v1 = [[1, 2, 3], [10, 11, 12]]
v2 = [[1, 5, 6], [10, 22, 23]]

df1 = pd.DataFrame(v1, columns=columns)
df2 = pd.DataFrame(v2, columns=columns)

left_keyed = df1.set_index('a')
right_keyed = df2.set_index('a')
# The remaining columns (b, c) exist on both sides, so suffixes disambiguate them.
left_keyed.join(right_keyed, lsuffix='_df1', rsuffix='_df2')

Unnamed: 0_level_0,b_df1,c_df1,b_df2,c_df2
a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2,3,5,6
10,11,12,22,23
