### [ Part 6장 ]

## 함수매핑

### 1-1 개별 원소에 함수 매핑 <hr>
- apply() 메소드 사용

In [360]:
# 모듈 불러오기
import pandas as pd
import numpy as np

In [361]:
# 파일 불러오기
DATA_FILE = '../DATA/titanic.csv'
titanic = pd.read_csv(DATA_FILE)
df = titanic.loc[:,['age','fare']]

df.head()


Unnamed: 0,age,fare
0,22.0,7.25
1,38.0,71.2833
2,26.0,7.925
3,35.0,53.1
4,35.0,8.05


In [362]:
# 사용자 함수 정의
def add_10(n):
    return n + 10

def add_two_obj(a,b):
    return a + b

print(add_10(10))
print(add_two_obj(10,10))

# apply 함수 사용
sr1 = df['age'].apply(add_10)
sr1.head()

20
20


0    32.0
1    48.0
2    36.0
3    45.0
4    45.0
Name: age, dtype: float64

In [363]:
# lambda로 apply 사용
sr2 = df.age.apply(lambda x:x+10)
sr2

# 함수 매개변수가 2개 이상일 때
sr3 = df.age.apply(add_two_obj,b=10)
sr3

# lambda 매개변수 2개일 때
sr4 = df.age.apply(lambda x, y: x + y, y=10)
sr4

0      32.0
1      48.0
2      36.0
3      45.0
4      45.0
       ... 
886    37.0
887    29.0
888     NaN
889    36.0
890    42.0
Name: age, Length: 891, dtype: float64

In [364]:
# map 사용 -> apply랑 비슷함. 근데 apply가 더 범용적임
def over_thirty(age):
    return age > 30

sr_map = df.age.map(over_thirty)
sr_map

0      False
1       True
2      False
3       True
4       True
       ...  
886    False
887    False
888    False
889    False
890     True
Name: age, Length: 891, dtype: bool

In [365]:
print(titanic.sex.unique())
print(titanic.sex.head())

over_forty_dict = {'male':0, 'female':1}
titanic.gender = titanic.sex.map(over_forty_dict)
print(titanic.gender.head())


['male' 'female']
0      male
1    female
2    female
3    female
4      male
Name: sex, dtype: object
0    0
1    1
2    1
3    1
4    0
Name: sex, dtype: int64


  titanic.gender = titanic.sex.map(over_forty_dict)


### 1-2 데이터프레임의 각 열에 함수 매핑 <hr>


In [366]:
## 조건식을 만족하는 열 필터링 - 평균값이 30을 초과하는 열만 필터링
fil = df.apply(lambda x:x.mean()>30)
print(fil)
df1 = df.loc[:, fil]
df1


age     False
fare     True
dtype: bool


Unnamed: 0,fare
0,7.2500
1,71.2833
2,7.9250
3,53.1000
4,8.0500
...,...
886,13.0000
887,30.0000
888,23.4500
889,30.0000


In [367]:
## 조건부 열 추가 - 각 행의 평균값이 50을 초과하는지 여부에 따라 High 열 추가
df['high'] = df.apply(lambda x: 'o' if x.mean()>50 else 'x', axis=1)
df



Unnamed: 0,age,fare,high
0,22.0,7.2500,x
1,38.0,71.2833,o
2,26.0,7.9250,x
3,35.0,53.1000,x
4,35.0,8.0500,x
...,...,...,...
886,27.0,13.0000,x
887,19.0,30.0000,x
888,,23.4500,x
889,26.0,30.0000,x


In [368]:
## isnull
df.pipe(lambda x:x.isnull())

Unnamed: 0,age,fare,high
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
...,...,...,...
886,False,False,False
887,False,False,False
888,True,False,False
889,False,False,False


## 2-1 <hr>

In [369]:
## 열 순서 바꾸기
df = titanic.loc[:, 'survived':'age']
df

columns = list(df.columns.values)
print(columns)

df_col = df[['sex', 'age', 'pclass','survived']] ## 리스트안에 리스트로 넣어야됨
df_col


['survived', 'pclass', 'sex', 'age']


Unnamed: 0,sex,age,pclass,survived
0,male,22.0,3,0
1,female,38.0,1,1
2,female,26.0,3,1
3,female,35.0,1,1
4,male,35.0,3,0
...,...,...,...,...
886,male,27.0,2,0
887,female,19.0,1,1
888,female,,3,0
889,male,26.0,1,1


## 2-2 <hr>

In [370]:
## 열 분리
df = pd.read_csv('../DATA/상품_매출.csv')
df.월 = df.월.astype('str')
df.head()

## 하나씩 분리해서 가져온 뒤 다시 하나씩 저장하기
data = df.월.str.split('-')
data.head()
df['년'] = data.str.get(0)
df['월_'] = data.str.get(1)
df

## 한번에 expand=True 하면 DataFrame으로 저장할 수 있음
data = df.월.str.split('-',expand=True)
data

Unnamed: 0,0,1
0,2025,1
1,2025,2
2,2025,3
3,2025,4
4,2025,5
5,2025,1
6,2025,2
7,2025,3
8,2025,4
9,2025,5


## 3 그룹연산

### 3-1 <hr>


In [371]:
# titanic 파일 불러오기
DATA_FILE = '../DATA/titanic.csv'
titanic = pd.read_csv(DATA_FILE)
df = titanic.loc[:,['age','fare','class','sex','survived']]

df.head()

Unnamed: 0,age,fare,class,sex,survived
0,22.0,7.25,Third,male,0
1,38.0,71.2833,First,female,1
2,26.0,7.925,Third,female,1
3,35.0,53.1,First,female,1
4,35.0,8.05,Third,male,0


In [372]:
group = df.groupby(['class'], observed=True)
group.head()

for key, value in group:
    print('key : ', key)
    print('number : ', len(value))
    print(group.head())

key :  ('First',)
number :  216
     age     fare   class     sex  survived
0   22.0   7.2500   Third    male         0
1   38.0  71.2833   First  female         1
2   26.0   7.9250   Third  female         1
3   35.0  53.1000   First  female         1
4   35.0   8.0500   Third    male         0
5    NaN   8.4583   Third    male         0
6   54.0  51.8625   First    male         0
7    2.0  21.0750   Third    male         0
9   14.0  30.0708  Second  female         1
11  58.0  26.5500   First  female         1
15  55.0  16.0000  Second  female         1
17   NaN  13.0000  Second    male         1
20  35.0  26.0000  Second    male         0
21  34.0  13.0000  Second    male         1
23  28.0  35.5000   First    male         1
key :  ('Second',)
number :  184
     age     fare   class     sex  survived
0   22.0   7.2500   Third    male         0
1   38.0  71.2833   First  female         1
2   26.0   7.9250   Third  female         1
3   35.0  53.1000   First  female         1
4   35.0   

In [373]:
average = group.mean(numeric_only=True)
average

Unnamed: 0_level_0,age,fare,survived
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
First,38.233441,84.154687,0.62963
Second,29.87763,20.662183,0.472826
Third,25.14062,13.67555,0.242363


In [374]:
## 원하는 열의 원하는 값을 뽑기 [분할]
## 방법 1
group1 = group.get_group('First')
group1

## 방법 2
#mask = df.loc[:, 'class']=='First'
#df = df[mask]
#df

  group1 = group.get_group('First')


Unnamed: 0,age,fare,class,sex,survived
1,38.0,71.2833,First,female,1
3,35.0,53.1000,First,female,1
6,54.0,51.8625,First,male,0
11,58.0,26.5500,First,female,1
23,28.0,35.5000,First,male,1
...,...,...,...,...,...
871,47.0,52.5542,First,female,1
872,33.0,5.0000,First,male,0
879,56.0,83.1583,First,female,1
887,19.0,30.0000,First,female,1


### 3-2 <hr>

In [375]:
gr = df.groupby(['class'], observed=True)
std_all = gr.std(numeric_only=True)
std_all

Unnamed: 0_level_0,age,fare,survived
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
First,14.802856,78.380373,0.484026
Second,14.001077,13.417399,0.500623
Third,12.495398,11.778142,0.428949


In [376]:
## as_index=False
# : 이걸 False로 두면 새로운 행 인덱스를 만들지 않음
gr = df.groupby(['class'], observed=True, as_index=True).std(numeric_only=True) 
gr.head()

Unnamed: 0_level_0,age,fare,survived
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
First,14.802856,78.380373,0.484026
Second,14.001077,13.417399,0.500623
Third,12.495398,11.778142,0.428949


In [377]:
## class age 묶고 -> 각 고유값이 가진 개수 출력
gr2 = df.groupby(['class', 'age'], observed=True)
gr2[['class','age']].value_counts()

class  age  
First  0.92     1
       2.00     1
       4.00     1
       11.00    1
       14.00    1
               ..
Third  61.00    1
       63.00    1
       65.00    1
       70.50    1
       74.00    1
Name: count, Length: 182, dtype: int64

In [378]:
z = gr2.aggregate('mean',numeric_only=True)
z

Unnamed: 0_level_0,Unnamed: 1_level_0,fare,survived
class,age,Unnamed: 2_level_1,Unnamed: 3_level_1
First,0.92,151.5500,1.0
First,2.00,151.5500,0.0
First,4.00,81.8583,1.0
First,11.00,120.0000,1.0
First,14.00,120.0000,1.0
...,...,...,...
Third,61.00,6.2375,0.0
Third,63.00,9.5875,1.0
Third,65.00,7.7500,0.0
Third,70.50,7.7500,0.0


In [379]:
gr2.agg(['min','max'])

Unnamed: 0_level_0,Unnamed: 1_level_0,fare,fare,sex,sex,survived,survived
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,min,max,min,max
class,age,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
First,0.92,151.5500,151.5500,male,male,1,1
First,2.00,151.5500,151.5500,female,female,0,0
First,4.00,81.8583,81.8583,male,male,1,1
First,11.00,120.0000,120.0000,male,male,1,1
First,14.00,120.0000,120.0000,female,female,1,1
...,...,...,...,...,...,...,...
Third,61.00,6.2375,6.2375,male,male,0,0
Third,63.00,9.5875,9.5875,female,female,1,1
Third,65.00,7.7500,7.7500,male,male,0,0
Third,70.50,7.7500,7.7500,male,male,0,0


In [380]:
gr2.agg({'fare':['min','max'], 'age':'mean'}) 


Unnamed: 0_level_0,Unnamed: 1_level_0,fare,fare,age
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean
class,age,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
First,0.92,151.5500,151.5500,0.92
First,2.00,151.5500,151.5500,2.00
First,4.00,81.8583,81.8583,4.00
First,11.00,120.0000,120.0000,11.00
First,14.00,120.0000,120.0000,14.00
...,...,...,...,...
Third,61.00,6.2375,6.2375,61.00
Third,63.00,9.5875,9.5875,63.00
Third,65.00,7.7500,7.7500,65.00
Third,70.50,7.7500,7.7500,70.50


In [381]:
## nth()
# : 클래스에서 원하는 행 인덱스 값 뽑아오기
gr3 = df.groupby(['class'],observed=True, as_index=False)
gr3.head()
g1 = gr3.nth(1)

## sort_values('열이름',accending=True)
#  : 출력 dataFrame에서 원하는 열을 정렬하기 
a = g1.sort_values('class')
display(a)

g1 = g1.set_index(['age','class','sex','fare'])
a = g1.sort_index(level=3, ascending=True)
display(a)

Unnamed: 0,age,fare,class,sex,survived
3,35.0,53.1,First,female,1
15,55.0,16.0,Second,female,1
2,26.0,7.925,Third,female,1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,survived
age,class,sex,fare,Unnamed: 4_level_1
26.0,Third,female,7.925,1
55.0,Second,female,16.0,1
35.0,First,female,53.1,1


In [382]:
# filter랑 apply 비교
# filter
# display(df.head())
grb = df.groupby('class', observed=True)
age_filter = grb.filter(lambda x:x['age'].mean() < 30)      ## 그룹을 나누고 해당 그룹의 age값 평균을 필터링함
age_filter

age_filter = grb.apply(lambda x:x['age'].mean() < 30)      ## 그룹을 나누고 해당 그룹의 age값 평균을 필터링함

df.loc[df['class'].isin(age_filter[age_filter==True].index), ['age','survived']]

  age_filter = grb.apply(lambda x:x['age'].mean() < 30)      ## 그룹을 나누고 해당 그룹의 age값 평균을 필터링함


Unnamed: 0,age,survived
0,22.0,0
2,26.0,1
4,35.0,0
5,,0
7,2.0,0
...,...,...
884,25.0,0
885,39.0,0
886,27.0,0
888,,0


## 4
멀티인덱스


### 4-1 <hr>

In [383]:
## 그룹 만들기
group = df.groupby(['class','sex'], observed=True)
gdf = group.agg(['mean','std'], numeric_only=True)
gdf

Unnamed: 0_level_0,Unnamed: 1_level_0,age,age,fare,fare,survived,survived
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std
class,sex,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
First,female,34.611765,13.612052,106.125798,74.259988,0.968085,0.176716
First,male,41.281386,15.13957,67.226127,77.548021,0.368852,0.484484
Second,female,28.722973,12.872702,21.970121,10.891796,0.921053,0.271448
Second,male,30.740707,14.793894,19.741782,14.922235,0.157407,0.365882
Third,female,21.75,12.729964,16.11881,11.690314,0.5,0.501745
Third,male,26.507589,12.159514,12.661633,11.681696,0.135447,0.342694


In [384]:
## 리스트 써서 멀티인덱스 만들기
## name = letter, number
## a a b b / 1 2 1 2
data = [['a','a','b','b'], ['1','2','1','2']]
mult_array = pd.MultiIndex.from_arrays(data, names=['letter','number'])
mult_array



MultiIndex([('a', '1'),
            ('a', '2'),
            ('b', '1'),
            ('b', '2')],
           names=['letter', 'number'])

In [385]:
## 튜플로 멀티인덱스 만들기
## 위와 동일
data = (('a',1), ('b',2), ('a',1), ('b',2))
mult_tuple = pd.MultiIndex.from_tuples(data, names=('letter','number'))
mult_tuple

MultiIndex([('a', 1),
            ('b', 2),
            ('a', 1),
            ('b', 2)],
           names=['letter', 'number'])

In [386]:
## 데이터 프레임으로 멀티 인덱스 만들기
dataDF = pd.DataFrame([['a',1], ['b',2], ['a',1], ['b',2]], columns=['letter', 'number'])
display(dataDF)

mult = pd.MultiIndex.from_frame(dataDF, names=dataDF.columns)
mult

Unnamed: 0,letter,number
0,a,1
1,b,2
2,a,1
3,b,2


MultiIndex([('a', 1),
            ('b', 2),
            ('a', 1),
            ('b', 2)],
           names=['letter', 'number'])

In [387]:
gdf.columns.get_level_values(1)

Index(['mean', 'std', 'mean', 'std', 'mean', 'std'], dtype='object')

In [388]:
## class = First / sex = female 의 'age' 선택
display(gdf)
gdf.loc[('First','female'), 'age']

Unnamed: 0_level_0,Unnamed: 1_level_0,age,age,fare,fare,survived,survived
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std
class,sex,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
First,female,34.611765,13.612052,106.125798,74.259988,0.968085,0.176716
First,male,41.281386,15.13957,67.226127,77.548021,0.368852,0.484484
Second,female,28.722973,12.872702,21.970121,10.891796,0.921053,0.271448
Second,male,30.740707,14.793894,19.741782,14.922235,0.157407,0.365882
Third,female,21.75,12.729964,16.11881,11.690314,0.5,0.501745
Third,male,26.507589,12.159514,12.661633,11.681696,0.135447,0.342694


mean    34.611765
std     13.612052
Name: (First, female), dtype: float64

In [389]:
## first이고 female인 행의 age열의 std부터 fare열의 mean열까지의 범위 선택
gdf.loc[('First','female'), ('age','std'):('fare','mean')]

age   std      13.612052
fare  mean    106.125798
Name: (First, female), dtype: float64

In [390]:
gdf.loc[('First','male')]       ## 얘는 값으로 나옴

gdf.xs('male', level='sex')     ## 얘는 DF로 나옴

Unnamed: 0_level_0,age,age,fare,fare,survived,survived
Unnamed: 0_level_1,mean,std,mean,std,mean,std
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
First,41.281386,15.13957,67.226127,77.548021,0.368852,0.484484
Second,30.740707,14.793894,19.741782,14.922235,0.157407,0.365882
Third,26.507589,12.159514,12.661633,11.681696,0.135447,0.342694


## 5

### 5-1 <hr>

In [391]:
## 
import pandas as pd

In [392]:
## 데이터 프레임 생성
df1 = pd.DataFrame({'a' : ['a0','a1','a2','a3'],
                    'b' : ['b0', 'b1', 'b2', 'b3'],
                    'c' : ['c0', 'c1', 'c2', 'c3']},
                    index=[0,1,2,3])

df2=pd.DataFrame({'a':['a2','a3','a4','a5'],
                  'b':['b2','b3','b4','b5'],
                  'c':['c2','c3','c4','c5'],
                  'd':['d2','d3','d4','d5']},
                  index=[2,3,4,5])
display(df1, df2)



Unnamed: 0,a,b,c
0,a0,b0,c0
1,a1,b1,c1
2,a2,b2,c2
3,a3,b3,c3


Unnamed: 0,a,b,c,d
2,a2,b2,c2,d2
3,a3,b3,c3,d3
4,a4,b4,c4,d4
5,a5,b5,c5,d5


In [396]:
## concat()
## 데이터프레임 연결
ret = pd.concat([df1, df2], ignore_index=True)
ret


Unnamed: 0,a,b,c,d
0,a0,b0,c0,
1,a1,b1,c1,
2,a2,b2,c2,
3,a3,b3,c3,
4,a2,b2,c2,d2
5,a3,b3,c3,d3
6,a4,b4,c4,d4
7,a5,b5,c5,d5


In [None]:
## join = 'inner' 
ret1 = pd.concat([df1, df2], join='inner', axis=1)      ## 열이 겹치는걸 반환
display(ret1)

ret1 = pd.concat([df1, df2], join='inner', axis=0)      ## 행이 겹치는걸 반환
ret1

Unnamed: 0,a,b,c,a.1,b.1,c.1,d
2,a2,b2,c2,a2,b2,c2,d2
3,a3,b3,c3,a3,b3,c3,d3


Unnamed: 0,a,b,c
0,a0,b0,c0
1,a1,b1,c1
2,a2,b2,c2
3,a3,b3,c3
2,a2,b2,c2
3,a3,b3,c3
4,a4,b4,c4
5,a5,b5,c5


In [404]:
## join = 'outer'
ret2 = pd.concat([df1, df2], axis=0, join='outer')
display(ret2)

ret2 = pd.concat([df1, df2], axis=1, join='outer')
display(ret2)


Unnamed: 0,a,b,c,d
0,a0,b0,c0,
1,a1,b1,c1,
2,a2,b2,c2,
3,a3,b3,c3,
2,a2,b2,c2,d2
3,a3,b3,c3,d3
4,a4,b4,c4,d4
5,a5,b5,c5,d5


Unnamed: 0,a,b,c,a.1,b.1,c.1,d
0,a0,b0,c0,,,,
1,a1,b1,c1,,,,
2,a2,b2,c2,a2,b2,c2,d2
3,a3,b3,c3,a3,b3,c3,d3
4,,,,a4,b4,c4,d4
5,,,,a5,b5,c5,d5


In [405]:
DF1 = pd.read_excel('../DATA/stock_price.xlsx')
DF2 = pd.read_excel('../DATA/stock_valuation.xlsx')

display(DF1, DF2)

Unnamed: 0,id,stock_name,value,price
0,128940,한미약품,59385.666667,421000
1,130960,CJ E&M,58540.666667,98900
2,138250,엔에스쇼핑,14558.666667,13200
3,139480,이마트,239230.833333,254500
4,142280,녹십자엠에스,468.833333,10200
5,145990,삼양사,82750.0,82000
6,185750,종근당,40293.666667,100500
7,192400,쿠쿠홀딩스,179204.666667,177500
8,199800,툴젠,-2514.333333,115400
9,204210,모두투어리츠,3093.333333,3475


Unnamed: 0,id,name,eps,bps,per,pbr
0,130960,CJ E&M,6301.333333,54068,15.695091,1.829178
1,136480,하림,274.166667,3551,11.489362,0.887074
2,138040,메리츠금융지주,2122.333333,14894,6.313806,0.899691
3,139480,이마트,18268.166667,295780,13.931338,0.860437
4,145990,삼양사,5741.0,108090,14.283226,0.758627
5,161390,한국타이어,5648.5,51341,7.453306,0.820007
6,181710,NHN엔터테인먼트,2110.166667,78434,30.755864,0.827447
7,185750,종근당,3990.333333,40684,25.185866,2.470259
8,204210,모두투어리츠,85.166667,5335,40.802348,0.651359
9,207940,삼성바이오로직스,4644.166667,60099,89.790059,6.938551


In [406]:
## id, name을 기준으로 병합. 기준 데이터 프레임은 DF2
dataMG = pd.merge(DF1, DF2, how='right', left_on=['id', 'stock_name'], right_on=['id','name'])
dataMG

Unnamed: 0,id,stock_name,value,price,name,eps,bps,per,pbr
0,130960,CJ E&M,58540.666667,98900.0,CJ E&M,6301.333333,54068,15.695091,1.829178
1,136480,,,,하림,274.166667,3551,11.489362,0.887074
2,138040,,,,메리츠금융지주,2122.333333,14894,6.313806,0.899691
3,139480,이마트,239230.833333,254500.0,이마트,18268.166667,295780,13.931338,0.860437
4,145990,삼양사,82750.0,82000.0,삼양사,5741.0,108090,14.283226,0.758627
5,161390,,,,한국타이어,5648.5,51341,7.453306,0.820007
6,181710,,,,NHN엔터테인먼트,2110.166667,78434,30.755864,0.827447
7,185750,종근당,40293.666667,100500.0,종근당,3990.333333,40684,25.185866,2.470259
8,204210,모두투어리츠,3093.333333,3475.0,모두투어리츠,85.166667,5335,40.802348,0.651359
9,207940,,,,삼성바이오로직스,4644.166667,60099,89.790059,6.938551


In [None]:
price = DF1[DF1['price'] < 50000]
price

Unnamed: 0,id,stock_name,value,price
2,138250,엔에스쇼핑,14558.666667,13200
4,142280,녹십자엠에스,468.833333,10200
9,204210,모두투어리츠,3093.333333,3475


In [409]:
pd.merge(price,DF2)

Unnamed: 0,id,stock_name,value,price,name,eps,bps,per,pbr
0,204210,모두투어리츠,3093.333333,3475,모두투어리츠,85.166667,5335,40.802348,0.651359


## 6


### 6-1 <hr>

In [411]:
df = titanic.loc[:,['age','fare','class','sex','survived']]
df

Unnamed: 0,age,fare,class,sex,survived
0,22.0,7.2500,Third,male,0
1,38.0,71.2833,First,female,1
2,26.0,7.9250,Third,female,1
3,35.0,53.1000,First,female,1
4,35.0,8.0500,Third,male,0
...,...,...,...,...,...
886,27.0,13.0000,Second,male,0
887,19.0,30.0000,First,female,1
888,,23.4500,Third,female,0
889,26.0,30.0000,First,male,1


In [414]:
pivot = pd.pivot_table(df,
                       index='class',
                       columns='sex',
                       values='age',
                       aggfunc=['mean','min'],
                       observed=True)
pivot

Unnamed: 0_level_0,mean,mean,min,min
sex,female,male,female,male
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
First,34.611765,41.281386,2.0,0.92
Second,28.722973,30.740707,2.0,0.67
Third,21.75,26.507589,0.75,0.42


## 7


### 7-1 <hr>

In [421]:
## stack
display(df)
a = df.stack()
display(df.stack())
display(a)

Unnamed: 0,age,fare,class,sex,survived
0,22.0,7.2500,Third,male,0
1,38.0,71.2833,First,female,1
2,26.0,7.9250,Third,female,1
3,35.0,53.1000,First,female,1
4,35.0,8.0500,Third,male,0
...,...,...,...,...,...
886,27.0,13.0000,Second,male,0
887,19.0,30.0000,First,female,1
888,,23.4500,Third,female,0
889,26.0,30.0000,First,male,1


0    age          22.0
     fare         7.25
     class       Third
     sex          male
     survived        0
                 ...  
890  age          32.0
     fare         7.75
     class       Third
     sex          male
     survived        0
Length: 4278, dtype: object

0    age          22.0
     fare         7.25
     class       Third
     sex          male
     survived        0
                 ...  
890  age          32.0
     fare         7.75
     class       Third
     sex          male
     survived        0
Length: 4278, dtype: object

## 8

### 8-1 <hr>

In [423]:
df = pd.DataFrame({
    '이름': ['철수', '영희'],
    '국어': [90, 85],
    '수학': [80, 95]
})
df

Unnamed: 0,이름,국어,수학
0,철수,90,80
1,영희,85,95


In [433]:
pd.melt(df)

Unnamed: 0,variable,value
0,이름,철수
1,이름,영희
2,국어,90
3,국어,85
4,수학,80
5,수학,95


In [435]:
pd.melt(df, id_vars=['이름'],var_name='과목', value_name='점수', value_vars=['국어','수학'])

Unnamed: 0,이름,과목,점수
0,철수,국어,90
1,영희,국어,85
2,철수,수학,80
3,영희,수학,95
