In [11]:
# 라이브러리 불러오기
import pandas as pd
import seaborn as sns

# Load seaborn's titanic dataset and keep only five columns
# (age, sex, class, fare, survived) as the working DataFrame.
titanic = sns.load_dataset('titanic')
df = titanic.loc[
    :,
    ['age', 'sex', 'class', 'fare', 'survived'],
]

df

Unnamed: 0,age,sex,class,fare,survived
0,22.0,male,Third,7.2500,0
1,38.0,female,First,71.2833,1
2,26.0,female,Third,7.9250,1
3,35.0,female,First,53.1000,1
4,35.0,male,Third,8.0500,0
...,...,...,...,...,...
886,27.0,male,Second,13.0000,0
887,19.0,female,First,30.0000,1
888,,female,Third,23.4500,0
889,26.0,male,First,30.0000,1


In [17]:
# Group the rows by the class column.
grouped = df.groupby(by=['class'], observed=True)

# Running (cumulative) sum of fare inside each group; the result keeps the
# original row index.
grouped['fare'].cumsum()

0          7.2500
1         71.2833
2         15.1750
3        124.3833
4         23.2250
          ...    
886     3801.8417
887    18147.4125
888     6706.9451
889    18177.4125
890     6714.6951
Name: fare, Length: 891, dtype: float64

In [13]:
# Store the per-group cumulative fare as a new column of the existing
# DataFrame, then preview the first five rows.
df['fare_cumsum'] = grouped['fare'].cumsum()
df.head(n=5)

Unnamed: 0,age,sex,class,fare,survived,fare_cumsum
0,22.0,male,Third,7.25,0,7.25
1,38.0,female,First,71.2833,1,71.2833
2,26.0,female,Third,7.925,1,15.175
3,35.0,female,First,53.1,1,124.3833
4,35.0,male,Third,8.05,0,23.225


In [18]:
# Per-group cumulative sum of fare again, this time through transform() on a
# one-column selection, so the result is a DataFrame rather than a Series.
grouped[['fare']].transform(func='cumsum')

Unnamed: 0,fare
0,7.2500
1,71.2833
2,15.1750
3,124.3833
4,23.2250
...,...
886,3801.8417
887,18147.4125
888,6706.9451
889,18177.4125


In [19]:
# Per-group mean of the age and survived columns, repeated on every row of
# the group it belongs to.
grouped[['age', 'survived']].transform(func='mean')

Unnamed: 0,age,survived
0,25.140620,0.242363
1,38.233441,0.629630
2,25.140620,0.242363
3,38.233441,0.629630
4,25.140620,0.242363
...,...,...
886,29.877630,0.472826
887,38.233441,0.629630
888,25.140620,0.242363
889,38.233441,0.629630


In [20]:
# z-score를 계산하는 사용자 함수 정의
def z_score(x):
    """Return the z-score of ``x``: (x - mean of x) / standard deviation of x.

    ``x`` must provide ``mean()`` and ``std()`` and support arithmetic with
    their results (for example a pandas Series).
    """
    centered = x - x.mean()
    return centered / x.std()
   
# Convert the age column to z-scores computed within each group by passing
# the user-defined function to transform().
age_zscore = grouped['age'].transform(func=z_score)
age_zscore

0     -0.251342
1     -0.015770
2      0.068776
3     -0.218434
4      0.789041
         ...   
886   -0.205529
887   -1.299306
888         NaN
889   -0.826424
890    0.548953
Name: age, Length: 891, dtype: float64

In [21]:
# Same per-group z-score conversion, with the formula written inline as a
# lambda instead of a named function.
age_zscore2 = grouped['age'].transform(
    lambda ages: (ages - ages.mean()) / ages.std()
)
age_zscore2

0     -0.251342
1     -0.015770
2      0.068776
3     -0.218434
4      0.789041
         ...   
886   -0.205529
887   -1.299306
888         NaN
889   -0.826424
890    0.548953
Name: age, Length: 891, dtype: float64

In [22]:
# Same per-group z-score, built from the built-in 'mean' and 'std'
# aggregations passed to transform() by name instead of a Python function.
age_zscore3 = (
    df['age']
    .sub(grouped['age'].transform('mean'))
    .div(grouped['age'].transform('std'))
)
age_zscore3

0     -0.251342
1     -0.015770
2      0.068776
3     -0.218434
4      0.789041
         ...   
886   -0.205529
887   -1.299306
888         NaN
889   -0.826424
890    0.548953
Name: age, Length: 891, dtype: float64