In [1]:
import pandas as pd
import seaborn as sns

# Load the seaborn "titanic" dataset and build a DataFrame from five of its
# columns: age, sex, class, fare and survived.
titanic = sns.load_dataset('titanic')
df = titanic.loc[:, ['age', 'sex', 'class', 'fare', 'survived']]

# Split the rows into groups keyed by the `class` column.
# NOTE(review): observed=True only matters when the key is categorical —
# presumably `class` is; confirm with df.dtypes.
grouped = df.groupby(['class'], observed=True)

# Standard deviation of every numeric column per group, returned as a DataFrame.
# numeric_only=True leaves out non-numeric columns (here: sex).
std_all = grouped.std(numeric_only=True)

std_all

Unnamed: 0_level_0,age,fare,survived
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
First,14.802856,78.380373,0.484026
Second,14.001077,13.417399,0.500623
Third,12.495398,11.778142,0.428949


In [5]:
# Same per-group standard deviation, but with as_index=False: the group key
# `class` is kept as an ordinary column and the rows get a default integer index.
std_all_index = df.groupby(['class'], observed=True, as_index=False).std(numeric_only=True)

std_all_index

Unnamed: 0,class,age,fare,survived
0,First,14.802856,78.380373,0.484026
1,Second,14.001077,13.417399,0.500623
2,Third,12.495398,11.778142,0.428949


In [6]:
# Pick the `fare` column out of the already-aggregated frame (yields a Series
# indexed by class).
std_all['fare']

class
First     78.380373
Second    13.417399
Third     11.778142
Name: fare, dtype: float64

In [7]:
# Aggregate the standard deviation of the `fare` column only; selecting a single
# column from the group object returns the result as a Series.
std_fare = grouped['fare'].std(numeric_only=True)
std_fare

class
First     78.380373
Second    13.417399
Third     11.778142
Name: fare, dtype: float64

In [8]:
# Aggregate the standard deviation of the `age` and `survived` columns per group.
# Selecting with a list of columns returns a DataFrame (not a Series).
std_age_survived = grouped[['age', 'survived']].std(numeric_only=True)
std_age_survived

Unnamed: 0_level_0,age,survived
class,Unnamed: 1_level_1,Unnamed: 2_level_1
First,14.802856,0.484026
Second,14.001077,0.500623
Third,12.495398,0.428949


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for each group.
grouped.describe()

Unnamed: 0_level_0,age,age,age,age,age,age,age,age,fare,fare,fare,fare,fare,survived,survived,survived,survived,survived,survived,survived,survived
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
First,186.0,38.233441,14.802856,0.92,27.0,37.0,49.0,80.0,216.0,84.154687,...,93.5,512.3292,216.0,0.62963,0.484026,0.0,0.0,1.0,1.0,1.0
Second,173.0,29.87763,14.001077,0.67,23.0,29.0,36.0,70.0,184.0,20.662183,...,26.0,73.5,184.0,0.472826,0.500623,0.0,0.0,0.0,1.0,1.0
Third,355.0,25.14062,12.495398,0.42,18.0,24.0,32.0,74.0,491.0,13.67555,...,15.5,69.55,491.0,0.242363,0.428949,0.0,0.0,0.0,0.0,1.0


In [16]:
# Frequency of each unique value within every group.
# `class` is both the grouping key and a selected column; the result is a
# Series of counts indexed by (class, sex).
grouped[['class', 'sex']].value_counts()

class   sex   
First   male      122
        female     94
Second  male      108
        female     76
Third   male      347
        female    144
Name: count, dtype: int64

In [17]:
# Apply aggregate() to the group object: the 'mean' function is applied to all
# columns, with numeric_only=True forwarded so non-numeric columns are left out.
agg_mean = grouped.aggregate('mean', numeric_only=True)
agg_mean

Unnamed: 0_level_0,age,fare,survived
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
First,38.233441,84.154687,0.62963
Second,29.87763,20.662183,0.472826
Third,25.14062,13.67555,0.242363


In [18]:
# Apply agg() to the group object ('mean' applied to all columns); called with
# the same arguments as the aggregate() example.
agg_mean2 = grouped.agg('mean', numeric_only=True)
agg_mean2

Unnamed: 0_level_0,age,fare,survived
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
First,38.233441,84.154687,0.62963
Second,29.87763,20.662183,0.472826
Third,25.14062,13.67555,0.242363


In [19]:
# Apply several functions uniformly to every column: passing a list produces one
# sub-column per function. Non-numeric columns such as sex are included too.
agg_all = grouped.agg(['min', 'max'])
agg_all

Unnamed: 0_level_0,age,age,sex,sex,fare,fare,survived,survived
Unnamed: 0_level_1,min,max,min,max,min,max,min,max
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
First,0.92,80.0,female,male,0.0,512.3292,0,1
Second,0.67,70.0,female,male,0.0,73.5,0,1
Third,0.42,74.0,female,male,0.0,69.55,0,1


In [20]:
# Apply different functions per column: the dict maps a column name to the
# function (or list of functions) to aggregate it with.
agg_sep = grouped.agg({'fare':['min', 'max'], 'age':'mean'})
agg_sep

Unnamed: 0_level_0,fare,fare,age
Unnamed: 0_level_1,min,max,mean
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
First,0.0,512.3292,38.233441
Second,0.0,73.5,29.87763
Third,0.0,69.55,25.14062


In [21]:
# Apply agg() to the group object, passing a user-defined function as the argument.
def min_max(x):
    """Return the spread of ``x``: its maximum value minus its minimum value.

    ``x`` only needs to provide ``max()`` and ``min()`` methods.
    """
    highest = x.max()
    lowest = x.min()
    return highest - lowest

# For each group, compute the difference between the maximum and minimum of the
# `age` and `fare` columns by handing the user-defined min_max to agg().
agg_minmax = grouped[['age', 'fare']].agg(min_max)

agg_minmax

Unnamed: 0_level_0,age,fare
class,Unnamed: 1_level_1,Unnamed: 2_level_1
First,79.08,512.3292
Second,69.33,73.5
Third,73.58,69.55
