함수 매핑

In [None]:
import seaborn as sns
import pandas as pd

# Load the Titanic sample data and keep only the two numeric columns used
# below, adding a constant helper column 'ten' in the same expression.
titanic = sns.load_dataset('titanic')
df = titanic[['age', 'fare']].assign(ten=10)
print(df.head(n=5))

    age     fare  ten
0  22.0   7.2500   10
1  38.0  71.2833   10
2  26.0   7.9250   10
3  35.0  53.1000   10
4  35.0   8.0500   10


In [None]:
def add_10(n):
    """Return ``n`` increased by 10."""
    increment = 10
    return n + increment
def add_two_obj(a,b):
    """Return ``a + b`` for any two operands that support ``+``."""
    total = a + b
    return total

# Series.apply: map a one-argument function over every element of a column.
sr1 = df.age.apply(add_10)
print(sr1.head(n=5))
# Extra keyword arguments given to apply() are forwarded to the function.
sr2 = df.age.apply(add_two_obj, b=10)
print(sr2.head(n=5))

# DataFrame.applymap: map the function over every cell of the frame.
# NOTE(review): pandas >= 2.1 deprecates applymap in favour of DataFrame.map;
# kept as-is here for compatibility with older pandas — confirm target version.
df_map = df.applymap(add_10)
print(df_map.head(n=5))

0    32.0
1    48.0
2    36.0
3    45.0
4    45.0
Name: age, dtype: float64
0    32.0
1    48.0
2    36.0
3    45.0
4    45.0
Name: age, dtype: float64
    age     fare  ten
0  32.0  17.2500   20
1  48.0  81.2833   20
2  36.0  17.9250   20
3  45.0  63.1000   20
4  45.0  18.0500   20


In [None]:
def missing_value(series):
    """Return the boolean missing-value mask produced by ``series.isnull()``."""
    null_mask = series.isnull()
    return null_mask

# DataFrame.apply with the default axis hands each column (a Series) to the
# function, so the result is a frame of per-column missing-value masks.
result = df.apply(missing_value, axis=0)
print(result.head(n=5))

     age   fare    ten
0  False  False  False
1  False  False  False
2  False  False  False
3  False  False  False
4  False  False  False


In [None]:
def min_max(x):
    """Return the range of ``x``: its maximum minus its minimum."""
    highest = x.max()
    lowest = x.min()
    return highest - lowest

# The function reduces each column to a scalar, so apply() returns a Series
# indexed by column name.
result = df.apply(min_max, axis=0)
print(result.head(n=5))

age      79.5800
fare    512.3292
ten       0.0000
dtype: float64


In [None]:
# Map a function over each row: axis=1 (equivalently 'columns') passes one
# row at a time to the callable.
df['add'] = df.apply(lambda row: add_two_obj(row['age'], row['ten']), axis='columns')
print(df.head(n=5))

    age     fare  ten   add
0  22.0   7.2500   10  32.0
1  38.0  71.2833   10  48.0
2  26.0   7.9250   10  36.0
3  35.0  53.1000   10  45.0
4  35.0   8.0500   10  45.0


In [None]:
# Mapping functions over a whole DataFrame object: pipe()
def missing_count(x):
    """Return the sum of the missing-value mask of ``x`` (count of missing entries)."""
    null_mask = missing_value(x)
    return null_mask.sum()
def total_number_missing(x):
    """Return the sum of ``missing_count(x)``, i.e. the missing counts added together."""
    per_column = missing_count(x)
    return per_column.sum()

# pipe() passes the whole DataFrame to the function in a single call.

# Function returns a DataFrame of booleans.
result_df = df.pipe(missing_value)
print(result_df.head(n=5))

# Function returns a Series (one missing count per column).
result_series = df.pipe(missing_count)
print(result_series)

# Function returns a single scalar (total missing count).
result_value = df.pipe(total_number_missing)
print(result_value)

     age   fare    ten    add
0  False  False  False  False
1  False  False  False  False
2  False  False  False  False
3  False  False  False  False
4  False  False  False  False
age     177
fare      0
ten       0
add     177
dtype: int64
354


열 재구성

In [None]:
# Label-based slice: rows 0..4 (inclusive) and columns 'survived' through 'age'.
df = titanic.loc[0:4, 'survived':'age']
print(df)

# Column labels as a plain Python list.
columns = df.columns.tolist()
print(columns)

# Alphabetical column order.
columns_sorted = sorted(columns)
df_sorted = df.loc[:, columns_sorted]
print(df_sorted)

# Hand-picked column order.
columns_customed = ['pclass','sex','age','survived']
df_customed = df.loc[:, columns_customed]
print(df_customed)

   survived  pclass     sex   age
0         0       3    male  22.0
1         1       1  female  38.0
2         1       3  female  26.0
3         1       1  female  35.0
4         0       3    male  35.0
['survived', 'pclass', 'sex', 'age']
    age  pclass     sex  survived
0  22.0       3    male         0
1  38.0       1  female         1
2  26.0       3  female         1
3  35.0       1  female         1
4  35.0       3    male         0
   pclass     sex   age  survived
0       3    male  22.0         0
1       1  female  38.0         1
2       3  female  26.0         1
3       1  female  35.0         1
4       3    male  35.0         0


In [None]:
df = pd.read_excel('./주가데이터.xlsx')
print(df.head(n=5))

# Convert the date column to strings so it can be split on the '-' separator;
# each element of `dates` is then a list of string parts.
df['연월일'] = df['연월일'].astype(str)
dates = df['연월일'].str.split(pat='-')
print(dates.head(n=5))

         연월일   당일종가  전일종가     시가     고가     저가     거래량
0 2018-07-02  10100   600  10850  10900  10000  137977
1 2018-06-29  10700   300  10550  10900   9990  170253
2 2018-06-28  10400   500  10900  10950  10150  155769
3 2018-06-27  10900   100  10800  11050  10500  133548
4 2018-06-26  10800   350  10900  11000  10700   63039
0    [2018, 07, 02]
1    [2018, 06, 29]
2    [2018, 06, 28]
3    [2018, 06, 27]
4    [2018, 06, 26]
Name: 연월일, dtype: object


In [None]:
# Pull list positions 0, 1 and 2 out of every split date and store them as
# three new columns (assigned left to right in that order).
df['연'], df['월'], df['일'] = (dates.str.get(part) for part in range(3))

print(df.head(n=5))

          연월일   당일종가  전일종가     시가     고가     저가     거래량     연   월   일
0  2018-07-02  10100   600  10850  10900  10000  137977  2018  07  02
1  2018-06-29  10700   300  10550  10900   9990  170253  2018  06  29
2  2018-06-28  10400   500  10900  10950  10150  155769  2018  06  28
3  2018-06-27  10900   100  10800  11050  10500  133548  2018  06  27
4  2018-06-26  10800   350  10900  11000  10700   63039  2018  06  26


필터링

In [None]:
titanic = sns.load_dataset('titanic')

# Boolean mask: age in the half-open interval [10, 20). Rows with a missing
# age compare False on both sides and are therefore excluded.
mask1 = titanic['age'].ge(10) & titanic['age'].lt(20)
df_teenage = titanic[mask1]
print(df_teenage.head(n=5))

    survived  pclass     sex   age  ...  deck  embark_town  alive  alone
9          1       2  female  14.0  ...   NaN    Cherbourg    yes  False
14         0       3  female  14.0  ...   NaN  Southampton     no   True
22         1       3  female  15.0  ...   NaN   Queenstown    yes   True
27         0       1    male  19.0  ...     C  Southampton     no  False
38         0       3  female  18.0  ...   NaN  Southampton     no  False

[5 rows x 15 columns]


In [None]:
# Both conditions must hold: younger than 10 AND recorded as female.
mask2 = titanic['age'].lt(10) & titanic['sex'].eq('female')
df_girls = titanic[mask2]
print(df_girls.head(n=5))

     survived  pclass     sex  age  ...  deck  embark_town  alive  alone
10          1       3  female  4.0  ...     G  Southampton    yes  False
24          0       3  female  8.0  ...   NaN  Southampton     no  False
43          1       2  female  3.0  ...   NaN    Cherbourg    yes  False
58          1       2  female  5.0  ...   NaN  Southampton    yes  False
119         0       3  female  2.0  ...   NaN  Southampton     no  False

[5 rows x 15 columns]


In [None]:
# Either condition may hold: younger than 10 OR aged 60 and above.
mask3 = titanic['age'].lt(10) | titanic['age'].ge(60)
df_no = titanic[mask3]
print(df_no.head(n=5))

    survived  pclass     sex   age  ...  deck  embark_town  alive  alone
7          0       3    male   2.0  ...   NaN  Southampton     no  False
10         1       3  female   4.0  ...     G  Southampton    yes  False
16         0       3    male   2.0  ...   NaN   Queenstown     no  False
24         0       3  female   8.0  ...   NaN  Southampton     no  False
33         0       2    male  66.0  ...   NaN  Southampton     no   True

[5 rows x 15 columns]


In [None]:
# One equality mask per sibsp value of interest, OR-ed together to select
# rows whose sibsp is 3, 4 or 5.
mask4 = titanic['sibsp'].eq(3)
mask5 = titanic['sibsp'].eq(4)
mask6 = titanic['sibsp'].eq(5)
df_boolean = titanic.loc[mask4 | mask5 | mask6]
print(df_boolean.head(n=5))

    survived  pclass     sex   age  ...  deck  embark_town  alive  alone
7          0       3    male   2.0  ...   NaN  Southampton     no  False
16         0       3    male   2.0  ...   NaN   Queenstown     no  False
24         0       3  female   8.0  ...   NaN  Southampton     no  False
27         0       1    male  19.0  ...     C  Southampton     no  False
50         0       3    male   7.0  ...   NaN  Southampton     no  False

[5 rows x 15 columns]


In [None]:
# Same selection as the three OR-ed equality masks, expressed with isin().
isin_filter = titanic.sibsp.isin([3,4,5])
df_isin = titanic.loc[isin_filter]
print(df_isin.head(n=5))

    survived  pclass     sex   age  ...  deck  embark_town  alive  alone
7          0       3    male   2.0  ...   NaN  Southampton     no  False
16         0       3    male   2.0  ...   NaN   Queenstown     no  False
24         0       3  female   8.0  ...   NaN  Southampton     no  False
27         0       1    male  19.0  ...     C  Southampton     no  False
50         0       3    male   7.0  ...   NaN  Southampton     no  False

[5 rows x 15 columns]


데이터프레임 합치기

In [None]:
# axis=0 stacks frames top-to-bottom; axis=1 places them side by side.
df1 = pd.DataFrame(
    dict(
        a=['a0', 'a1', 'a2', 'a3'],
        b=['b0', 'b1', 'b2', 'b3'],
        c=['c0', 'c1', 'c2', 'c3'],
    ),
    index=[0, 1, 2, 3],
)

df2 = pd.DataFrame(
    dict(
        a=['a2', 'a3', 'a4', 'a5'],
        b=['b2', 'b3', 'b4', 'b5'],
        c=['c2', 'c3', 'c4', 'c5'],
        d=['d2', 'd3', 'd4', 'd5'],
    ),
    index=[2, 3, 4, 5],
)
print(df1)
print(df2)
# Row-wise concatenation: original index labels are kept (so 2 and 3 repeat)
# and column 'd' is NaN for the rows that came from df1.
result = pd.concat([df1, df2], axis=0)
print(result)

    a   b   c
0  a0  b0  c0
1  a1  b1  c1
2  a2  b2  c2
3  a3  b3  c3
    a   b   c   d
2  a2  b2  c2  d2
3  a3  b3  c3  d3
4  a4  b4  c4  d4
5  a5  b5  c5  d5
    a   b   c    d
0  a0  b0  c0  NaN
1  a1  b1  c1  NaN
2  a2  b2  c2  NaN
3  a3  b3  c3  NaN
2  a2  b2  c2   d2
3  a3  b3  c3   d3
4  a4  b4  c4   d4
5  a5  b5  c5   d5


In [None]:
# ignore_index=True discards the original index labels and renumbers the
# combined rows from 0.
result2 = pd.concat((df1, df2), ignore_index=True)
print(result2)

    a   b   c    d
0  a0  b0  c0  NaN
1  a1  b1  c1  NaN
2  a2  b2  c2  NaN
3  a3  b3  c3  NaN
4  a2  b2  c2   d2
5  a3  b3  c3   d3
6  a4  b4  c4   d4
7  a5  b5  c5   d5


In [None]:
# Side-by-side concatenation aligned on the index; labels missing from one
# frame are filled with NaN (outer join is the default).
result3 = pd.concat([df1, df2], axis='columns')
print(result3)

     a    b    c    a    b    c    d
0   a0   b0   c0  NaN  NaN  NaN  NaN
1   a1   b1   c1  NaN  NaN  NaN  NaN
2   a2   b2   c2   a2   b2   c2   d2
3   a3   b3   c3   a3   b3   c3   d3
4  NaN  NaN  NaN   a4   b4   c4   d4
5  NaN  NaN  NaN   a5   b5   c5   d5


In [None]:
# join='inner' keeps only the index labels that both frames share.
result3_in = pd.concat([df1, df2], axis='columns', join='inner')
print(result3_in)

    a   b   c   a   b   c   d
2  a2  b2  c2  a2  b2  c2  d2
3  a3  b3  c3  a3  b3  c3  d3


In [None]:
# Named Series: when concatenated column-wise, the Series name becomes the
# new column label.
sr1 = pd.Series(data=['e0', 'e1', 'e2', 'e3'], name='e')
sr2 = pd.Series(data=['f0', 'f1', 'f2'], index=[3, 4, 5], name='f')
sr3 = pd.Series(data=['g0', 'g1', 'g2', 'g3'], name='g')

# Attach sr1 to df1 as an extra column, aligned on the index.
result4 = pd.concat([df1, sr1], axis='columns')
print(result4)

    a   b   c   e
0  a0  b0  c0  e0
1  a1  b1  c1  e1
2  a2  b2  c2  e2
3  a3  b3  c3  e3


In [None]:
# Combining data with merge(): load the two workbooks that will be merged.
df1, df2 = (
    pd.read_excel(workbook)
    for workbook in ('./stock price.xlsx', './stock valuation.xlsx')
)

print(df1)
print(df2)

       id stock_name          value   price
0  128940       한미약품   59385.666667  421000
1  130960     CJ E&M   58540.666667   98900
2  138250      엔에스쇼핑   14558.666667   13200
3  139480        이마트  239230.833333  254500
4  142280     녹십자엠에스     468.833333   10200
5  145990        삼양사   82750.000000   82000
6  185750        종근당   40293.666667  100500
7  192400      쿠쿠홀딩스  179204.666667  177500
8  199800         툴젠   -2514.333333  115400
9  204210     모두투어리츠    3093.333333    3475
       id       name           eps     bps        per       pbr
0  130960     CJ E&M   6301.333333   54068  15.695091  1.829178
1  136480         하림    274.166667    3551  11.489362  0.887074
2  138040    메리츠금융지주   2122.333333   14894   6.313806  0.899691
3  139480        이마트  18268.166667  295780  13.931338  0.860437
4  145990        삼양사   5741.000000  108090  14.283226  0.758627
5  161390      한국타이어   5648.500000   51341   7.453306  0.820007
6  181710  NHN엔터테인먼트   2110.166667   78434  30.755864  0.827447
7  1

In [None]:
# Default merge: inner join on the column(s) both frames have in common.
merge_inner = df1.merge(df2)
print(merge_inner)

       id stock_name          value  ...     bps        per       pbr
0  130960     CJ E&M   58540.666667  ...   54068  15.695091  1.829178
1  139480        이마트  239230.833333  ...  295780  13.931338  0.860437
2  145990        삼양사   82750.000000  ...  108090  14.283226  0.758627
3  185750        종근당   40293.666667  ...   40684  25.185866  2.470259
4  204210     모두투어리츠    3093.333333  ...    5335  40.802348  0.651359

[5 rows x 9 columns]


In [None]:
# Outer join keyed on the 'id' column: combines the data of every stock that
# appears in either frame.
merge_outer = df1.merge(df2, how='outer', on='id')
print(merge_outer)

        id stock_name          value  ...       bps        per       pbr
0   128940       한미약품   59385.666667  ...       NaN        NaN       NaN
1   130960     CJ E&M   58540.666667  ...   54068.0  15.695091  1.829178
2   138250      엔에스쇼핑   14558.666667  ...       NaN        NaN       NaN
3   139480        이마트  239230.833333  ...  295780.0  13.931338  0.860437
4   142280     녹십자엠에스     468.833333  ...       NaN        NaN       NaN
5   145990        삼양사   82750.000000  ...  108090.0  14.283226  0.758627
6   185750        종근당   40293.666667  ...   40684.0  25.185866  2.470259
7   192400      쿠쿠홀딩스  179204.666667  ...       NaN        NaN       NaN
8   199800         툴젠   -2514.333333  ...       NaN        NaN       NaN
9   204210     모두투어리츠    3093.333333  ...    5335.0  40.802348  0.651359
10  136480        NaN            NaN  ...    3551.0  11.489362  0.887074
11  138040        NaN            NaN  ...   14894.0   6.313806  0.899691
12  161390        NaN            NaN  ...   51341.0

In [None]:
# Left join where the key column is named differently in each frame:
# 'stock_name' on the left is matched against 'name' on the right.
merge_left = df1.merge(df2, how='left', left_on='stock_name', right_on='name')
print(merge_left)

     id_x stock_name          value  ...       bps        per       pbr
0  128940       한미약품   59385.666667  ...       NaN        NaN       NaN
1  130960     CJ E&M   58540.666667  ...   54068.0  15.695091  1.829178
2  138250      엔에스쇼핑   14558.666667  ...       NaN        NaN       NaN
3  139480        이마트  239230.833333  ...  295780.0  13.931338  0.860437
4  142280     녹십자엠에스     468.833333  ...       NaN        NaN       NaN
5  145990        삼양사   82750.000000  ...  108090.0  14.283226  0.758627
6  185750        종근당   40293.666667  ...   40684.0  25.185866  2.470259
7  192400      쿠쿠홀딩스  179204.666667  ...       NaN        NaN       NaN
8  199800         툴젠   -2514.333333  ...       NaN        NaN       NaN
9  204210     모두투어리츠    3093.333333  ...    5335.0  40.802348  0.651359

[10 rows x 10 columns]


In [None]:
# Keep only the rows of df1 whose price is below 50000, then merge that
# subset with df2 using the default inner join.
price = df1.loc[df1['price'] < 50000]
print(price)
print(price.merge(df2))

       id stock_name         value  price
2  138250      엔에스쇼핑  14558.666667  13200
4  142280     녹십자엠에스    468.833333  10200
9  204210     모두투어리츠   3093.333333   3475
       id stock_name        value  price  ...        eps   bps        per       pbr
0  204210     모두투어리츠  3093.333333   3475  ...  85.166667  5335  40.802348  0.651359

[1 rows x 9 columns]


In [None]:
# Merging on the row index with join(): read both workbooks with 'id' as
# the index.
df1, df2 = (
    pd.read_excel(workbook, index_col='id')
    for workbook in ('./stock price.xlsx', './stock valuation.xlsx')
)

# join() is a left join by default: every row of df1 is kept.
df3 = df1.join(df2, how='left')
print(df3)
# Inner join: keep only the index labels present in both frames.
df4 = df1.join(df2, how='inner')
print(df4)

       stock_name          value   price  ...       bps        per       pbr
id                                        ...                               
128940       한미약품   59385.666667  421000  ...       NaN        NaN       NaN
130960     CJ E&M   58540.666667   98900  ...   54068.0  15.695091  1.829178
138250      엔에스쇼핑   14558.666667   13200  ...       NaN        NaN       NaN
139480        이마트  239230.833333  254500  ...  295780.0  13.931338  0.860437
142280     녹십자엠에스     468.833333   10200  ...       NaN        NaN       NaN
145990        삼양사   82750.000000   82000  ...  108090.0  14.283226  0.758627
185750        종근당   40293.666667  100500  ...   40684.0  25.185866  2.470259
192400      쿠쿠홀딩스  179204.666667  177500  ...       NaN        NaN       NaN
199800         툴젠   -2514.333333  115400  ...       NaN        NaN       NaN
204210     모두투어리츠    3093.333333    3475  ...    5335.0  40.802348  0.651359

[10 rows x 8 columns]
       stock_name          value   price  ...     bps

그룹 연산

In [None]:
# Split step: build one group per passenger class.
df = titanic.loc[:,['age','sex','class','fare','survived']]
print('승객 수:', len(df))

# Group by the scalar label 'class' rather than the one-element list
# ['class']. With a length-1 list, pandas >= 2.0 yields 1-tuple keys such as
# ('First',) when iterating, which would change the printed keys and break
# scalar lookups like get_group('Third'). A scalar grouper gives plain scalar
# keys on every pandas version.
grouped = df.groupby('class')
for key, group in grouped:
  print('* key: ',key)
  print('* number: ', len(group))
  print(group.head())
  print('\n')

승객 수: 891
* key:  First
* number:  216
     age     sex  class     fare  survived
1   38.0  female  First  71.2833         1
3   35.0  female  First  53.1000         1
6   54.0    male  First  51.8625         0
11  58.0  female  First  26.5500         1
23  28.0    male  First  35.5000         1


* key:  Second
* number:  184
     age     sex   class     fare  survived
9   14.0  female  Second  30.0708         1
15  55.0  female  Second  16.0000         1
17   NaN    male  Second  13.0000         1
20  35.0    male  Second  26.0000         0
21  34.0    male  Second  13.0000         1


* key:  Third
* number:  491
    age     sex  class     fare  survived
0  22.0    male  Third   7.2500         0
2  26.0  female  Third   7.9250         1
4  35.0    male  Third   8.0500         0
5   NaN    male  Third   8.4583         0
7   2.0    male  Third  21.0750         0




In [None]:
# Per-group mean of the numeric columns only. The grouped frame still holds
# the string column 'sex'; pandas >= 2.0 no longer drops such columns
# silently and raises TypeError instead, so request numeric columns
# explicitly. Older pandas accepts the same keyword and gives the same table.
average = grouped.mean(numeric_only=True)
print(average)

              age       fare  survived
class                                 
First   38.233441  84.154687  0.629630
Second  29.877630  20.662183  0.472826
Third   25.140620  13.675550  0.242363


In [None]:
# Retrieve the rows of a single group by its key.
group3 = grouped.get_group('Third')
print(group3.head(n=5))

    age     sex  class     fare  survived
0  22.0    male  Third   7.2500         0
2  26.0  female  Third   7.9250         1
4  35.0    male  Third   8.0500         0
5   NaN    male  Third   8.4583         0
7   2.0    male  Third  21.0750         0


In [None]:
# Grouping on two columns: each key is a (class, sex) tuple.
grouped_two = df.groupby(by=['class','sex'])
for key, group in grouped_two:
    print('* key: ',key)
    print('* number: ', group.shape[0])
    print(group.head(n=5))
    print('\n')

* key:  ('First', 'female')
* number:  94
     age     sex  class      fare  survived
1   38.0  female  First   71.2833         1
3   35.0  female  First   53.1000         1
11  58.0  female  First   26.5500         1
31   NaN  female  First  146.5208         1
52  49.0  female  First   76.7292         1


* key:  ('First', 'male')
* number:  122
     age   sex  class      fare  survived
6   54.0  male  First   51.8625         0
23  28.0  male  First   35.5000         1
27  19.0  male  First  263.0000         0
30  40.0  male  First   27.7208         0
34  28.0  male  First   82.1708         0


* key:  ('Second', 'female')
* number:  76
     age     sex   class     fare  survived
9   14.0  female  Second  30.0708         1
15  55.0  female  Second  16.0000         1
41  27.0  female  Second  21.0000         0
43   3.0  female  Second  41.5792         1
53  29.0  female  Second  26.0000         1


* key:  ('Second', 'male')
* number:  108
     age   sex   class  fare  survived
17   Na

In [None]:
# Mean per (class, sex) group; the result carries a two-level row index.
average_two = grouped_two.agg('mean')
print(average_two)

                     age        fare  survived
class  sex                                    
First  female  34.611765  106.125798  0.968085
       male    41.281386   67.226127  0.368852
Second female  28.722973   21.970121  0.921053
       male    30.740707   19.741782  0.157407
Third  female  21.750000   16.118810  0.500000
       male    26.507589   12.661633  0.135447


In [None]:
# For a multi-key groupby the group key is passed as a tuple.
third_female_key = ('Third','female')
group3f = grouped_two.get_group(third_female_key)
print(group3f.head(n=5))

     age     sex  class     fare  survived
2   26.0  female  Third   7.9250         1
8   27.0  female  Third  11.1333         1
10   4.0  female  Third  16.7000         1
14  14.0  female  Third   7.8542         0
18  31.0  female  Third  18.0000         0


In [None]:
# Apply-combine step.
# Group by the scalar label 'class' (not the list ['class']) so that group
# keys stay plain scalars on pandas >= 2.0 as well as on older versions.
grouped = df.groupby('class')

# Standard deviation of the numeric columns. The columns are selected
# explicitly because the grouped frame also holds the string column 'sex',
# which pandas >= 2.0 refuses to reduce instead of silently dropping it.
std_all = grouped[['age', 'fare', 'survived']].std()
print(std_all)
# Standard deviation of a single column: the result is a Series.
std_fare = grouped.fare.std()
print(std_fare)

              age       fare  survived
class                                 
First   14.802856  78.380373  0.484026
Second  14.001077  13.417399  0.500623
Third   12.495398  11.778142  0.428949
class
First     78.380373
Second    13.417399
Third     11.778142
Name: fare, dtype: float64


In [None]:
# Applying a custom aggregation function with agg().
def mean_max(x):
  """Return the range of ``x``: its maximum minus its minimum.

  NOTE: despite the name, no mean is involved; the computation is the same
  as the ``min_max`` helper defined in an earlier cell. The name is kept so
  existing references keep working.
  """
  return x.max() - x.min()

# Use the function defined in this cell (the original called the earlier
# ``min_max`` and left ``mean_max`` unused), and restrict the aggregation to
# the numeric columns: ``max - min`` is undefined for the string column
# 'sex', which pandas >= 2.0 reports as an error instead of dropping it.
agg_minmax = grouped[['age', 'fare', 'survived']].agg(mean_max)
print(agg_minmax.head())

          age      fare  survived
class                            
First   79.08  512.3292         1
Second  69.33   73.5000         1
Third   73.58   69.5500         1


In [None]:
# Map the same list of functions onto every column.
agg_all = grouped.agg(['min', 'max'])
print(agg_all.head(n=5))
# Map a different function (or list of functions) onto each column.
agg_sep = grouped.agg({
    'fare': ['min', 'max'],
    'age': 'mean',
})
print(agg_sep.head(n=5))

         age           sex       fare           survived    
         min   max     min   max  min       max      min max
class                                                       
First   0.92  80.0  female  male  0.0  512.3292        0   1
Second  0.67  70.0  female  male  0.0   73.5000        0   1
Third   0.42  74.0  female  male  0.0   69.5500        0   1
       fare                  age
        min       max       mean
class                           
First   0.0  512.3292  38.233441
Second  0.0   73.5000  29.877630
Third   0.0   69.5500  25.140620


In [None]:
# Per-group mean and standard deviation of age, indexed by group key.
age_mean = grouped['age'].mean()
age_std = grouped['age'].std()

# Standardise each group's ages with that group's own mean and std.
for key, group in grouped['age']:
    group_zscore = group.sub(age_mean.loc[key]).div(age_std.loc[key])
    print('* key: ',key)
    print(group_zscore.head(n=5))
    print('\n')

* key:  First
1    -0.015770
3    -0.218434
6     1.065103
11    1.335321
23   -0.691315
Name: age, dtype: float64


* key:  Second
9    -1.134029
15    1.794317
17         NaN
20    0.365855
21    0.294432
Name: age, dtype: float64


* key:  Third
0   -0.251342
2    0.068776
4    0.789041
5         NaN
7   -1.851931
Name: age, dtype: float64




In [None]:
def z_score(x):
    """Return ``x`` standardised as ``(x - x.mean()) / x.std()``."""
    centered = x - x.mean()
    return centered / x.std()

# transform() applies the function group by group and returns a result
# aligned to the original row index.
age_zscore = grouped['age'].transform(z_score)
print(age_zscore.head(n=5))

0   -0.251342
1   -0.015770
2    0.068776
3   -0.218434
4    0.789041
Name: age, dtype: float64


In [None]:
# Filtering whole groups: keep only the rows belonging to groups whose mean
# age is below 30.
grouped_filter = grouped.filter(lambda members: members['age'].mean() < 30)
print(grouped_filter.head(n=5))

    age     sex  class     fare  survived
0  22.0    male  Third   7.2500         0
2  26.0  female  Third   7.9250         1
4  35.0    male  Third   8.0500         0
5   NaN    male  Third   8.4583         0
7   2.0    male  Third  21.0750         0


In [None]:
# apply() runs the callable on each group's sub-frame and stacks the
# per-group describe() tables under the group key.
agg_grouped = grouped.apply(lambda members: members.describe())
print(agg_grouped)

                     age        fare    survived
class                                           
First  count  186.000000  216.000000  216.000000
       mean    38.233441   84.154687    0.629630
       std     14.802856   78.380373    0.484026
       min      0.920000    0.000000    0.000000
       25%     27.000000   30.923950    0.000000
       50%     37.000000   60.287500    1.000000
       75%     49.000000   93.500000    1.000000
       max     80.000000  512.329200    1.000000
Second count  173.000000  184.000000  184.000000
       mean    29.877630   20.662183    0.472826
       std     14.001077   13.417399    0.500623
       min      0.670000    0.000000    0.000000
       25%     23.000000   13.000000    0.000000
       50%     29.000000   14.250000    0.000000
       75%     36.000000   26.000000    1.000000
       max     70.000000   73.500000    1.000000
Third  count  355.000000  491.000000  491.000000
       mean    25.140620   13.675550    0.242363
       std     12.49

멀티 인덱스

In [None]:
# Grouping by two columns produces a frame with a two-level row index.
grouped = df.groupby(by=['class','sex'])
gdf = grouped.mean()
print(gdf)

# Select one row by giving a label for each index level.
print(gdf.loc[('First', 'female')])
# Cross-section: every row whose 'sex' level equals 'male'.
print(gdf.xs(key='male', level='sex'))

                     age        fare  survived
class  sex                                    
First  female  34.611765  106.125798  0.968085
       male    41.281386   67.226127  0.368852
Second female  28.722973   21.970121  0.921053
       male    30.740707   19.741782  0.157407
Third  female  21.750000   16.118810  0.500000
       male    26.507589   12.661633  0.135447
age          34.611765
fare        106.125798
survived      0.968085
Name: (First, female), dtype: float64
              age       fare  survived
class                                 
First   41.281386  67.226127  0.368852
Second  30.740707  19.741782  0.157407
Third   26.507589  12.661633  0.135447
