In [1]:
import numpy as np
import pandas as pd

In [2]:
# 관련 데이터
 
# 3개주('CA','NY','TX') x 2개 연대 (2000, 2010)의 카테시안곱을 갖는
# 6개의 튜플 인덱스
index = [('CA', 2000), ('CA', 2010),
        ('NY', 2000), ('NY', 2010),
        ('TX', 2000), ('TX', 2010)]

# 각 6개 인덱스에 대응하는 데이터 값

# 인구 전체
populations = [ 33871648, 37253956,
              18975457, 19378102,
              20851820, 25145561]

# 인구 18세이하
populations_under18 = \
    [9267089, 9284094,
     2687374, 4318033,
     5906301, 6879014]


# 소득 전체
earnings = [72792, 81004, 65897, 69338, 58921, 60435]
# 소득 18세이하
earnings_under18 =  [4500,4700,5200,5300,3950,4100 ]


In [50]:
# 1. 주별, 연도별 인구수를 표현해보자.

index = [('CA', 2000), ('CA', 2010),
        ('NY', 2000), ('NY', 2010),
        ('TX', 2000), ('TX', 2010)]

populations = [ 33871648, 37253956,
              18975457, 19378102,
              20851820, 25145561]


pop_si = pd.Series (populations, 
                    index = index)
pop_si

(CA, 2000)    33871648
(CA, 2010)    37253956
(NY, 2000)    18975457
(NY, 2010)    19378102
(TX, 2000)    20851820
(TX, 2010)    25145561
dtype: int64

In [51]:
pop_si.index

Index([('CA', 2000), ('CA', 2010), ('NY', 2000), ('NY', 2010), ('TX', 2000),
       ('TX', 2010)],
      dtype='object')

In [52]:
# CA 자료의 추출 (slicing)
pop_si[:('CA',2010)]

(CA, 2000)    33871648
(CA, 2010)    37253956
dtype: int64

In [6]:
# NY 자료의 추출 (slicing)
pop_si[('NY',2000):('NY',2010)]

(NY, 2000)    18975457
(NY, 2010)    19378102
dtype: int64

In [7]:
# 2000년 자료의 추출 (fancy indexing)
pop_si[[('CA',2000),('NY',2000),('TX', 2000)]]

(CA, 2000)    33871648
(NY, 2000)    18975457
(TX, 2000)    20851820
dtype: int64

In [8]:
# 2. 계층적 인덱스를 생성하고, 
# Series를 재구성 해보자. 
multi_index = \
    pd.MultiIndex.from_tuples(index)
multi_index

MultiIndex([('CA', 2000),
            ('CA', 2010),
            ('NY', 2000),
            ('NY', 2010),
            ('TX', 2000),
            ('TX', 2010)],
           )

In [9]:
pop_mi = pop_si.reindex(multi_index)
pop_mi

CA  2000    33871648
    2010    37253956
NY  2000    18975457
    2010    19378102
TX  2000    20851820
    2010    25145561
dtype: int64

In [10]:
# CA 자료의 추출 (indexing)
pop_mi['CA']

2000    33871648
2010    37253956
dtype: int64

In [11]:
# NY 자료의 추출 (indexing)
pop_mi['NY']

2000    18975457
2010    19378102
dtype: int64

In [12]:
# 2010 자료의 추출 (indexing)
pop_mi[:, 2010]

CA    37253956
NY    19378102
TX    25145561
dtype: int64

In [13]:
# 3. 3차원을 표현하는 DataFrame 생성 (with 행 MultiIndex)
populations_under18 = \
    [9267089, 9284094,
     2687374, 4318033,
     5906301, 6879014]
pop_dict={'total' : populations,
        'under18':populations_under18}
pop_mi_df = pd.DataFrame (pop_dict,
                 index = multi_index)
pop_mi_df

Unnamed: 0,Unnamed: 1,total,under18
CA,2000,33871648,9267089
CA,2010,37253956,9284094
NY,2000,18975457,2687374
NY,2010,19378102,4318033
TX,2000,20851820,5906301
TX,2010,25145561,6879014


In [14]:
# CA의 추출 (indexing)
pop_mi_df.loc['CA']

Unnamed: 0,total,under18
2000,33871648,9267089
2010,37253956,9284094


In [15]:
# 2010년 데이터 (idx 객체 이용 후술)
idx = pd.IndexSlice
pop_mi_df.loc[idx[:,2010], idx[:]]

Unnamed: 0,Unnamed: 1,total,under18
CA,2010,37253956,9284094
NY,2010,19378102,4318033
TX,2010,25145561,6879014


In [16]:
# total의 추출 (indexing)
pop_mi_df.loc[:,'total']
pop_mi_df['total']

CA  2000    33871648
    2010    37253956
NY  2000    18975457
    2010    19378102
TX  2000    20851820
    2010    25145561
Name: total, dtype: int64

In [17]:
# 4. MultiLevel Index의 생성
#  묵시적 방법
#      a. index에 다차원 배열을 전달 한다.   또는
#      b.  data 구성 시에  튜플을 인덱스로 갖는 사전을 구성한다. 
#  명시적으로 인덱스 생성 방법
#      c. tuple로 부터 생성  .from_tuples
#      d. 카테시안곱   .from_product
#      e. 레벨과 코드의 인코딩 

In [18]:
# a. 묵시적 , index에 다차원 배열을 전달 한다. 
df_a = pd.DataFrame(pop_dict, 
        index = [['CA','CA','NY','NY','TX','TX'],
                 [2000,2010,2000,2010,2000,2010]])
df_a.index

MultiIndex([('CA', 2000),
            ('CA', 2010),
            ('NY', 2000),
            ('NY', 2010),
            ('TX', 2000),
            ('TX', 2010)],
           )

In [19]:
df_a.equals(pop_mi_df)

True

In [20]:
#b. 묵시적 ,  Series data 구성 시에  
#   튜플을 인덱스로 갖는 사전을 구성한다. 
s_b = pd.Series ({('CA', 2000):33871648, 
                  ('CA', 2010):37253956,
                  ('NY', 2000):18975457, 
                  ('NY', 2010):19378102,
                  ('TX', 2000):20851820, 
                  ('TX', 2010):25145561})
s_b.index

MultiIndex([('CA', 2000),
            ('CA', 2010),
            ('NY', 2000),
            ('NY', 2010),
            ('TX', 2000),
            ('TX', 2010)],
           )

In [57]:
s_b.index.equals(pop_si.index)

True

In [56]:
s_b.index,pop_si.index

(MultiIndex([('CA', 2000),
             ('CA', 2010),
             ('NY', 2000),
             ('NY', 2010),
             ('TX', 2000),
             ('TX', 2010)],
            ),
 Index([('CA', 2000), ('CA', 2010), ('NY', 2000), ('NY', 2010), ('TX', 2000),
        ('TX', 2010)],
       dtype='object'))

In [23]:
#  c. 명시적, tuple로 부터 생성 
# .from_tuples()
mi_c = pd.MultiIndex.from_tuples (index)
mi_c

MultiIndex([('CA', 2000),
            ('CA', 2010),
            ('NY', 2000),
            ('NY', 2010),
            ('TX', 2000),
            ('TX', 2010)],
           )

In [24]:
mi_c.equals(df_a.index)

True

In [25]:
#  d. 명시적, 카테시안곱  
# .from_product()
mi_d = pd.MultiIndex.from_product\
        ([['CA','NY','TX'],[2000, 2010]])
mi_d

MultiIndex([('CA', 2000),
            ('CA', 2010),
            ('NY', 2000),
            ('NY', 2010),
            ('TX', 2000),
            ('TX', 2010)],
           )

In [26]:
mi_c.equals(mi_d)

True

In [27]:
#  e. 명시적, 레벨과 코드의 인코딩 
mi_e = pd.MultiIndex(
        levels=[['CA','NY','TX'],[2000, 2010]],
        codes = [[0,0,1,1,2,2],[0,1,0,1,0,1]])
mi_e

MultiIndex([('CA', 2000),
            ('CA', 2010),
            ('NY', 2000),
            ('NY', 2010),
            ('TX', 2000),
            ('TX', 2010)],
           )

In [28]:
mi_c.equals(mi_e)

True

In [29]:
# 5. 4차원 데이터의 표현
#   with MultiIndex row and Column

In [58]:
# 주state별, 연대별, 
# 데이터항목별(인구/소득), 조사대상별(전체/18세이하)

# 소득 전체
earnings = [72792, 81004, 65897, 69338, 58921, 60435]
# 소득 18세이하
earnings_under18 =  [4500,4700,5200,5300,3950,4100 ]

# data array의 생성
data = np.transpose([populations, populations_under18,
                     earnings, earnings_under18 ])

#행과 컬럼에 대해서 MultiIndex 생성
row_index = pd.MultiIndex.from_tuples(index)
col_index = pd.MultiIndex.from_product\
      ([['pop','earn'],['total','under18']])

# 행/컬럼 MultiIndex로 지정된 DataFrame 생성
pop_earn_df = pd.DataFrame( data, 
                           index= row_index, 
                           columns = col_index)
pop_earn_df

Unnamed: 0_level_0,Unnamed: 1_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,Unnamed: 1_level_1,total,under18,total,under18
CA,2000,33871648,9267089,72792,4500
CA,2010,37253956,9284094,81004,4700
NY,2000,18975457,2687374,65897,5200
NY,2010,19378102,4318033,69338,5300
TX,2000,20851820,5906301,58921,3950
TX,2010,25145561,6879014,60435,4100


In [59]:
# 6. index level 과 이름
pop_earn_df


Unnamed: 0_level_0,Unnamed: 1_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,Unnamed: 1_level_1,total,under18,total,under18
CA,2000,33871648,9267089,72792,4500
CA,2010,37253956,9284094,81004,4700
NY,2000,18975457,2687374,65897,5200
NY,2010,19378102,4318033,69338,5300
TX,2000,20851820,5906301,58921,3950
TX,2010,25145561,6879014,60435,4100


In [32]:
# 행인덱스 : 계층적 인덱스 
pop_earn_df.index

MultiIndex([('CA', 2000),
            ('CA', 2010),
            ('NY', 2000),
            ('NY', 2010),
            ('TX', 2000),
            ('TX', 2010)],
           )

In [33]:
# 행인덱스의 levels
pop_earn_df.index.levels

FrozenList([['CA', 'NY', 'TX'], [2000, 2010]])

In [63]:
# 행인덱스의 0번째 level
pop_earn_df.index.levels[0]

Index(['CA', 'NY', 'TX'], dtype='object', name='state')

In [60]:
# 행인덱스 내 각 레벨의 이름 : 미지정시 없음
pop_earn_df.index.names

FrozenList([None, None])

In [62]:
# 행인덱스의 각 레벨 이름지정 
pop_earn_df.index.names = ['state','decade']

In [37]:
# 컬럼인덱스의 각 레벨 이름지정 및 df 출력
pop_earn_df.columns.names = ['category', 'scope']
pop_earn_df

Unnamed: 0_level_0,category,pop,pop,earn,earn
Unnamed: 0_level_1,scope,total,under18,total,under18
state,decade,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
CA,2000,33871648,9267089,72792,4500
CA,2010,37253956,9284094,81004,4700
NY,2000,18975457,2687374,65897,5200
NY,2010,19378102,4318033,69338,5300
TX,2000,20851820,5906301,58921,3950
TX,2010,25145561,6879014,60435,4100


In [38]:
# 7. Index의 해제와 설정 

In [39]:
# 인덱스 지정 해제
pop_earn_df.reset_index()

category,state,decade,pop,pop,earn,earn
scope,Unnamed: 1_level_1,Unnamed: 2_level_1,total,under18,total,under18
0,CA,2000,33871648,9267089,72792,4500
1,CA,2010,37253956,9284094,81004,4700
2,NY,2000,18975457,2687374,65897,5200
3,NY,2010,19378102,4318033,69338,5300
4,TX,2000,20851820,5906301,58921,3950
5,TX,2010,25145561,6879014,60435,4100


In [40]:
# 인덱스 지정 해제하면 RangeIndex 생성
pop_earn_df.reset_index().index

RangeIndex(start=0, stop=6, step=1)

In [65]:
pop_earn_df.reset_index().columns

MultiIndex([( 'state',        ''),
            ('decade',        ''),
            (   'pop',   'total'),
            (   'pop', 'under18'),
            (  'earn',   'total'),
            (  'earn', 'under18')],
           )

In [64]:
# 컬럼인덱스 
pop_earn_df.columns

MultiIndex([( 'pop',   'total'),
            ( 'pop', 'under18'),
            ('earn',   'total'),
            ('earn', 'under18')],
           )

In [42]:
# 인덱스 지정 해제하고, 컬럼의 level0 제거
pop_earn_df.reset_index()\
    .droplevel(0, axis = 1)

scope,Unnamed: 1,Unnamed: 2,total,under18,total.1,under18.1
0,CA,2000,33871648,9267089,72792,4500
1,CA,2010,37253956,9284094,81004,4700
2,NY,2000,18975457,2687374,65897,5200
3,NY,2010,19378102,4318033,69338,5300
4,TX,2000,20851820,5906301,58921,3950
5,TX,2010,25145561,6879014,60435,4100


In [43]:
# 인덱스 지정 해제하고, 컬럼의 level0 제거후 컬럼인덱스
pop_earn_df.reset_index()\
    .droplevel(0, axis = 1).columns

Index(['', '', 'total', 'under18', 'total', 'under18'], dtype='object', name='scope')

In [44]:
# 인덱스의 재설정

In [45]:
# 기존 인덱스 해제 후, 순서를 변경 'decade', 'state'
# 컬럼 목록으로 인덱스 재지정
pop_earn_df.reset_index()\
    .set_index(['decade', 'state'])

Unnamed: 0_level_0,category,pop,pop,earn,earn
Unnamed: 0_level_1,scope,total,under18,total,under18
decade,state,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2000,CA,33871648,9267089,72792,4500
2010,CA,37253956,9284094,81004,4700
2000,NY,18975457,2687374,65897,5200
2010,NY,19378102,4318033,69338,5300
2000,TX,20851820,5906301,58921,3950
2010,TX,25145561,6879014,60435,4100


In [46]:
# MultiIndex가 묵시적으로 생성됨
pop_earn_df.reset_index()\
    .set_index(['decade', 'state'])\
    .index

MultiIndex([(2000, 'CA'),
            (2010, 'CA'),
            (2000, 'NY'),
            (2010, 'NY'),
            (2000, 'TX'),
            (2010, 'TX')],
           names=['decade', 'state'])

In [47]:
# 8. MultiIndex의 정렬

In [66]:
pop_earn_df

Unnamed: 0_level_0,Unnamed: 1_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,Unnamed: 1_level_1,total,under18,total,under18
state,decade,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
CA,2000,33871648,9267089,72792,4500
CA,2010,37253956,9284094,81004,4700
NY,2000,18975457,2687374,65897,5200
NY,2010,19378102,4318033,69338,5300
TX,2000,20851820,5906301,58921,3950
TX,2010,25145561,6879014,60435,4100


In [67]:
# 미정렬 상태에서 슬라이싱 시도
# 오류 발생함
pop_earn_df.loc[:,'pop':'earn':-1]

UnsortedIndexError: 'Key length (1) was greater than MultiIndex lexsort depth (0)'

In [70]:
# MultiIndex도 .sort_index로 축과 방향을 지정하여
# sorting 수행
pop_earn_df.sort_index\
    (axis=1, ascending = False)

Unnamed: 0_level_0,Unnamed: 1_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,Unnamed: 1_level_1,under18,total,under18,total
state,decade,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
CA,2000,9267089,33871648,4500,72792
CA,2010,9284094,37253956,4700,81004
NY,2000,2687374,18975457,5200,65897
NY,2010,4318033,19378102,5300,69338
TX,2000,5906301,20851820,3950,58921
TX,2010,6879014,25145561,4100,60435


In [81]:
# sorting 후에는 슬라이싱 정상 동작
pop_earn_df.sort_index\
    (axis=0, ascending = False).loc['TX':'NY':-1]


UnsortedIndexError: 'Key length (1) was greater than MultiIndex lexsort depth (0)'

In [82]:
# 9. MultiIndex의 인덱싱 , 슬라이싱

In [85]:
pop_earn_df

Unnamed: 0_level_0,Unnamed: 1_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,Unnamed: 1_level_1,total,under18,total,under18
state,decade,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
CA,2000,33871648,9267089,72792,4500
CA,2010,37253956,9284094,81004,4700
NY,2000,18975457,2687374,65897,5200
NY,2010,19378102,4318033,69338,5300
TX,2000,20851820,5906301,58921,3950
TX,2010,25145561,6879014,60435,4100


In [83]:
# 'pop' 인구항목만 추출
#  축약형 - 컬럼 level0
pop_earn_df['pop']

Unnamed: 0_level_0,Unnamed: 1_level_0,total,under18
state,decade,Unnamed: 2_level_1,Unnamed: 3_level_1
CA,2000,33871648,9267089
CA,2010,37253956,9284094
NY,2000,18975457,2687374
NY,2010,19378102,4318033
TX,2000,20851820,5906301
TX,2010,25145561,6879014


In [84]:
# 'CA' 항목만 추출 
#  .loc 행인덱스 level0
pop_earn_df.loc['CA']

Unnamed: 0_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,total,under18,total,under18
decade,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2000,33871648,9267089,72792,4500
2010,37253956,9284094,81004,4700


In [86]:
# 'CA','TX' 항목 추출
#  .loc 행인덱스 level0에 팬시인덱싱
pop_earn_df.loc[['CA','TX']]

Unnamed: 0_level_0,Unnamed: 1_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,Unnamed: 1_level_1,total,under18,total,under18
state,decade,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
CA,2000,33871648,9267089,72792,4500
CA,2010,37253956,9284094,81004,4700
TX,2000,20851820,5906301,58921,3950
TX,2010,25145561,6879014,60435,4100


In [88]:
# 'CA' - 2010 항목 추출
# .loc 행인덱스 level0/level1 인덱싱
pop_earn_df.loc[('CA',2000)]
#pop_earn_df.loc['CA',2000]

pop   total      33871648
      under18     9267089
earn  total         72792
      under18        4500
Name: (CA, 2000), dtype: int32

In [89]:
# 'CA' 항목과 'pop' 항목 추출
# .loc 행인덱스 level0
#      컬럼인덱스 level0
pop_earn_df.loc['CA','pop']

Unnamed: 0_level_0,total,under18
decade,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,33871648,9267089
2010,37253956,9284094


In [90]:
# 'CA' 항목과 'pop'의 'total' 항목 추출
# .loc 행인덱스 level0
#      컬럼인덱스 level0/leve1
# 괄호로 명시
pop_earn_df.loc['CA',('pop','total')]

decade
2000    33871648
2010    37253956
Name: (pop, total), dtype: int32

In [91]:
# 'CA'-2010 항목과 'pop' 항목 추출
# .loc 행인덱스 level0/levle1
#      컬럼인덱스 level0
# 괄호로 명시
pop_earn_df.loc[('CA',2010),'pop']

total      37253956
under18     9284094
Name: (CA, 2010), dtype: int32

In [92]:
# 'CA'-2010 항목과 'pop'의 'total' 항목 추출
# .loc 행인덱스 level0/level1
#      컬럼인덱스 level0/leve1
# 괄호로 명시
pop_earn_df.loc[('CA',2010),('pop','total')]

37253956

In [93]:
# '2010' 항목과 'under18' 항목 추출
# .loc 행인덱스 level1
#      컬럼인덱스 level1
# 오류가 발생한다. 
pop_earn_df.loc[(:,2010),(:,'under18')]

SyntaxError: invalid syntax (<ipython-input-93-30f287032c7e>, line 5)

In [94]:
# '2010' 항목과 'under18' 항목 추출
# .loc 행인덱스 level1
#      컬럼인덱스 level1
# pd.IndexSlice 객체를 통해 slicing 가능하다. 
pop_earn_df.loc[pd.IndexSlice [:,2010], 
                pd.IndexSlice [:, 'under18']]

Unnamed: 0_level_0,Unnamed: 1_level_0,pop,earn
Unnamed: 0_level_1,Unnamed: 1_level_1,under18,under18
state,decade,Unnamed: 2_level_2,Unnamed: 3_level_2
CA,2010,9284094,4700
NY,2010,4318033,5300
TX,2010,6879014,4100


In [95]:
# idx = pd.IndexSlice 로 
# 객체생성해서 
# 간결하게 표현한다. 
idx = pd.IndexSlice   
pop_earn_df.loc[idx[:,2010], idx[:, 'under18']]

Unnamed: 0_level_0,Unnamed: 1_level_0,pop,earn
Unnamed: 0_level_1,Unnamed: 1_level_1,under18,under18
state,decade,Unnamed: 2_level_2,Unnamed: 3_level_2
CA,2010,9284094,4700
NY,2010,4318033,5300
TX,2010,6879014,4100


In [96]:
# '2010' 항목 추출
# .loc 행인덱스 level1
pop_earn_df.loc[idx[:,2010], idx[:]]

Unnamed: 0_level_0,Unnamed: 1_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,Unnamed: 1_level_1,total,under18,total,under18
state,decade,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
CA,2010,37253956,9284094,81004,4700
NY,2010,19378102,4318033,69338,5300
TX,2010,25145561,6879014,60435,4100


In [97]:
pop_earn_df.loc[:'NY', 'pop' ]

Unnamed: 0_level_0,Unnamed: 1_level_0,total,under18
state,decade,Unnamed: 2_level_1,Unnamed: 3_level_1
CA,2000,33871648,9267089
CA,2010,37253956,9284094
NY,2000,18975457,2687374
NY,2010,19378102,4318033


In [98]:
pop_earn_df.mean()

pop   total      2.591276e+07
      under18    6.390318e+06
earn  total      6.806450e+04
      under18    4.625000e+03
dtype: float64

In [99]:
# 10. MultiIndex에서의 부분집합별 집계연산

# 주별 평균을 구한다. 
pop_earn_df.mean(level = 'state')

Unnamed: 0_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,total,under18,total,under18
state,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
CA,35562802.0,9275591.5,76898.0,4600.0
NY,19176779.5,3502703.5,67617.5,5250.0
TX,22998690.5,6392657.5,59678.0,4025.0


In [100]:
# 연대별 평균을 구한다. 
pop_earn_df.mean(level='decade')

Unnamed: 0_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,total,under18,total,under18
decade,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2000,24566310.0,5953588.0,65870.0,4550.0
2010,27259210.0,6827047.0,70259.0,4700.0


In [103]:
# 데이터항목별 최대값을 구한다. (컬럼축)
pop_earn_df.max(level='decade')

Unnamed: 0_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,total,under18,total,under18
decade,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2000,33871648,9267089,72792,5200
2010,37253956,9284094,81004,5300


In [105]:
# chain을 통해서 항목별 최대값에
# 연대별 평균을 구한다. 
pop_earn_df.max(level='state')

Unnamed: 0_level_0,pop,pop,earn,earn
Unnamed: 0_level_1,total,under18,total,under18
state,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
CA,37253956,9284094,81004,4700
NY,19378102,4318033,69338,5300
TX,25145561,6879014,60435,4100
