# 7 层级索引（hierarchical indexing）（在机器学习和深度学习中不重要）

In [24]:
import pandas as pd
import numpy as np

# A MultiIndex is pandas' hierarchical (multi-level) index type.
# The labels are the full cross product of 4 cloth values x 3 sizes, so build
# the index with from_product instead of listing all 12 pairs by hand.
index1 = pd.MultiIndex.from_product([['a', 'b', 'c', 'd'], [0, 1, 2]], names=['cloth', 'size'])
# Draw the values from a locally seeded generator so that Restart & Run All
# reproduces the same Series, without touching NumPy's global random state.
ser_obj = pd.Series(np.random.default_rng(42).standard_normal(12), index=index1)
print(ser_obj)
print("-" * 100)
print(type(ser_obj))
print(type(ser_obj.index))
print(ser_obj.index.names)  # level names: cloth (outer), size (inner)
print(ser_obj.index.levels)  # unique labels of each level
print("-" * 100)
print(ser_obj.index.codes)  # position of each row's label within its level (rarely needed)
print("-" * 100)
# Looking data up through the hierarchical index
print("通过层级索引查找数据")
print(ser_obj.iloc[0])  # first row, by position
print("-" * 100)
print(ser_obj.loc[('a', 0)])  # the single value at the full key ('a', 0)
print("-" * 100)
print(ser_obj.loc[('a', 0):('c', 2)])  # label slice from ('a', 0) to ('c', 2), both ends included
print("-" * 100)
print(ser_obj.loc['c'])  # every row whose outer level is 'c'
print("-" * 100)
print(ser_obj.loc['a', 2])  # outer key 'a', inner key 2
print("-" * 100)
print(ser_obj.loc[:, 2])  # all outer labels, inner key 2
print("-" * 100)
# print(ser_obj.loc[1])  # fails: a lone key is matched against the outer level ('a'..'d'); 1 only exists in the inner level

cloth  size
a      0      -0.462184
       1       1.455234
       2       1.116422
b      0       0.535046
       1       0.689163
       2       1.262925
c      0      -0.407794
       1      -0.544847
       2      -1.861852
d      0       0.672676
       1      -0.863503
       2      -0.698782
dtype: float64
----------------------------------------------------------------------------------------------------
<class 'pandas.core.series.Series'>
<class 'pandas.core.indexes.multi.MultiIndex'>
['cloth', 'size']
[['a', 'b', 'c', 'd'], [0, 1, 2]]
----------------------------------------------------------------------------------------------------
[[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]]
----------------------------------------------------------------------------------------------------
通过层级索引查找数据
-0.4621843357342522
----------------------------------------------------------------------------------------------------
-0.4621843357342522
------------------

# 交换层级

In [38]:
print(ser_obj.swaplevel())  # swap the two index levels: (cloth, size) -> (size, cloth)
print("-" * 100)
print(ser_obj)  # swaplevel() returned a new Series; ser_obj itself is unchanged
print("-" * 100)
ser_obj_swap = ser_obj.swaplevel()
# print(ser_obj_swap.loc[('a', 0)])  # fails: after the swap the outer level is size, not cloth
print(ser_obj_swap.loc[(0, 'a')])  # correct key order: size first, then cloth
print("-" * 100)
# Use .loc so that 1 is unambiguously a label of the outer `size` level;
# plain [] with an integer key can be read as either a label or a position.
print(ser_obj_swap.loc[1])
print("-" * 100)
print(ser_obj_swap.sort_index(level=0))  # sort rows by the outer (first) index level
print("-" * 100)
df_obj = ser_obj_swap.unstack(level=-1)  # pivot the innermost level (cloth) into columns; level=-1 is the default
print(df_obj)
print("-" * 100)
# stack() folds the column labels back into the row index, always as the
# innermost level, which turns the DataFrame into a Series again.
print(df_obj.stack())  # reverses the unstack above

size  cloth
0     a       -0.462184
1     a        1.455234
2     a        1.116422
0     b        0.535046
1     b        0.689163
2     b        1.262925
0     c       -0.407794
1     c       -0.544847
2     c       -1.861852
0     d        0.672676
1     d       -0.863503
2     d       -0.698782
dtype: float64
----------------------------------------------------------------------------------------------------
cloth  size
a      0      -0.462184
       1       1.455234
       2       1.116422
b      0       0.535046
       1       0.689163
       2       1.262925
c      0      -0.407794
       1      -0.544847
       2      -1.861852
d      0       0.672676
       1      -0.863503
       2      -0.698782
dtype: float64
----------------------------------------------------------------------------------------------------
-0.4621843357342522
----------------------------------------------------------------------------------------------------
cloth
a    1.455234
b    0.689163
c   -0.544847