In [1]:
import numpy as np
import pandas as pd

In [3]:
# Pair the two level-value lists element-wise into (first, second) tuples.
first_level = [1, 2, 3, 4, 5, 17, 18, 19]
second_level = [11, 12, 13, 6, 7, 8, 9, 10]
my_tuple = [(first, second) for first, second in zip(first_level, second_level)]
my_tuple

[(1, 11), (2, 12), (3, 13), (4, 6), (5, 7), (17, 8), (18, 9), (19, 10)]

In [5]:
# Turn the (first, second) tuples into a two-level row index with named levels.
level_names = ["First", "Second"]
index = pd.MultiIndex.from_tuples(
    my_tuple,
    names=level_names,
)
index

MultiIndex([( 1, 11),
            ( 2, 12),
            ( 3, 13),
            ( 4,  6),
            ( 5,  7),
            (17,  8),
            (18,  9),
            (19, 10)],
           names=['First', 'Second'])

In [10]:
# Build a DataFrame whose rows are labelled by the two-level MultiIndex.
# The values come from a locally seeded generator so that the frame is
# identical on every Restart & Run All (the original unseeded randn call
# produced different numbers on each run).
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((8, 2)), index=index, columns=['A', 'B'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
First,Second,Unnamed: 2_level_1,Unnamed: 3_level_1
1,11,-0.418266,1.741632
2,12,0.130065,-0.527543
3,13,-0.759303,0.451719
4,6,1.297487,-0.269853
5,7,1.196965,-0.314076
17,8,0.124303,1.442213
18,9,-0.920386,-0.652091
19,10,-0.803523,-1.023892


In [11]:
# Keep only the first four rows, selected explicitly by position.
df2 = df.iloc[:4]
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
First,Second,Unnamed: 2_level_1,Unnamed: 3_level_1
1,11,-0.418266,1.741632
2,12,0.130065,-0.527543
3,13,-0.759303,0.451719
4,6,1.297487,-0.269853


In [17]:
# stack() moves the column labels into a new innermost index level, so the
# two-column frame becomes a Series.
ser = df2.stack()
print(type(ser))
# Fetch one value with a single label tuple (First, Second, column) via .loc.
# The chained form ser[1][11]['A'] builds two intermediate objects and relies
# on integer keys being interpreted as labels rather than positions.
print(ser.loc[(1, 11, 'A')])
ser

<class 'pandas.core.series.Series'>
-0.4182662559973507


First  Second   
1      11      A   -0.418266
               B    1.741632
2      12      A    0.130065
               B   -0.527543
3      13      A   -0.759303
               B    0.451719
4      6       A    1.297487
               B   -0.269853
dtype: float64

In [18]:
# unstack() reverses the stack: the innermost index level (the default,
# level=-1) becomes the columns again, giving back a DataFrame.
ser.unstack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
First,Second,Unnamed: 2_level_1,Unnamed: 3_level_1
1,11,-0.418266,1.741632
2,12,0.130065,-0.527543
3,13,-0.759303,0.451719
4,6,1.297487,-0.269853


In [20]:
# Create a second DataFrame to demonstrate pivot operations.
# A, B and C are repeating categorical labels; D and E are random values.
# The random columns are drawn from a locally seeded generator so that the
# frame (and every pivot table built from it) is reproducible across runs.
rng = np.random.default_rng(42)
df_1 = pd.DataFrame(
    {
        'A': ['a', 'b', 'c', 'd'] * 3,
        'B': ['A', 'B', 'C'] * 4,
        'C': ['P', 'P', 'P', 'Q', 'Q', 'Q'] * 2,
        'D': rng.standard_normal(12),
        'E': rng.standard_normal(12),
    }
)

df_1

Unnamed: 0,A,B,C,D,E
0,a,A,P,-0.238062,0.887977
1,b,B,P,0.114718,0.93443
2,c,C,P,0.217893,-0.460176
3,d,A,Q,-0.198628,-0.367397
4,a,B,Q,-0.849431,-0.208785
5,b,C,Q,-0.722253,0.550433
6,c,A,P,-0.216982,-0.661626
7,d,B,P,-0.661637,1.85908
8,a,C,P,0.896569,2.131873
9,b,A,Q,0.309532,0.619726


In [21]:
# Mean of D for every (A, B) row pair, spread across the values of C as columns.
# Combinations that never occur in df_1 show up as NaN.
df_1.pivot_table(
    values='D',
    index=['A', 'B'],
    columns=['C'],
    aggfunc='mean',
)

Unnamed: 0_level_0,C,P,Q
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
a,A,-0.238062,
a,B,,-0.849431
a,C,0.896569,
b,A,,0.309532
b,B,0.114718,
b,C,,-0.722253
c,A,-0.216982,
c,B,,0.635844
c,C,0.217893,
d,A,,-0.198628


In [23]:
# Group by the frame's own row index, so each row keeps its own line and its
# D value lands in the column matching that row's C label.
df_1.pivot_table(
    values='D',
    index=df_1.index,
    columns=['C'],
    aggfunc='mean',
)

C,P,Q
0,-0.238062,
1,0.114718,
2,0.217893,
3,,-0.198628
4,,-0.849431
5,,-0.722253
6,-0.216982,
7,-0.661637,
8,0.896569,
9,,0.309532


In [25]:
# Another pivot_table demonstration: a long-format table with one row per
# (ID, attr) pair.
# NOTE(review): this rebinds `df`, which an earlier cell used for the
# MultiIndex frame; re-running that earlier slice cell afterwards would pick
# up this frame instead.
df = pd.DataFrame(
    {
        'ID': [1] * 3 + [2] * 3,
        'attr': list('ABC') * 2,
        'value': [0.1, 0.2, 0.3, 0.2, 0.35, 0.4],
    }
)

df

Unnamed: 0,ID,attr,value
0,1,A,0.1
1,1,B,0.2
2,1,C,0.3
3,2,A,0.2
4,2,B,0.35
5,2,C,0.4


In [26]:
# Takeaway: for each index entry (ID), read off the value under each column
# label (attr).
df.pivot_table(
    values=['value'],
    index=['ID'],
    columns=['attr'],
)

Unnamed: 0_level_0,value,value,value
attr,A,B,C
ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,0.1,0.2,0.3
2,0.2,0.35,0.4
