## 3.1 Pandas数据结构

### 3.1.1 Series

In [11]:
import pandas as pd

# Build a Series from a plain list; no index is given, so pandas supplies
# the default integer labels.
ser_obj = pd.Series(data=[1, 2, 3, 4, 5])
ser_obj

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [12]:
# Build a Series with explicit string labels instead of the default integers.
ser_obj = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))
ser_obj

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [13]:
# A dict becomes a Series: its keys turn into the index, its values into the data.
year_data = {
    2001: 17.8,
    2002: 20.1,
    2003: 16.5,
}
ser_obj2 = pd.Series(data=year_data)
ser_obj2

2001    17.8
2002    20.1
2003    16.5
dtype: float64

In [14]:
ser_obj.index         # Get the index (labels) of ser_obj

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [15]:
ser_obj.values       # Get the data of ser_obj

array([1, 2, 3, 4, 5], dtype=int64)

In [17]:
#ser_obj[3]            # (disabled alternative) get the data at position 3
# Look the element up by its index label
ser_obj['d']

4

In [18]:
# Arithmetic is applied element by element; the index is kept
ser_obj * 2

a     2
b     4
c     6
d     8
e    10
dtype: int64

### 3.1.2 DataFrame

In [19]:
import numpy as np
import pandas as pd

# 2x3 array of single-character strings that supplies the DataFrame's data.
demo_arr = np.array([
    ['a', 'b', 'c'],
    ['d', 'e', 'f'],
])
# No labels are given, so rows and columns both get default integer labels.
df_obj = pd.DataFrame(data=demo_arr)
df_obj

Unnamed: 0,0,1,2
0,a,b,c
1,d,e,f


In [23]:
# Rebuild the DataFrame from the same array, this time naming the columns.
df_obj = pd.DataFrame(
    data=demo_arr,
    columns=['No1', 'No2', 'No3'],
)
df_obj

Unnamed: 0,No1,No2,No3
0,a,b,c
1,d,e,f


In [41]:
# Select rows 0 and 1 (all columns) by position. Passing a list of row
# positions to .iloc returns a DataFrame.
# The original cell first assigned df_obj['No2'] to `element` and immediately
# overwrote it; that dead assignment is dropped (column access is shown in a
# later cell via df_obj.No2).
# Note: df_obj[1] does not work here because 1 is not one of the column labels.
element = df_obj.iloc[[0, 1], :]
print(element)

  No1 No2 No3
0   a   b   c
1   d   e   f


In [42]:
type(element)                # Check the type of the returned result

pandas.core.frame.DataFrame

In [43]:
element = df_obj.No2  # Fetch a column's data through attribute access
element

0    b
1    e
Name: No2, dtype: object

In [44]:
type(element)           # Check the type of the returned result

pandas.core.series.Series

In [45]:
# Assigning a list to a new column label adds that column to df_obj
df_obj['No4'] = ['g', 'h']
df_obj

Unnamed: 0,No1,No2,No3,No4
0,a,b,c,g
1,d,e,f,h


In [46]:
# Remove column 'No3' from df_obj in place
del df_obj['No3']
df_obj

Unnamed: 0,No1,No2,No4
0,a,b,g
1,d,e,h


## 3.2 索引操作及高级索引

### 3.2.1 索引对象

In [47]:
import pandas as pd

# Values 0..4 labelled 'a'..'e'; keep a reference to the Index object itself.
ser_obj = pd.Series(data=range(5), index=list('abcde'))
ser_index = ser_obj.index
ser_index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [49]:
# Index objects are immutable: assigning to a position raises TypeError.
# The error is caught and printed so the demonstration is visible while the
# notebook can still be run from top to bottom.
try:
    ser_index[2] = 'cc'
except TypeError as err:
    print(f'TypeError: {err}')

TypeError: Index does not support mutable operations

In [50]:
# Build a second Series on top of the first one's Index object, then check
# whether both Series refer to the very same Index.
ser_obj1 = pd.Series(data=range(3), index=list('abc'))
ser_obj2 = pd.Series(data=list('abc'), index=ser_obj1.index)
ser_obj2.index is ser_obj1.index

True

### 3.2.2 重置索引

In [51]:
import pandas as pd

# Series whose labels are deliberately out of alphabetical order.
ser_obj = pd.Series(data=[1, 2, 3, 4, 5], index=list('cdabe'))
ser_obj

c    1
d    2
a    3
b    4
e    5
dtype: int64

In [52]:
# Reindex onto a new label list; 'f' has no value in ser_obj, so it comes out as NaN
ser_obj2 = ser_obj.reindex(['a', 'b', 'c', 'd', 'e', 'f']) 
ser_obj2

a    3.0
b    4.0
c    1.0
d    2.0
e    5.0
f    NaN
dtype: float64

In [53]:
# Reindex and supply the value to use for labels that have no data (here 6)
ser_obj2 = ser_obj.reindex(['a', 'b', 'c', 'd', 'e', 'f'], fill_value = 6)
ser_obj2

a    3
b    4
c    1
d    2
e    5
f    6
dtype: int64

In [54]:
# Series labelled with the even integers 0, 2, 4, 6 (gaps are filled in later cells).
ser_obj3 = pd.Series(data=[1, 3, 5, 7], index=[0, 2, 4, 6])
ser_obj3

0    1
2    3
4    5
6    7
dtype: int64

In [55]:
ser_obj3.reindex(range(6), method = 'ffill') # Reindex, filling new labels forward from the previous value

0    1
1    1
2    3
3    3
4    5
5    5
dtype: int64

In [56]:
# Reindex onto 0..5, filling each new label backward from the next existing one.
# ser_obj3 holds the values 1, 3, 5, 7 at labels 0, 2, 4, 6; those lines had
# been pasted into this cell as bare text, which is not valid Python, so they
# are kept here as a comment instead.
ser_obj3.reindex(range(6), method='bfill')

0    1
1    3
2    3
3    5
4    5
5    7
dtype: int64

### 3.2.3 索引操作

In [58]:
import pandas as pd

ser_obj = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
# Fetch the element at position 2. Positional access goes through .iloc:
# ser_obj[2] on a string-labelled Series relies on the deprecated fallback that
# treats an integer key as a position.
ser_obj.iloc[2]

3

In [59]:
ser_obj['c']    # Fetch data by index label

3

In [60]:
ser_obj[2: 4]           # Slice by position (the end position is excluded)

c    3
d    4
dtype: int64

In [61]:
ser_obj['c': 'e']      # Slice by index label (the end label is included)

c    3
d    4
e    5
dtype: int64

In [63]:
# Pick non-adjacent elements by position. .iloc makes the positional intent
# explicit; ser_obj[[0, 2, 4]] on a string-labelled Series depends on the
# deprecated treatment of integer keys as positions.
ser_obj.iloc[[0, 2, 4]]

a    1
c    3
e    5
dtype: int64

In [64]:
ser_obj[['a', 'c', 'd']]   # Pick non-adjacent elements by index label

a    1
c    3
d    4
dtype: int64

In [65]:
ser_bool = ser_obj > 2         # Build a boolean Series: True where the value exceeds 2
# ser_obj holds the values [1, 2, 3, 4, 5]
ser_bool

a    False
b    False
c     True
d     True
e     True
dtype: bool

In [67]:
ser_obj[ser_bool]               # Keep only the elements whose mask entry is True

c    3
d    4
e    5
dtype: int64

In [75]:
# 3x4 grid holding 0..11, with lower-case column labels and upper-case row labels.
arr = np.arange(12).reshape(3, 4)
df_obj = pd.DataFrame(data=arr, index=list('ABC'), columns=list('abcd'))
df_obj

Unnamed: 0,a,b,c,d
A,0,1,2,3
B,4,5,6,7
C,8,9,10,11


In [76]:
# Select column 'b' by its label
df_obj['b']

A    1
B    5
C    9
Name: b, dtype: int32

In [77]:
# Check the type returned when a single column label is used
type(df_obj['b'])

pandas.core.series.Series

In [78]:
dd = df_obj[['b', 'd']]        # Select the non-adjacent columns 'b' and 'd' with a list of labels
print(dd)
print(type(dd))

   b   d
A  1   3
B  5   7
C  9  11
<class 'pandas.core.frame.DataFrame'>


In [82]:
df_obj[:2]               # Slice out the rows at positions 0 and 1

Unnamed: 0,a,b,c,d
A,0,1,2,3
B,4,5,6,7


In [85]:
# Chain two selections: first slice the rows at positions 0-2, then pick the
# non-adjacent columns 'b' and 'd' from that result
df_obj[: 3][['b', 'd']] 

Unnamed: 0,b,d
A,1,3
B,5,7
C,9,11


### 多学一招

In [86]:
# 4x4 grid holding 0..15 with lettered columns and default integer row labels.
arr = np.arange(16).reshape(4, 4)
dataframe_obj = pd.DataFrame(data=arr, columns=list('abcd'))
dataframe_obj

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [87]:
# .loc selects by label: all rows, columns 'c' and 'a' in that order
dataframe_obj.loc[:, ["c", "a"]]

Unnamed: 0,c,a
0,2,0
1,6,4
2,10,8
3,14,12


In [88]:
# .iloc selects by position: all rows, columns at positions 2 and 0 in that order
dataframe_obj.iloc[:, [2, 0]]

Unnamed: 0,c,a
0,2,0
1,6,4
2,10,8
3,14,12


In [89]:
# Label-based slice: rows labelled 1 through 2 (end label included), columns 'b' and 'c'
dataframe_obj.loc[1:2, ['b','c']]

Unnamed: 0,b,c
1,5,6
2,9,10


In [90]:
# Position-based slice: rows at positions 1 and 2 (end position 3 excluded), columns at positions 1 and 2
dataframe_obj.iloc[1:3, [1, 2]]

Unnamed: 0,b,c
1,5,6
2,9,10


## 3.3 算术运算与数据对齐

In [44]:
# Three values (10, 11, 12) labelled 0..2.
obj_one = pd.Series(data=range(10, 13), index=range(3))
obj_one

0    10
1    11
2    12
dtype: int64

In [45]:
# Five values (20..24) labelled 0..4.
obj_two = pd.Series(data=range(20, 25), index=range(5))
obj_two

0    20
1    21
2    22
3    23
4    24
dtype: int64

In [46]:
# Addition aligns on index labels; a label present in only one operand yields NaN
obj_one + obj_two

0    30.0
1    32.0
2    34.0
3     NaN
4     NaN
dtype: float64

In [47]:
obj_one.add(obj_two, fill_value = 0)   # Add, substituting 0 for a value missing from one operand

0    30.0
1    32.0
2    34.0
3    23.0
4    24.0
dtype: float64

## 3.4 数据排序

### 3.4.1 按索引排序

In [48]:
import pandas as pd

# Values 10..14 with an unsorted integer index in which label 3 appears twice.
ser_obj = pd.Series(data=range(10, 15), index=[5, 3, 1, 3, 2])
ser_obj

5    10
3    11
1    12
3    13
2    14
dtype: int64

In [49]:
ser_obj.sort_index()        # Sort by index in ascending order

1    12
2    14
3    11
3    13
5    10
dtype: int64

In [50]:
ser_obj.sort_index(ascending = False)  # Sort by index in descending order

5    10
3    11
3    13
2    14
1    12
dtype: int64

In [51]:
import numpy as np
import pandas as pd

# 3x3 grid holding 0..8 whose row labels (4, 3, 5) are out of order.
df_obj = pd.DataFrame(data=np.arange(9).reshape((3, 3)), index=[4, 3, 5])
df_obj

Unnamed: 0,0,1,2
4,0,1,2
3,3,4,5
5,6,7,8


In [52]:
df_obj.sort_index()                      # Sort rows by index in ascending order

Unnamed: 0,0,1,2
3,3,4,5
4,0,1,2
5,6,7,8


In [53]:
df_obj.sort_index(ascending = False)     # Sort rows by index in descending order

Unnamed: 0,0,1,2
5,6,7,8
4,0,1,2
3,3,4,5


### 3.4.2 按值排序

In [54]:
# Series mixing numbers with two missing values (NaN).
ser_obj = pd.Series(data=[4, np.nan, 6, np.nan, -3, 2])
ser_obj

0    4.0
1    NaN
2    6.0
3    NaN
4   -3.0
5    2.0
dtype: float64

In [55]:
ser_obj.sort_values()   # Sort by value in ascending order

4   -3.0
5    2.0
0    4.0
2    6.0
1    NaN
3    NaN
dtype: float64

In [56]:
# 3x4 table of floats with default integer row and column labels.
df_obj = pd.DataFrame(data=[
    [0.4, -0.1, -0.3, 0.0],
    [0.2, 0.6, -0.1, -0.7],
    [0.8, 0.6, -0.5, 0.1],
])
df_obj

Unnamed: 0,0,1,2,3
0,0.4,-0.1,-0.3,0.0
1,0.2,0.6,-0.1,-0.7
2,0.8,0.6,-0.5,0.1


In [57]:
df_obj.sort_values(by = 2)  # Sort the rows by the values in the column labelled 2

Unnamed: 0,0,1,2,3
2,0.8,0.6,-0.5,0.1
0,0.4,-0.1,-0.3,0.0
1,0.2,0.6,-0.1,-0.7


## 3.5 统计计算与描述

### 3.5.1 常用的统计计算

In [58]:
# 3x4 grid holding 0..11 with lettered columns, used for the statistics below.
df_obj = pd.DataFrame(data=np.arange(12).reshape((3, 4)), columns=list('abcd'))
df_obj

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [59]:
df_obj.sum()          # Sum the elements of each column

a    12
b    15
c    18
d    21
dtype: int64

In [60]:
df_obj.max()         # Maximum of each column

a     8
b     9
c    10
d    11
dtype: int32

In [61]:
df_obj.min(axis=1)   # Work along the horizontal axis: minimum of each row

0    0
1    4
2    8
dtype: int32

### 3.5.2 统计描述（describe）

In [62]:
# 3x4 table of integers with default labels, summarised in the next cell.
df_obj = pd.DataFrame(data=[
    [12, 6, -11, 19],
    [-1, 7, 50, 36],
    [5, 9, 23, 28],
])
df_obj

Unnamed: 0,0,1,2,3
0,12,6,-11,19
1,-1,7,50,36
2,5,9,23,28


In [63]:
# Per-column summary statistics (count, mean, std, min, quartiles, max)
df_obj.describe()

Unnamed: 0,0,1,2,3
count,3.0,3.0,3.0,3.0
mean,5.333333,7.333333,20.666667,27.666667
std,6.506407,1.527525,30.566867,8.504901
min,-1.0,6.0,-11.0,19.0
25%,2.0,6.5,6.0,23.5
50%,5.0,7.0,23.0,28.0
75%,8.5,8.0,36.5,32.0
max,12.0,9.0,50.0,36.0


## 3.6 层次化索引

### 3.6.1 认识层次化索引

In [64]:
import numpy as np
import pandas as pd

# Two parallel label lists passed as `index` produce a two-level index:
# the first list is the outer level, the second the inner level.
mulitindex_series = pd.Series(
    data=[15848, 13472, 12073.8, 7813, 7446, 6444, 15230, 8269],
    index=[
        ['河北省'] * 4 + ['河南省'] * 4,
        ['石家庄市', '唐山市', '邯郸市', '秦皇岛市',
         '郑州市', '开封市', '洛阳市', '新乡市'],
    ],
)
mulitindex_series

河北省  石家庄市    15848.0
     唐山市     13472.0
     邯郸市     12073.8
     秦皇岛市     7813.0
河南省  郑州市      7446.0
     开封市      6444.0
     洛阳市     15230.0
     新乡市      8269.0
dtype: float64

In [65]:
import pandas as pd
from pandas import DataFrame, Series

# Same two-level row index as the Series example; '占地面积' is the single
# column added on top of it.
mulitindex_df = DataFrame(
    data={'占地面积': [15848, 13472, 12073.8, 7813, 7446, 6444, 15230, 8269]},
    index=[
        ['河北省'] * 4 + ['河南省'] * 4,
        ['石家庄市', '唐山市', '邯郸市', '秦皇岛市',
         '郑州市', '开封市', '洛阳市', '新乡市'],
    ],
)
mulitindex_df

Unnamed: 0,Unnamed: 1,占地面积
河北省,石家庄市,15848.0
河北省,唐山市,13472.0
河北省,邯郸市,12073.8
河北省,秦皇岛市,7813.0
河南省,郑州市,7446.0
河南省,开封市,6444.0
河南省,洛阳市,15230.0
河南省,新乡市,8269.0


In [66]:
from pandas import MultiIndex

# One (outer, inner) label pair per row.
list_tuples = [
    ('A', 'A1'), ('A', 'A2'),
    ('B', 'B1'), ('B', 'B2'), ('B', 'B3'),
]
# from_tuples turns the pairs into a two-level index with named levels.
multi_index = MultiIndex.from_tuples(list_tuples, names=['外层索引', '内层索引'])
multi_index

MultiIndex(levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2', 'B3']],
           labels=[[0, 0, 1, 1, 1], [0, 1, 2, 3, 4]],
           names=['外层索引', '内层索引'])

In [67]:
# Import the required package
import pandas as pd

# One row of values for each (outer, inner) pair in multi_index.
values = [
    [1, 2, 3],
    [8, 5, 7],
    [4, 7, 7],
    [5, 5, 4],
    [4, 9, 9],
]
df_indexs = pd.DataFrame(values, index=multi_index)
df_indexs

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2
外层索引,内层索引,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,A1,1,2,3
A,A2,8,5,7
B,B1,4,7,7
B,B2,5,5,4
B,B3,4,9,9


In [68]:
from pandas import MultiIndex

# from_arrays takes one list per level: the first holds the outer labels,
# the second the inner labels, paired up position by position.
multi_array = MultiIndex.from_arrays(
    [list('ABABB'), ['A1', 'A2', 'B1', 'B2', 'B3']],
    names=['外层索引', '内层索引'],
)
multi_array

MultiIndex(levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2', 'B3']],
           labels=[[0, 1, 0, 1, 1], [0, 1, 2, 3, 4]],
           names=['外层索引', '内层索引'])

In [69]:
# Import the required packages
import numpy as np
import pandas as pd

# 5x3 array: one row of values for each entry of multi_array.
values = np.array([
    [1, 2, 3],
    [8, 5, 7],
    [4, 7, 7],
    [5, 5, 4],
    [4, 9, 9],
])
df_array = pd.DataFrame(values, index=multi_array)
df_array

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2
外层索引,内层索引,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,A1,1,2,3
B,A2,8,5,7
A,B1,4,7,7
B,B2,5,5,4
B,B3,4,9,9


In [70]:
from pandas import MultiIndex
import pandas as pd

# from_product pairs every number with every colour (Cartesian product).
numbers = [0, 1, 2]
colors = ['green', 'purple']
multi_product = MultiIndex.from_product(
    [numbers, colors],
    names=['number', 'color'],
)
multi_product

MultiIndex(levels=[[0, 1, 2], ['green', 'purple']],
           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
           names=['number', 'color'])

In [71]:
# Import the required package
import pandas as pd

# 6x2 array: one row of values for each (number, color) pair in multi_product.
values = np.array([
    [7, 5], [6, 6],
    [3, 1], [5, 5],
    [4, 5], [5, 3],
])
df_product = pd.DataFrame(values, index=multi_product)
df_product

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
number,color,Unnamed: 2_level_1,Unnamed: 3_level_1
0,green,7,5
0,purple,6,6
1,green,3,1
1,purple,5,5
2,green,4,5
2,purple,5,3


### 3.6.2 层次化索引的操作

In [72]:
from pandas import Series, DataFrame

# Two-level index: the outer level is the category, the inner level the title.
ser_obj = Series(
    data=[50, 60, 40, 94, 63, 101, 200, 56, 45],
    index=[
        ['小说'] * 3 + ['散文随笔'] * 3 + ['传记'] * 3,
        ['高山上的小邮局', '失踪的总统', '绿毛水怪',
         '皮囊', '浮生六记', '自在独行',
         '梅西', '老舍自传', '库里传'],
    ],
)
ser_obj

小说    高山上的小邮局     50
      失踪的总统       60
      绿毛水怪        40
散文随笔  皮囊          94
      浮生六记        63
      自在独行       101
传记    梅西         200
      老舍自传        56
      库里传         45
dtype: int64

In [73]:
ser_obj['小说']     # Get all data whose outer index label is '小说'

高山上的小邮局    50
失踪的总统      60
绿毛水怪       40
dtype: int64

In [74]:
ser_obj[:,'自在独行']       # Get the data matching an inner index label

散文随笔    101
dtype: int64

In [75]:
ser_obj.swaplevel()               # Swap the positions of the outer and inner index levels

高山上的小邮局  小说       50
失踪的总统    小说       60
绿毛水怪     小说       40
皮囊       散文随笔     94
浮生六记     散文随笔     63
自在独行     散文随笔    101
梅西       传记      200
老舍自传     传记       56
库里传      传记       45
dtype: int64

In [76]:
from pandas import DataFrame, Series

# Two columns ('str', 'num') under a two-level row index whose outer letters
# and inner numbers are both unsorted.
df_obj = DataFrame(
    data={
        'str': list('abdefkdsl'),
        'num': [1, 2, 4, 5, 3, 2, 6, 2, 3],
    },
    index=[
        list('AAACCCBBB'),
        [1, 3, 2, 3, 1, 2, 4, 5, 8],
    ],
)
df_obj

Unnamed: 0,Unnamed: 1,str,num
A,1,a,1
A,3,b,2
A,2,d,4
C,3,e,5
C,1,f,3
C,2,k,2
B,4,d,6
B,5,s,2
B,8,l,3


In [77]:
df_obj.sort_index()         # Sort by index

Unnamed: 0,Unnamed: 1,str,num
A,1,a,1
A,2,d,4
A,3,b,2
B,4,d,6
B,5,s,2
B,8,l,3
C,1,f,3
C,2,k,2
C,3,e,5


In [78]:
# Sort the rows by the 'num' column in descending order.
# sort_values is the method for sorting by column values; the `by` keyword of
# sort_index was deprecated and has been removed from pandas.
df_obj.sort_values(by='num', ascending=False)

  


Unnamed: 0,Unnamed: 1,str,num
B,4,d,6
C,3,e,5
A,2,d,4
C,1,f,3
B,8,l,3
A,3,b,2
C,2,k,2
B,5,s,2
A,1,a,1


## 3.7 读写数据操作

### 3.7.1 读写文本文件

In [79]:
import pandas as pd
df = pd.DataFrame({'one_name':[1,2,3], 'two_name':[4,5,6]})
# Write df to a CSV file; index=False leaves the row index out of the file
# NOTE(review): absolute Windows path — adjust it to the local machine before running
df.to_csv(r'E:/数据分析/itcast.csv',index=False)
'写入完毕'

'写入完毕'

In [80]:
import pandas as pd

# Read the CSV file from the given directory. The context manager closes the
# file handle once parsing is done; the original left it open.
with open(r"E:/数据分析/itcast.csv") as file:
    file_data = pd.read_csv(file)
file_data

Unnamed: 0,one_name,two_name
0,1,4
1,2,5
2,3,6


In [81]:
import pandas as pd

# Read the text file with read_table. The context manager closes the file
# handle once parsing is done; the original left it open.
with open(r'E:/数据分析/itcast.txt') as file:
    data = pd.read_table(file)
data

Unnamed: 0,Hello itcast！
0,ABC
1,itheima


### 3.7.2 读写Excel文件

In [82]:
import pandas as pd

df1 = pd.DataFrame({'col1': ['人', '工'], 'col2': ['智', '能']})
# Write df1 to a worksheet named '可视化'. The sheet name is passed by keyword:
# positional arguments after the path are deprecated in recent pandas releases.
df1.to_excel(r'E:/数据分析/itcast.xlsx', sheet_name='可视化')
'写入完毕'

'写入完毕'

In [83]:
import pandas as pd
# Path of the workbook to read
excel_path =r'E:/数据分析/itcast.xlsx'
# No sheet is named, so read_excel falls back to its default sheet
data = pd.read_excel(excel_path)
data

Unnamed: 0,col1,col2
0,传,播
1,智,客


### 3.7.3 读取HTML表格数据

In [3]:
import io

import pandas as pd
import requests

# Download the page. The timeout keeps the cell from hanging on a dead
# connection, and raise_for_status stops an HTTP error page from being parsed.
html_data = requests.get('http://kaoshi.edu.sina.com.cn/college/majorlist/',
                         timeout=10)
html_data.raise_for_status()
# read_html returns one DataFrame per <table> on the page. The bytes are
# wrapped in a file-like object because passing literal HTML content directly
# is deprecated in recent pandas releases.
html_table_data = pd.read_html(io.BytesIO(html_data.content), encoding='utf-8')
# Show the second table found on the page
html_table_data[1]

Unnamed: 0,0,1,2,3,4
0,专业名称,专业代码,专业大类,专业小类,操作
1,哲学类,0101,哲学,哲学类,开设院校 加入对比
2,哲学,010101,哲学,哲学类,开设院校 加入对比
3,逻辑学,010102,哲学,哲学类,开设院校 加入对比
4,宗教学,010103,哲学,哲学类,开设院校 加入对比
5,伦理学,010104,哲学,哲学类,开设院校 加入对比
6,经济学类,0201,经济学,经济学类,开设院校 加入对比
7,经济学,020101,经济学,经济学类,开设院校 加入对比
8,经济统计学,020102,经济学,经济学类,开设院校 加入对比
9,国民经济管理,020103,经济学,经济学类,开设院校 加入对比


### 3.7.4 读写数据库

In [10]:
import pandas as pd
from pandas import DataFrame
from sqlalchemy import create_engine
# Connection URL: MySQL account root, password 123456, host 127.0.0.1,
# database students_info, using the mysqlconnector driver.
# NOTE(review): credentials are hard-coded in the URL — consider reading them from the environment.
engine = create_engine('mysql+mysqlconnector://root:123456@127.0.0.1/students_info')
# Read from the table named userinfo
pd.read_sql('userinfo',engine)

Unnamed: 0,id,name
0,1,32424
1,2,中国人


In [86]:
import pandas as pd
from pandas import DataFrame,Series
from sqlalchemy import create_engine
# MySQL account root, password 123456, database name: info
# Table name: person_info
# Create the database engine
# The mysql+mysqlconnector prefix in the URL selects the mysqlconnector driver
# NOTE(review): credentials are hard-coded in the URL — consider reading them from the environment.
engine = create_engine('mysql+mysqlconnector://root:123456@127.0.0.1/info')
# Query only the rows whose id is greater than 3
sql = 'select * from person_info where id >3;'
pd.read_sql(sql,engine)

Unnamed: 0,id,name,age,height,gender
0,4,刘华,59,175.0,男
1,5,王贤,18,172.0,女
2,6,周平,36,,男
3,7,程坤,27,181.0,男
4,8,李平,38,160.0,女


In [5]:
from pandas import DataFrame,Series
import pandas as pd
from sqlalchemy import create_engine
# NOTE(review): wildcard import; nothing in this cell appears to use a name from it
from sqlalchemy.types import *
# Four rows with three columns: '班级', '男生人数' and '女生人数'
df = DataFrame({"班级":["一年级","二年级","三年级","四年级"],
                              "男生人数":[25,23,27,30],
                              "女生人数":[19,17,20,20]})
# Create the database engine
# The mysql+mysqlconnector prefix in the URL selects the mysqlconnector driver
# Account: root, password: 123456, database name: students_info
# Table name: students
# NOTE(review): credentials are hard-coded in the URL — consider reading them from the environment.
engine=create_engine('mysql+mysqlconnector://root:123456@127.0.0.1/students_info')
# Write df into the students table
df.to_sql('students',engine)

ModuleNotFoundError: No module named 'mysql'

## 案例—读取北京市2006~2018年高考分数线表格信息及分析

In [88]:
import pandas as pd
# Path of the Excel file
# NOTE(review): absolute user-specific path — adjust it to the local machine before running
file_path = 'C:/Users/admin/Desktop/scores.xlsx'
# header=[0, 1] gives the list of row positions to use as column labels,
# producing two-level column labels
df_obj = pd.read_excel(file_path, header=[0, 1])
df_obj

Unnamed: 0_level_0,一本分数线,一本分数线,二本分数线,二本分数线
Unnamed: 0_level_1,文科,理科,文科,理科
2018,576,532,488,432
2017,555,537,468,439
2016,583,548,532,494
2015,579,548,527,495
2014,565,543,507,495
2013,549,550,494,505
2012,495,477,446,433
2011,524,484,481,435
2010,524,494,474,441
2009,532,501,489,459


In [89]:
# Sort the rows by index in descending order
sorted_obj = df_obj.sort_index(ascending = False)
sorted_obj

Unnamed: 0_level_0,一本分数线,一本分数线,二本分数线,二本分数线
Unnamed: 0_level_1,文科,理科,文科,理科
2018,576,532,488,432
2017,555,537,468,439
2016,583,548,532,494
2015,579,548,527,495
2014,565,543,507,495
2013,549,550,494,505
2012,495,477,446,433
2011,524,484,481,435
2010,524,494,474,441
2009,532,501,489,459


In [90]:
# Maximum of each column
sorted_obj.max()

一本分数线  文科    583
       理科    550
二本分数线  文科    532
       理科    505
dtype: int64

In [91]:
# Minimum of each column
sorted_obj.min()

一本分数线  文科    495
       理科    477
二本分数线  文科    446
       理科    432
dtype: int64

In [92]:
# Range (max - min) of the ("一本分数线", "文科") column.
# Series.ptp() no longer exists in current pandas, so the range is computed
# directly from max() and min().
result1 = (sorted_obj["一本分数线", "文科"].max()
           - sorted_obj["一本分数线", "文科"].min())
result1

88

In [93]:
# Range (max - min) of the ("一本分数线", "理科") column.
# Series.ptp() no longer exists in current pandas, so the range is computed
# directly from max() and min().
result2 = (sorted_obj["一本分数线", "理科"].max()
           - sorted_obj["一本分数线", "理科"].min())
result2

73

In [94]:
# Range (max - min) of the ("二本分数线", "文科") column.
# Series.ptp() no longer exists in current pandas, so the range is computed
# directly from max() and min().
result3 = (sorted_obj["二本分数线", "文科"].max()
           - sorted_obj["二本分数线", "文科"].min())
result3

86

In [95]:
# Range (max - min) of the ("二本分数线", "理科") column.
# Series.ptp() no longer exists in current pandas, so the range is computed
# directly from max() and min().
result4 = (sorted_obj["二本分数线", "理科"].max()
           - sorted_obj["二本分数线", "理科"].min())
result4

73

In [96]:
# Take the ('一本分数线', '文科') column, then subtract the 2017 entry from the 2018 entry
ser_obj1 = sorted_obj['一本分数线','文科']
ser_obj1[2018] - ser_obj1[2017]

21

In [97]:
# Take the ('一本分数线', '理科') column, then subtract the 2017 entry from the 2018 entry
ser_obj2 = sorted_obj['一本分数线','理科']
ser_obj2[2018] - ser_obj2[2017]

-5

In [98]:
# Take the ('二本分数线', '文科') column, then subtract the 2017 entry from the 2018 entry
ser_obj3 = sorted_obj['二本分数线','文科']
ser_obj3[2018] - ser_obj3[2017]

20

In [99]:
# Take the ('二本分数线', '理科') column, then subtract the 2017 entry from the 2018 entry
ser_obj4 = sorted_obj['二本分数线','理科']
ser_obj4[2018] - ser_obj4[2017]

-7

In [100]:
# Per-column summary statistics of the score table
sorted_obj.describe()

Unnamed: 0_level_0,一本分数线,一本分数线,二本分数线,二本分数线
Unnamed: 0_level_1,文科,理科,文科,理科
count,13.0,13.0,13.0,13.0
mean,541.615385,521.153846,487.923077,464.384615
std,28.15001,25.986683,23.567144,27.274953
min,495.0,477.0,446.0,432.0
25%,524.0,501.0,474.0,439.0
50%,532.0,531.0,488.0,459.0
75%,565.0,543.0,494.0,494.0
max,583.0,550.0,532.0,505.0
