In [1]:
import pandas as pd

In [2]:
pd.__version__

'0.23.4'

In [3]:
ser = pd.Series(['张三', '李四', '王五'])
ser

0    张三
1    李四
2    王五
dtype: object

In [4]:
list(range(1, 4))

[1, 2, 3]

In [5]:
# Same three names as before, now with explicit integer labels 1..3
# in place of the default 0..2 positions.
ser = pd.Series(data=['张三', '李四', '王五'], index=[1, 2, 3])
ser

1    张三
2    李四
3    王五
dtype: object

In [6]:
ser[2]

'李四'

In [7]:
ser[2] = 'Jack'

In [8]:
ser

1      张三
2    Jack
3      王五
dtype: object

In [9]:
ind = [1, 3]
ser[ind]

1    张三
3    王五
dtype: object

In [10]:
ser.values

array(['张三', 'Jack', '王五'], dtype=object)

In [11]:
type(ser.values)

numpy.ndarray

In [12]:
ser2 = pd.Series([18, 19, 17], index = range(1, 4))
ser2

1    18
2    19
3    17
dtype: int64

In [13]:
ser2 + 1

1    19
2    20
3    18
dtype: int64

In [14]:
ser2[ser2%2==0]

1    18
dtype: int64

In [15]:
# City -> value mapping; building a Series from a dict uses the keys as index labels.
data = dict(beijing=9240, shanghai=8960, guangzhou=7400)
ser3 = pd.Series(data)
ser3

beijing      9240
shanghai     8960
guangzhou    7400
dtype: int64

In [16]:
ser3['beijing']

9240

In [17]:
'beijing' in ser3

True

In [18]:
ser3

beijing      9240
shanghai     8960
guangzhou    7400
dtype: int64

In [19]:
ser3.to_dict()

{'beijing': 9240, 'shanghai': 8960, 'guangzhou': 7400}

In [20]:
ser3.tolist()

[9240, 8960, 7400]

In [21]:
ser3.to_json()

'{"beijing":9240,"shanghai":8960,"guangzhou":7400}'

In [22]:
ser3.to_frame()

Unnamed: 0,0
beijing,9240
shanghai,8960
guangzhou,7400


## DataFrame

In [23]:
import numpy as np

In [24]:
# The nine integers 100..108 laid out row by row as a 3x3 matrix.
data = np.arange(100, 109).reshape((3, 3))
data

array([[100, 101, 102],
       [103, 104, 105],
       [106, 107, 108]])

In [25]:
df = pd.DataFrame(data)
df

Unnamed: 0,0,1,2
0,100,101,102
1,103,104,105
2,106,107,108


In [26]:
# Column-oriented input: each key becomes a column, each list holds that column's values.
data = dict(
    name=['jack', 'mary', 'lily'],
    age=[19, 19, 17],
    height=[1.68, 1.37, 1.62],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,name,age,height
0,jack,19,1.68
1,mary,19,1.37
2,lily,17,1.62


In [27]:
df.columns

Index(['name', 'age', 'height'], dtype='object')

In [28]:
df.columns = ['userName', 'age', 'height']

In [29]:
df

Unnamed: 0,userName,age,height
0,jack,19,1.68
1,mary,19,1.37
2,lily,17,1.62


In [30]:


df = pd.DataFrame(data, columns = ['userName', 'age', 'height', 'email'])
df


Unnamed: 0,userName,age,height,email
0,,19,1.68,
1,,19,1.37,
2,,17,1.62,


In [31]:
df.index

RangeIndex(start=0, stop=3, step=1)

In [32]:
df = pd.DataFrame(data, columns = ['userName', 'age', 'height', 'email'], index = range(1, 4))
df

Unnamed: 0,userName,age,height,email
1,,19,1.68,
2,,19,1.37,
3,,17,1.62,


In [33]:
df = pd.DataFrame(data, columns = ['name', 'age', 'height', 'email'], index = range(1, 4))
df

Unnamed: 0,name,age,height,email
1,jack,19,1.68,
2,mary,19,1.37,
3,lily,17,1.62,


## pandas中数据选取操作

In [34]:
# Four-row sample frame used throughout the selection examples.
data = dict(
    name=['张三', '李四', '王五', '赵六'],
    age=[19, 19, 17, 20],
    height=[1.68, 1.37, 1.62, 1.55],
)
# Column order is spelled out from the dict's own key order: name, age, height.
df = pd.DataFrame(data, columns=list(data))
df

Unnamed: 0,name,age,height
0,张三,19,1.68
1,李四,19,1.37
2,王五,17,1.62
3,赵六,20,1.55


In [35]:
df['name']

0    张三
1    李四
2    王五
3    赵六
Name: name, dtype: object

In [36]:
df.age

0    19
1    19
2    17
3    20
Name: age, dtype: int64

In [37]:
df[['name']]

Unnamed: 0,name
0,张三
1,李四
2,王五
3,赵六


In [38]:
df[['name', 'age']]

Unnamed: 0,name,age
0,张三,19
1,李四,19
2,王五,17
3,赵六,20


In [39]:
# Demonstration of the view-vs-copy pitfall: `names` is the column taken
# straight from df, not an independent copy. Assigning through it triggers
# SettingWithCopyWarning and the change shows up in df as well.
# Take df['name'].copy() when df must stay untouched.
names = df['name']
names[0] = 'jack'


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [40]:
df

Unnamed: 0,name,age,height
0,jack,19,1.68
1,李四,19,1.37
2,王五,17,1.62
3,赵六,20,1.55


In [41]:
names = df.name.copy()
names

0    jack
1      李四
2      王五
3      赵六
Name: name, dtype: object

In [42]:
names[0] = '周八'
names

0    周八
1    李四
2    王五
3    赵六
Name: name, dtype: object

In [43]:
df

Unnamed: 0,name,age,height
0,jack,19,1.68
1,李四,19,1.37
2,王五,17,1.62
3,赵六,20,1.55


In [44]:
df.columns

Index(['name', 'age', 'height'], dtype='object')

In [45]:
df.columns[1:3]

Index(['age', 'height'], dtype='object')

In [46]:
df[df.columns[1:3]]

Unnamed: 0,age,height
0,19,1.68
1,19,1.37
2,17,1.62
3,20,1.55


In [47]:
df

Unnamed: 0,name,age,height
0,jack,19,1.68
1,李四,19,1.37
2,王五,17,1.62
3,赵六,20,1.55


In [48]:
import datetime

In [49]:
# Derive a birth year from the age column.
# NOTE: this reads the current calendar year at run time, so the values in
# this column depend on when the notebook is executed.
df['year'] =  datetime.datetime.now().year - df.age
df

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
2,王五,17,1.62,2002
3,赵六,20,1.55,1999


In [50]:
df.drop('year', axis = 1)

Unnamed: 0,name,age,height
0,jack,19,1.68
1,李四,19,1.37
2,王五,17,1.62
3,赵六,20,1.55


In [51]:
df.drop(['year','height'], axis = 1)

Unnamed: 0,name,age
0,jack,19
1,李四,19
2,王五,17
3,赵六,20


In [52]:
df

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
2,王五,17,1.62,2002
3,赵六,20,1.55,1999


In [53]:
df.drop(df.columns[1::2], axis = 1)

Unnamed: 0,name,height
0,jack,1.68
1,李四,1.37
2,王五,1.62
3,赵六,1.55


In [54]:
df

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
2,王五,17,1.62,2002
3,赵六,20,1.55,1999


In [55]:
df.loc[1]

name        李四
age         19
height    1.37
year      2000
Name: 1, dtype: object

In [56]:
df.loc[[1]]

Unnamed: 0,name,age,height,year
1,李四,19,1.37,2000


In [57]:
df.loc[[1, 3]]

Unnamed: 0,name,age,height,year
1,李四,19,1.37,2000
3,赵六,20,1.55,1999


In [58]:
# Goal: fetch name and age of the last two rows without knowing their index
# labels in advance. Start by inspecting the index itself.
df.index 

RangeIndex(start=0, stop=4, step=1)

In [59]:
df.index[-2:]

RangeIndex(start=2, stop=4, step=1)

In [60]:
df.loc[df.index[-2:]]

Unnamed: 0,name,age,height,year
2,王五,17,1.62,2002
3,赵六,20,1.55,1999


In [61]:
df.loc[df.index[-2:], ['name', 'age']]

Unnamed: 0,name,age
2,王五,17
3,赵六,20


In [62]:
df

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
2,王五,17,1.62,2002
3,赵六,20,1.55,1999


In [63]:
df.shape

(4, 4)

In [64]:
# Append a row at the end. df.shape[0] (the current row count) is used as the
# new label; it is "one past the last label" only because df still has the
# default 0..n-1 RangeIndex.
df.loc[df.shape[0]] = {'age': 21, 'name': '吴九', 'height': 1.66, 'year': 0}
df

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
2,王五,17,1.62,2002
3,赵六,20,1.55,1999
4,吴九,21,1.66,0


In [65]:
df2 = df.drop(2)
df2

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
3,赵六,20,1.55,1999
4,吴九,21,1.66,0


In [66]:
df2.index = range(df2.shape[0])

In [67]:
df2

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
2,赵六,20,1.55,1999
3,吴九,21,1.66,0


In [68]:
df

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
2,王五,17,1.62,2002
3,赵六,20,1.55,1999
4,吴九,21,1.66,0


In [69]:
df2 = df.drop(2)
df2

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
3,赵六,20,1.55,1999
4,吴九,21,1.66,0


In [70]:
# Label 2 was removed by df.drop(2), so a label-based lookup must fail.
# The KeyError is the point of this cell: catch and print it so that
# Restart & Run All can continue past the demonstration.
try:
    df2.loc[2]
except KeyError as err:
    print('KeyError:', err)

KeyError: 'the label [2] is not in the [index]'

In [71]:
df2.iloc[2]  # iloc selects by integer position (the third row here), regardless of the index labels

name        赵六
age         20
height    1.55
year      1999
Name: 3, dtype: object

In [72]:
df2

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
3,赵六,20,1.55,1999
4,吴九,21,1.66,0


In [73]:
df2.index = list('ABCD')
df2

Unnamed: 0,name,age,height,year
A,jack,19,1.68,2000
B,李四,19,1.37,2000
C,赵六,20,1.55,1999
D,吴九,21,1.66,0


In [74]:
df2.iloc[1:3]

Unnamed: 0,name,age,height,year
B,李四,19,1.37,2000
C,赵六,20,1.55,1999


In [75]:
df2

Unnamed: 0,name,age,height,year
A,jack,19,1.68,2000
B,李四,19,1.37,2000
C,赵六,20,1.55,1999
D,吴九,21,1.66,0


In [76]:
df2.iat[1, 1]

19

In [77]:
df2.iat[1, 1] = 66
df2

Unnamed: 0,name,age,height,year
A,jack,19,1.68,2000
B,李四,66,1.37,2000
C,赵六,20,1.55,1999
D,吴九,21,1.66,0


In [78]:
df2.iat[1, 2]

1.37

In [79]:
df2.iat[1, 2] = 88 

In [80]:
df2

Unnamed: 0,name,age,height,year
A,jack,19,1.68,2000
B,李四,66,88.0,2000
C,赵六,20,1.55,1999
D,吴九,21,1.66,0


## 使用比较运算筛选数据


In [81]:
df2['height'] >= 1.65

A     True
B     True
C    False
D     True
Name: height, dtype: bool

In [82]:
df2[df2['height'] >= 1.65]

Unnamed: 0,name,age,height,year
A,jack,19,1.68,2000
B,李四,66,88.0,2000
D,吴九,21,1.66,0


In [83]:
df2[(df2['height'] >= 1.65) & (df2['age'] <= 20)]

Unnamed: 0,name,age,height,year
A,jack,19,1.68,2000


In [84]:
df

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
2,王五,17,1.62,2002
3,赵六,20,1.55,1999
4,吴九,21,1.66,0


In [85]:
df.query('height >= 1.65 and age <= 20')

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000


In [86]:
df.query('height >= 1.65 and age <= 20 or name == "吴九"')

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
4,吴九,21,1.66,0


In [87]:
age = 20
df.query('age < @age')

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
2,王五,17,1.62,2002


In [88]:
df['age'].isin([18, 19])

0     True
1     True
2    False
3    False
4    False
Name: age, dtype: bool

In [89]:
df[df['age'].isin([17, 19])]

Unnamed: 0,name,age,height,year
0,jack,19,1.68,2000
1,李四,19,1.37,2000
2,王五,17,1.62,2002


In [90]:
df.T

Unnamed: 0,0,1,2,3,4
name,jack,李四,王五,赵六,吴九
age,19,19,17,20,21
height,1.68,1.37,1.62,1.55,1.66
year,2000,2000,2002,1999,0


## pandas导入数据


### txt

In [91]:
pd.read_table('./pandas加载数据/01.txt')

Unnamed: 0,email
0,jack@example.com
1,mary@example.com
2,lily@example.com
3,tom@example.com


In [92]:
pd.read_table('./pandas加载数据/02.txt') # columns in this file are separated by tab characters

Unnamed: 0,name,age,email
0,jack,18,jack@example.com
1,mary,19,mary@example.com
2,lily,17,lily@example.com
3,tom,17,tom@example.com
4,joe,20,joe@example.com


In [93]:
pd.read_table('./pandas加载数据/03.txt', sep = ':', header = None)

Unnamed: 0,0,1,2,3,4,5,6
0,root,x,0,0,root,/root,/bin/bash
1,daemon,x,1,1,daemon,/usr/sbin,/usr/sbin/nologin
2,bin,x,2,2,bin,/bin,/usr/sbin/nologin
3,sys,x,3,3,sys,/dev,/usr/sbin/nologin
4,sync,x,4,65534,sync,/bin,/bin/sync
5,games,x,5,60,games,/usr/games,/usr/sbin/nologin
6,man,x,6,12,man,/var/cache/man,/usr/sbin/nologin
7,lp,x,7,7,lp,/var/spool/lpd,/usr/sbin/nologin
8,mail,x,8,8,mail,/var/mail,/usr/sbin/nologin
9,news,x,9,9,news,/var/spool/news,/usr/sbin/nologin


In [94]:
pd.read_table('./pandas加载数据/03.txt', sep = ':', header = None, names =['name', 'pwd', 'uid', 'gid', 'local', 'home', 'shell'])

Unnamed: 0,name,pwd,uid,gid,local,home,shell
0,root,x,0,0,root,/root,/bin/bash
1,daemon,x,1,1,daemon,/usr/sbin,/usr/sbin/nologin
2,bin,x,2,2,bin,/bin,/usr/sbin/nologin
3,sys,x,3,3,sys,/dev,/usr/sbin/nologin
4,sync,x,4,65534,sync,/bin,/bin/sync
5,games,x,5,60,games,/usr/games,/usr/sbin/nologin
6,man,x,6,12,man,/var/cache/man,/usr/sbin/nologin
7,lp,x,7,7,lp,/var/spool/lpd,/usr/sbin/nologin
8,mail,x,8,8,mail,/var/mail,/usr/sbin/nologin
9,news,x,9,9,news,/var/spool/news,/usr/sbin/nologin


### CSV格式

In [95]:
pd.read_csv('./pandas加载数据/04.csv')

Unnamed: 0,书名,单价
0,我在未来等你,29.9
1,原则,63.0
2,半小时漫画世界史,28.9


### excel


In [96]:
# Requires the xlrd package: pip install xlrd
pd.read_excel('./pandas加载数据/05.xlsx')

Unnamed: 0,电影名称,上映时间
0,神秘巨星,2018/1/19
1,移动迷宫3,2018/1/26
2,阿凡达2,2018/12/25


### html

In [97]:
# Requires the lxml package: pip install lxml
# read_html returns a list with one DataFrame per table found in the page.
tables = pd.read_html('./pandas加载数据/06.html', header = 0)

In [98]:
tables[0]

Unnamed: 0,排名,城市,工资
0,1,北京,9240
1,2,上海,8962
2,3,深圳,8315
3,4,广州,7409
4,1,杭州,7330


In [99]:
tables[1]

Unnamed: 0,aa,bb,cc
0,11,22,33
1,44,55,66


In [100]:
tables = pd.read_html('./pandas加载数据/06.html', header = 0, attrs = {'class': 'mydata'})

In [101]:
tables

[   排名  城市    工资
 0   1  北京  9240
 1   2  上海  8962
 2   3  深圳  8315
 3   4  广州  7409
 4   1  杭州  7330]

In [102]:
len(tables)

1

### mysql

In [103]:
# pip install pymysql
import getpass
import os

import pymysql

# Never keep the database password in the notebook: read it from the
# MYSQL_PASSWORD environment variable, or prompt for it when the variable is unset.
con = pymysql.connect(
    host='localhost',
    user='root',
    password=os.environ.get('MYSQL_PASSWORD') or getpass.getpass('MySQL password: '),
    database='doubandb',
    charset='utf8',
    use_unicode=True,
)

# SQL command
sql_cmd = 'select * from books2'

try:
    # Load the whole result set into a DataFrame.
    df = pd.read_sql(sql_cmd, con)
finally:
    # Release the connection even if the query fails; df is already in memory.
    con.close()



In [104]:
df.head(2)

Unnamed: 0,ID,title,author,press,original,translator,imprint,pages,price,binding,series,isbn,score,number
0,1000019,政治无意识,弗雷德里克.詹姆逊,中国社会科学出版社,,王逢振/陈永国,1999-8,297,35.00元,平装,知识分子图书馆,9787500425564,7.5,107
1,1000034,生死遗言,伊能静,现代出版社,,,2002-10,203,18.00元,平装,,9787800288494,7.4,2377


## sort

In [105]:
# Deliberately unsorted labels and values, so sort_index / sort_values give different orders.
st = pd.Series(data=['C', 'A', 'D'], index=[3, 1, 2])
st

3    C
1    A
2    D
dtype: object

In [106]:
st.sort_index()

1    A
2    D
3    C
dtype: object

In [107]:
st.sort_values()

1    A
3    C
2    D
dtype: object

In [108]:
st.sort_index(ascending=False)

3    C
2    D
1    A
dtype: object

In [109]:
st.sort_values(ascending=False)

2    D
3    C
1    A
dtype: object

In [110]:
arr = [[9, 4, 8],
       [4, 6, 5],
       [4, 5, 3]]

In [111]:
# Row labels and column names are both out of order on purpose,
# so each sorting axis has something to rearrange.
df = pd.DataFrame(data=arr, index=[0, 2, 1], columns=['c', 'a', 'b'])
df


Unnamed: 0,c,a,b
0,9,4,8
2,4,6,5
1,4,5,3


In [112]:
df.sort_index()

Unnamed: 0,c,a,b
0,9,4,8
1,4,5,3
2,4,6,5


In [113]:
df.sort_index(axis = 1)

Unnamed: 0,a,b,c
0,4,8,9
2,6,5,4
1,5,3,4


In [114]:
# Sort the DataFrame's rows by the values of a single column.
df.sort_values(by = 'c')

Unnamed: 0,c,a,b
2,4,6,5
1,4,5,3
0,9,4,8


In [115]:
df.sort_values(by = 'c', ascending = False)

Unnamed: 0,c,a,b
0,9,4,8
2,4,6,5
1,4,5,3


In [116]:
df.sort_values(by = ['c', 'a'])

Unnamed: 0,c,a,b
1,4,5,3
2,4,6,5
0,9,4,8


## rank

In [117]:
df

Unnamed: 0,c,a,b
0,9,4,8
2,4,6,5
1,4,5,3


In [118]:
df.rank()

Unnamed: 0,c,a,b
0,3.0,1.0,3.0
2,1.5,3.0,2.0
1,1.5,2.0,1.0


In [119]:
df.rank(method = 'first')  # ties are ranked in order of appearance; the default method is 'average'

Unnamed: 0,c,a,b
0,3.0,1.0,3.0
2,1.0,3.0,2.0
1,2.0,2.0,1.0


In [120]:
df.rank(method = 'max')

Unnamed: 0,c,a,b
0,3.0,1.0,3.0
2,2.0,3.0,2.0
1,2.0,2.0,1.0


In [121]:
df.rank(method = 'min')

Unnamed: 0,c,a,b
0,3.0,1.0,3.0
2,1.0,3.0,2.0
1,1.0,2.0,1.0


## merge

In [122]:
import numpy as np

In [123]:
# Scores are random, so draw them from a generator with a fixed seed: the frame
# (and every merge result built from it) is then identical on each
# Restart & Run All. Scores are integers in [50, 100).
df1 = pd.DataFrame({'stu_no': ['s1', 's2', 's1', 's3', 's1', 's1', 's2', 's4'],
                    'score': np.random.RandomState(42).randint(50, 100, size = 8)
})

In [124]:
df1

Unnamed: 0,stu_no,score
0,s1,61
1,s2,54
2,s1,58
3,s3,87
4,s1,77
5,s1,62
6,s2,70
7,s4,69


In [125]:
# Student lookup table: s4 is absent and s5 has no scores,
# which makes the join types below produce different results.
df2 = pd.DataFrame(dict(
    stu_no=['s1', 's2', 's3', 's5'],
    name=['张三', '李四', '王五', '赵六'],
))

df2

Unnamed: 0,stu_no,name
0,s1,张三
1,s2,李四
2,s3,王五
3,s5,赵六


In [126]:
pd.merge(df1, df2, on = 'stu_no') # inner join (the default): keeps only stu_no values present in both frames


Unnamed: 0,stu_no,score,name
0,s1,61,张三
1,s1,58,张三
2,s1,77,张三
3,s1,62,张三
4,s2,54,李四
5,s2,70,李四
6,s3,87,王五


In [127]:
pd.merge(df1, df2, on = 'stu_no', how = 'left') # left join: keeps every row of df1; name is missing where df2 has no match

Unnamed: 0,stu_no,score,name
0,s1,61,张三
1,s2,54,李四
2,s1,58,张三
3,s3,87,王五
4,s1,77,张三
5,s1,62,张三
6,s2,70,李四
7,s4,69,


In [128]:
pd.merge(df1, df2, on = 'stu_no', how = 'right') # right join: keeps every row of df2; score is missing where df1 has no match

Unnamed: 0,stu_no,score,name
0,s1,61.0,张三
1,s1,58.0,张三
2,s1,77.0,张三
3,s1,62.0,张三
4,s2,54.0,李四
5,s2,70.0,李四
6,s3,87.0,王五
7,s5,,赵六


In [129]:
pd.merge(df1, df2, on = 'stu_no', how = 'outer') # outer join: keeps the stu_no values of both frames, filling gaps with NaN

Unnamed: 0,stu_no,score,name
0,s1,61.0,张三
1,s1,58.0,张三
2,s1,77.0,张三
3,s1,62.0,张三
4,s2,54.0,李四
5,s2,70.0,李四
6,s3,87.0,王五
7,s4,69.0,
8,s5,,赵六


## concat

In [130]:
# 2x2 integer frame holding 1..4, filled row by row.
df1 = pd.DataFrame(np.array([[1, 2], [3, 4]]))
df1

Unnamed: 0,0,1
0,1,2
1,3,4


In [131]:
# 2x2 frame of floating-point zeros, used as the second operand for concat.
df2 = pd.DataFrame(np.zeros(shape=(2, 2)))
df2

Unnamed: 0,0,1
0,0.0,0.0
1,0.0,0.0


In [132]:
pd.concat([df1, df2])

Unnamed: 0,0,1
0,1.0,2.0
1,3.0,4.0
0,0.0,0.0
1,0.0,0.0


In [133]:
pd.concat([df1, df2], axis = 1)

Unnamed: 0,0,1,0.1,1.1
0,1,2,0.0,0.0
1,3,4,0.0,0.0


##  异常值的分析

In [134]:
# Row-oriented input with missing entries (None), which show up as NaN in the frame.
data = [
    [1, None],
    [4, 5],
    [None, None],
    [8, 9],
    [3, 4],
]
df = pd.DataFrame(data=data, columns=list('ab'))
df

Unnamed: 0,a,b
0,1.0,
1,4.0,5.0
2,,
3,8.0,9.0
4,3.0,4.0


In [135]:
df.head(2)

Unnamed: 0,a,b
0,1.0,
1,4.0,5.0


In [136]:
df.tail(3)

Unnamed: 0,a,b
2,,
3,8.0,9.0
4,3.0,4.0


In [137]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
a    4 non-null float64
b    3 non-null float64
dtypes: float64(2)
memory usage: 160.0 bytes


In [138]:
df.describe()

Unnamed: 0,a,b
count,4.0,3.0
mean,4.0,6.0
std,2.94392,2.645751
min,1.0,4.0
25%,2.5,4.5
50%,3.5,5.0
75%,5.0,7.0
max,8.0,9.0


In [139]:
df.count()

a    4
b    3
dtype: int64

In [140]:
df.mean()

a    4.0
b    6.0
dtype: float64

In [141]:
df.sum()

a    16.0
b    18.0
dtype: float64

In [142]:
df.sum(axis = 1)

0     1.0
1     9.0
2     0.0
3    17.0
4     7.0
dtype: float64

In [143]:
df.a

0    1.0
1    4.0
2    NaN
3    8.0
4    3.0
Name: a, dtype: float64

In [144]:
df.a.sum()

16.0

In [145]:
df.std()  # 标准差

a    2.943920
b    2.645751
dtype: float64

In [146]:
df.var()   # variance; its square root is the standard deviation

a    8.666667
b    7.000000
dtype: float64

In [147]:
# Groups A and B each have 6 students taking the same Chinese test. Group A scored
# 95, 85, 75, 65, 55, 45; group B scored 73, 72, 71, 69, 68, 67. Both groups average 70,
# but A's standard deviation is 17.078 while B's is 2.16, so the spread among
# A's students is far larger than among B's.

# Compute the variance by hand: the mean of the squared deviations from the mean.
# This divides by n (population variance), whereas df.var() divides by n - 1,
# which is why the result for column a is 6.5 here rather than 8.666667.
# The NaN in column a is skipped by both means.
my_var = np.mean((df.a - df.a.mean()) ** 2)

In [148]:
my_var

6.5

In [149]:
df.max(axis = 1)

0    1.0
1    5.0
2    NaN
3    9.0
4    4.0
dtype: float64