# Python数据分析（二）：基本数据结构

本讲主要介绍Python数据分析相关库提供的几个主要基本数据结构：numpy.array、pandas.Series、pandas.DataFrame，并简介基于这几个数据结构的编程基本思路。

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## numpy.array

numpy.array（准确地说是numpy.ndarray）是numpy的基本数据结构。简单来说，这是一个类似于MATLAB数组的n维数组；在ndarray的基础上，numpy提供了一系列基于ndarray的处理和运算工具。

### 创建ndarray

In [2]:
# 用嵌套列表创建数组
mylist = [[1,2,3],[4,5,6],[7,8,9]]
myarray = np.array(mylist)
myarray

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [3]:
# 特殊数组
np.zeros([4,5])

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

In [4]:
np.ones([2,4,3])

array([[[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]],

       [[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]]])

In [6]:
np.empty(5)  # allocates memory only; contents are uninitialized (arbitrary values, not guaranteed zeros)

array([ 0.,  0.,  0.,  0.,  0.])

In [7]:
np.eye(5)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

In [8]:
np.arange(14)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13])

### ndarray数据类型

In [9]:
myarray.dtype

dtype('int64')

In [10]:
myarray[0,0].dtype

dtype('int64')

In [11]:
type(myarray[0,0])

numpy.int64

In [12]:
myarray2 = np.array(mylist, dtype=np.float64)

In [13]:
myarray2.dtype

dtype('float64')

In [14]:
# dtype=object stores generic Python objects; the old np.object alias was removed in NumPy 1.24
myarray3 = np.array(mylist, dtype=object)

In [15]:
myarray3.dtype

dtype('O')

In [16]:
type(myarray3[0,0])

int

### 索引和切片

In [17]:
# 索引
myarray[0]

array([1, 2, 3])

In [19]:
myarray[0,1]

2

In [18]:
myarray[0][1]

2

In [20]:
# 切片
myarray[1:2]

array([[4, 5, 6]])

In [21]:
myarray[1:2,1:2]

array([[5]])

In [22]:
myarray[:,1:2]

array([[2],
       [5],
       [8]])

In [23]:
# 布尔索引
myarray[myarray>5]

array([6, 7, 8, 9])

In [24]:
# 花式索引
myarray[:,[0,0,1,0,2]]

array([[1, 1, 2, 1, 3],
       [4, 4, 5, 4, 6],
       [7, 7, 8, 7, 9]])

### 基本运算和通用函数

In [25]:
# 数组与标量
myarray*3

array([[ 3,  6,  9],
       [12, 15, 18],
       [21, 24, 27]])

In [27]:
myarray/3.

array([[ 0.33333333,  0.66666667,  1.        ],
       [ 1.33333333,  1.66666667,  2.        ],
       [ 2.33333333,  2.66666667,  3.        ]])

In [28]:
3./myarray

array([[ 3.        ,  1.5       ,  1.        ],
       [ 0.75      ,  0.6       ,  0.5       ],
       [ 0.42857143,  0.375     ,  0.33333333]])

In [29]:
myarray+3

array([[ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [30]:
myarray-3

array([[-2, -1,  0],
       [ 1,  2,  3],
       [ 4,  5,  6]])

In [31]:
myarray**2

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

In [32]:
# 数组与数组
myarray+myarray

array([[ 2,  4,  6],
       [ 8, 10, 12],
       [14, 16, 18]])

In [33]:
myarray*myarray

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

In [34]:
myarray**myarray

array([[        1,         4,        27],
       [      256,      3125,     46656],
       [   823543,  16777216, 387420489]])

In [35]:
# 通用函数
np.mean(myarray)

5.0

In [36]:
# 等价的实例方法
myarray.mean()

5.0

常用的通用函数：
- abs
- sqrt
- exp
- log
- sign

### 排序

In [37]:
myarray_new = np.array([[4,8],[7,3]])
myarray_new

array([[4, 8],
       [7, 3]])

In [38]:
# ndarray.sort() sorts in place along the last axis (each row here), so the array itself is modified
myarray_new.sort()
myarray_new

array([[4, 8],
       [3, 7]])

### 随机数

In [39]:
np.random.normal(size=(4,5))

array([[-0.58588278,  0.25429223,  0.67943064,  1.72624058, -0.87256767],
       [ 0.06976233, -0.43575416, -0.47660296,  1.42710086, -0.03891215],
       [ 0.57106525, -1.5387785 , -0.40382786, -0.62195885, -0.37355521],
       [ 1.78452011,  0.64323167,  1.50397685, -0.79359365, -1.05193023]])

## pandas.Series

### 创建Series

In [41]:
# 列表/数组
series = pd.Series([4,5,2,5,5],index=['a','b','c','d','e'])
series

a    4
b    5
c    2
d    5
e    5
dtype: int64

In [42]:
# 标量
series = pd.Series(4,index=['a','b','c','d','e'])
series

a    4
b    4
c    4
d    4
e    4
dtype: int64

In [43]:
# 字典
series = pd.Series({'a':4,'b':5,'c':2,'d':5,'e':5}, name='name')
series

a    4
b    5
c    2
d    5
e    5
Name: name, dtype: int64

### 基本属性

In [44]:
# index
series.index

Index([u'a', u'b', u'c', u'd', u'e'], dtype='object')

In [45]:
# 修改index
series.index = range(5)
series

0    4
1    5
2    2
3    5
4    5
Name: name, dtype: int64

In [46]:
# Another way to change the index: rename() maps each label through the function
# and returns a new Series; `series` itself keeps its original index
series.rename(lambda x:x*2)

0    4
2    5
4    2
6    5
8    5
Name: name, dtype: int64

In [48]:
# index name
series.index.name = 'index'
series

index
0    4
1    5
2    2
3    5
4    5
Name: name, dtype: int64

In [49]:
# name
series.name = 'value'
series

index
0    4
1    5
2    2
3    5
4    5
Name: value, dtype: int64

In [50]:
# 另一种修改列名的方法
series.rename('myvalue')

index
0    4
1    5
2    2
3    5
4    5
Name: myvalue, dtype: int64

### 索引

In [53]:
# A Series behaves like a special 1-D ndarray: elements can be accessed by position.
# Use .iloc for positional access; plain series[2] on a label index relies on a
# deprecated integer fallback that newer pandas versions warn about or reject.
series = pd.Series({'a':4,'b':5,'c':2,'d':5,'e':5}, name='name')
series.iloc[2]

2

In [54]:
# 有序字典
series['c']

2

### 基本运算

In [55]:
# 描述
series.describe()

count    5.00000
mean     4.20000
std      1.30384
min      2.00000
25%      4.00000
50%      5.00000
75%      5.00000
max      5.00000
Name: name, dtype: float64

In [56]:
# Same as with a 1-D ndarray: reductions such as sum() are available as methods
series.sum()

21

In [57]:
np.exp(series)

a     54.598150
b    148.413159
c      7.389056
d    148.413159
e    148.413159
Name: name, dtype: float64

### 基本函数

In [58]:
# 排序
# 升序
series.sort_values()

c    2
a    4
b    5
d    5
e    5
Name: name, dtype: int64

In [59]:
# 降序
series.sort_values(ascending=False)

e    5
d    5
b    5
a    4
c    2
Name: name, dtype: int64

In [60]:
# 头
series.head(2)

a    4
b    5
Name: name, dtype: int64

In [61]:
# 尾
series.tail(2)

d    5
e    5
Name: name, dtype: int64

In [62]:
# sort+head
series.nlargest(2)

b    5
d    5
Name: name, dtype: int64

In [63]:
# sort+tail
series.nsmallest(3)

c    2
a    4
b    5
Name: name, dtype: int64

In [64]:
# apply方法
series.apply(lambda x:x+4)

a    8
b    9
c    6
d    9
e    9
Name: name, dtype: int64

## pandas.DataFrame

DataFrame是pandas的基本数据结构之一，是一个基于ndarray的特殊二维数组，类似于R的data.frame、Excel的工作表或者SQL的表。DataFrame提供了一系列数据操作的方法和属性。

### 创建DataFrame

In [65]:
# dict of dict
data = {
    'one':{'a':1,'b':3},
    'two':{'a':2,'b':4}
}
pd.DataFrame(data)

Unnamed: 0,one,two
a,1,2
b,3,4


In [66]:
# dict of list
data = {
    'one':[1,3],
    'two':[2,4],
}
pd.DataFrame(data, index=['a','b'])

Unnamed: 0,one,two
a,1,2
b,3,4


In [67]:
# record list/array
data = [[1,2],[3,4]]
pd.DataFrame(data, index=['a','b'], columns=['one','two'])

Unnamed: 0,one,two
a,1,2
b,3,4


In [68]:
# list of dict
data = [{'one':1,'two':2},{'one':3,'two':4}]
pd.DataFrame(data, index=['a','b'])

Unnamed: 0,one,two
a,1,2
b,3,4


小结：dict在外，外层是列，里层每个单位是一列数据；list/array在外，外层是行，里层每个单位是一行数据

### 基本属性

In [69]:
df = pd.read_csv('data/sample1.csv')

In [70]:
# index
df.index

RangeIndex(start=0, stop=60, step=1)

In [71]:
# 设置index
df = df.set_index('goodsID')
df.head()

Unnamed: 0_level_0,goodsName,goodsURL,shopName,shopURL,monthly_sales,price,price_ave,comments
goodsID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
14985545685,【天猫超市】舒可曼纯正白砂糖400g碳化工艺细白糖干爽带盖易保存,http://detail.tmall.com/item.htm?id=1498554568...,天猫超市,http://store.taobao.com/search.htm?user_number...,42000,6.6,6.19元/500g,38000
44466463444,【巧厨烘焙 】展艺精制白砂糖 幼砂糖西点细砂糖 家用调味品 400g,http://detail.tmall.com/item.htm?id=4446646344...,巧厨食品专营店,http://store.taobao.com/search.htm?user_number...,25000,7.5,9.38元/500g,26000
15918515574,【天猫超市】舒可曼纯正白砂糖800g大包装碳化糖细白糖细砂幼砂糖,http://detail.tmall.com/item.htm?id=1591851557...,天猫超市,http://store.taobao.com/search.htm?user_number...,17000,12.5,7.82元/500g,11000
39863630205,【巧厨烘焙】太古糖白砂糖 细糖 细砂糖 纯正白砂糖 原装454克,http://detail.tmall.com/item.htm?id=3986363020...,巧厨食品专营店,http://store.taobao.com/search.htm?user_number...,9990,9.8,10.79元/500g,9867
43331131373,【天猫超市】舒可曼糖霜 250g特细糖粉 烘焙原料面包饼干蛋糕装饰,http://detail.tmall.com/item.htm?id=4333113137...,天猫超市,http://store.taobao.com/search.htm?user_number...,5966,12.0,24.00元/500g,3854


In [72]:
df.reset_index().head()

Unnamed: 0,goodsID,goodsName,goodsURL,shopName,shopURL,monthly_sales,price,price_ave,comments
0,14985545685,【天猫超市】舒可曼纯正白砂糖400g碳化工艺细白糖干爽带盖易保存,http://detail.tmall.com/item.htm?id=1498554568...,天猫超市,http://store.taobao.com/search.htm?user_number...,42000,6.6,6.19元/500g,38000
1,44466463444,【巧厨烘焙 】展艺精制白砂糖 幼砂糖西点细砂糖 家用调味品 400g,http://detail.tmall.com/item.htm?id=4446646344...,巧厨食品专营店,http://store.taobao.com/search.htm?user_number...,25000,7.5,9.38元/500g,26000
2,15918515574,【天猫超市】舒可曼纯正白砂糖800g大包装碳化糖细白糖细砂幼砂糖,http://detail.tmall.com/item.htm?id=1591851557...,天猫超市,http://store.taobao.com/search.htm?user_number...,17000,12.5,7.82元/500g,11000
3,39863630205,【巧厨烘焙】太古糖白砂糖 细糖 细砂糖 纯正白砂糖 原装454克,http://detail.tmall.com/item.htm?id=3986363020...,巧厨食品专营店,http://store.taobao.com/search.htm?user_number...,9990,9.8,10.79元/500g,9867
4,43331131373,【天猫超市】舒可曼糖霜 250g特细糖粉 烘焙原料面包饼干蛋糕装饰,http://detail.tmall.com/item.htm?id=4333113137...,天猫超市,http://store.taobao.com/search.htm?user_number...,5966,12.0,24.00元/500g,3854


In [73]:
# columns
df.columns

Index([u'goodsName', u'goodsURL', u'shopName', u'shopURL', u'monthly_sales',
       u'price', u'price_ave', u'comments'],
      dtype='object')

In [74]:
# 修改columns
df2 = pd.read_csv('data/sample2.csv')
df2.columns = range(len(df2.columns))
df2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,540395660668,【天猫超市】红棉食糖方糖454g/盒*2咖啡伴侣调糖白糖每盒120粒,http://detail.tmall.com/item.htm?id=5403956606...,天猫超市,http://store.taobao.com/search.htm?user_number...,250,19.2,13.77元/500g,39
1,520304659943,包邮买3送1 安琪百钻优级绵白糖 食用白糖 绵砂糖 烘焙调味 400g,http://detail.tmall.com/item.htm?id=5203046599...,嘉然食品专营店,http://store.taobao.com/search.htm?user_number...,250,7.3,9.13元/500g,262
2,520555206122,安琪百钻精幼砂糖 烘焙细砂糖白砂糖 冲饮食用白糖烘培原料400g,http://detail.tmall.com/item.htm?id=5205552061...,安琪酵母旗舰店,http://store.taobao.com/search.htm?user_number...,232,9.5,11.88元/500g,443
3,528043879271,烘焙原料 大卫贝克优质细砂糖 白砂糖食糖西点饼干用 幼砂糖400g,http://detail.tmall.com/item.htm?id=5280438792...,购美食品专营店,http://store.taobao.com/search.htm?user_number...,227,6.8,8.50元/500g,134
4,40141612623,烘焙原料 太古糖白砂糖 细砂糖幼砂糖 蛋糕面包饼干用 原装454g,http://detail.tmall.com/item.htm?id=4014161262...,范美焙亲食品专营店,http://store.taobao.com/search.htm?user_number...,224,8.0,8.81元/500g,548


In [75]:
df.rename(columns = {'goodsName':'name'}).head()

Unnamed: 0_level_0,name,goodsURL,shopName,shopURL,monthly_sales,price,price_ave,comments
goodsID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
14985545685,【天猫超市】舒可曼纯正白砂糖400g碳化工艺细白糖干爽带盖易保存,http://detail.tmall.com/item.htm?id=1498554568...,天猫超市,http://store.taobao.com/search.htm?user_number...,42000,6.6,6.19元/500g,38000
44466463444,【巧厨烘焙 】展艺精制白砂糖 幼砂糖西点细砂糖 家用调味品 400g,http://detail.tmall.com/item.htm?id=4446646344...,巧厨食品专营店,http://store.taobao.com/search.htm?user_number...,25000,7.5,9.38元/500g,26000
15918515574,【天猫超市】舒可曼纯正白砂糖800g大包装碳化糖细白糖细砂幼砂糖,http://detail.tmall.com/item.htm?id=1591851557...,天猫超市,http://store.taobao.com/search.htm?user_number...,17000,12.5,7.82元/500g,11000
39863630205,【巧厨烘焙】太古糖白砂糖 细糖 细砂糖 纯正白砂糖 原装454克,http://detail.tmall.com/item.htm?id=3986363020...,巧厨食品专营店,http://store.taobao.com/search.htm?user_number...,9990,9.8,10.79元/500g,9867
43331131373,【天猫超市】舒可曼糖霜 250g特细糖粉 烘焙原料面包饼干蛋糕装饰,http://detail.tmall.com/item.htm?id=4333113137...,天猫超市,http://store.taobao.com/search.htm?user_number...,5966,12.0,24.00元/500g,3854


### 索引

In [76]:
# 列索引
df['price'].head()

goodsID
14985545685     6.6
44466463444     7.5
15918515574    12.5
39863630205     9.8
43331131373    12.0
Name: price, dtype: float64

In [77]:
df[['price','monthly_sales']].head()

Unnamed: 0_level_0,price,monthly_sales
goodsID,Unnamed: 1_level_1,Unnamed: 2_level_1
14985545685,6.6,42000
44466463444,7.5,25000
15918515574,12.5,17000
39863630205,9.8,9990
43331131373,12.0,5966


In [78]:
# 按编号的行索引
df.iloc[1]

goodsName                       【巧厨烘焙 】展艺精制白砂糖 幼砂糖西点细砂糖 家用调味品 400g
goodsURL         http://detail.tmall.com/item.htm?id=4446646344...
shopName                                                   巧厨食品专营店
shopURL          http://store.taobao.com/search.htm?user_number...
monthly_sales                                                25000
price                                                          7.5
price_ave                                               9.38元/500g
comments                                                     26000
Name: 44466463444, dtype: object

In [79]:
# 按index的行索引
df.loc[44466463444]

goodsName                       【巧厨烘焙 】展艺精制白砂糖 幼砂糖西点细砂糖 家用调味品 400g
goodsURL         http://detail.tmall.com/item.htm?id=4446646344...
shopName                                                   巧厨食品专营店
shopURL          http://store.taobao.com/search.htm?user_number...
monthly_sales                                                25000
price                                                          7.5
price_ave                                               9.38元/500g
comments                                                     26000
Name: 44466463444, dtype: object

In [80]:
# 元素索引
df.loc[44466463444,'price']

7.5

In [81]:
# 布尔索引
df[df['price']>25]

Unnamed: 0_level_0,goodsName,goodsURL,shopName,shopURL,monthly_sales,price,price_ave,comments
goodsID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
525290382664,【天猫超市】舒可曼白砂糖1000g 一级白糖砂糖调味品烹饪大包装,http://detail.tmall.com/item.htm?id=5252903826...,天猫超市,http://store.taobao.com/search.htm?user_number...,4489,25.8,12.90元/500g,2294
536613533939,阿鹏哥白砂糖8斤烘焙原料手工甘蔗中颗细砂糖袋4000g包邮白糖批发,http://detail.tmall.com/item.htm?id=5366135339...,特鹏食品专营店,http://store.taobao.com/search.htm?user_number...,963,39.2,4.90元/500g,863
537815368957,【天猫超市】SUGARMAN/舒可曼一级白砂糖1000g*2包装白糖烹饪调味,http://detail.tmall.com/item.htm?id=5378153689...,天猫超市,http://store.taobao.com/search.htm?user_number...,677,26.8,6.23元/500g,294
36005505455,蜜福堂木糖醇食品 木糖醇代糖 无糖食品烘焙蛋糕点饼干白砂糖500g,http://detail.tmall.com/item.htm?id=3600550545...,美联食品专营店,http://store.taobao.com/search.htm?user_number...,311,28.0,28.00元/500g,2805
41323698923,【天猫超市】川崎元贞糖500g无糖食品烘焙代糖替代木糖醇白砂糖,http://detail.tmall.com/item.htm?id=4132369892...,天猫超市,http://store.taobao.com/search.htm?user_number...,250,29.0,29.00元/500g,463


### 基本运算

In [82]:
df.describe()

Unnamed: 0,monthly_sales,price,comments
count,60.0,60.0,60.0
mean,2939.466667,12.935,3247.65
std,6491.08509,7.224437,5978.098254
min,250.0,5.8,83.0
25%,571.25,7.0,675.25
50%,1134.5,10.2,1561.5
75%,2141.25,16.25,2832.0
max,42000.0,39.2,38000.0


In [83]:
# numeric_only=True restricts the mean to numeric columns; pandas >= 2.0 no longer
# drops text columns silently and raises TypeError without it
df.mean(numeric_only=True)

monthly_sales    2939.466667
price              12.935000
comments         3247.650000
dtype: float64

In [84]:
# axis=0 keeps the per-column means; np.mean passes axis=None by default, which
# newer pandas reduces over both axes to a single scalar
np.mean(df[['price','monthly_sales']], axis=0)

price              12.935000
monthly_sales    2939.466667
dtype: float64

### 基本函数

In [85]:
# 排序
# 升序
df.sort_values(['price','monthly_sales']).head()

Unnamed: 0_level_0,goodsName,goodsURL,shopName,shopURL,monthly_sales,price,price_ave,comments
goodsID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
15602091891,【天猫超市】太古优级白砂糖 300g/袋 食用糖 甜蜜滋味 健康美丽,http://detail.tmall.com/item.htm?id=1560209189...,天猫超市,http://store.taobao.com/search.htm?user_number...,1545,5.8,9.67元/500g,1346
41029108722,【天猫超市】福临门 优质白砂糖 405g/袋 洁白纯净 中粮出品,http://detail.tmall.com/item.htm?id=4102910872...,天猫超市,http://store.taobao.com/search.htm?user_number...,791,5.9,18.55元/500g,2619
40136006635,圣家优质细砂糖做面包diy蛋糕幼砂糖白砂糖烘焙原料原装200g,http://detail.tmall.com/item.htm?id=4013600663...,宏河圣齐食品专营店,http://store.taobao.com/search.htm?user_number...,906,5.9,14.75元/500g,6963
523015597397,可可西厨 大卫贝克 优质细砂糖 白砂糖 烘焙用糖 400g,http://detail.tmall.com/item.htm?id=5230155973...,可可西厨食品专营店,http://store.taobao.com/search.htm?user_number...,1187,6.4,8.00元/500g,2611
37369322311,烘焙原料 舒可曼糖霜 糖粉细砂糖 蛋糕面包饼干装饰原装糖粉250g,http://detail.tmall.com/item.htm?id=3736932231...,芝选食品专营店,http://store.taobao.com/search.htm?user_number...,264,6.5,13.00元/500g,810


In [86]:
# 降序
df.sort_values(['price','monthly_sales'], ascending=False).head()

Unnamed: 0_level_0,goodsName,goodsURL,shopName,shopURL,monthly_sales,price,price_ave,comments
goodsID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
536613533939,阿鹏哥白砂糖8斤烘焙原料手工甘蔗中颗细砂糖袋4000g包邮白糖批发,http://detail.tmall.com/item.htm?id=5366135339...,特鹏食品专营店,http://store.taobao.com/search.htm?user_number...,963,39.2,4.90元/500g,863
41323698923,【天猫超市】川崎元贞糖500g无糖食品烘焙代糖替代木糖醇白砂糖,http://detail.tmall.com/item.htm?id=4132369892...,天猫超市,http://store.taobao.com/search.htm?user_number...,250,29.0,29.00元/500g,463
36005505455,蜜福堂木糖醇食品 木糖醇代糖 无糖食品烘焙蛋糕点饼干白砂糖500g,http://detail.tmall.com/item.htm?id=3600550545...,美联食品专营店,http://store.taobao.com/search.htm?user_number...,311,28.0,28.00元/500g,2805
537815368957,【天猫超市】SUGARMAN/舒可曼一级白砂糖1000g*2包装白糖烹饪调味,http://detail.tmall.com/item.htm?id=5378153689...,天猫超市,http://store.taobao.com/search.htm?user_number...,677,26.8,6.23元/500g,294
525290382664,【天猫超市】舒可曼白砂糖1000g 一级白糖砂糖调味品烹饪大包装,http://detail.tmall.com/item.htm?id=5252903826...,天猫超市,http://store.taobao.com/search.htm?user_number...,4489,25.8,12.90元/500g,2294


In [87]:
# 头
df.head(3)

Unnamed: 0_level_0,goodsName,goodsURL,shopName,shopURL,monthly_sales,price,price_ave,comments
goodsID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
14985545685,【天猫超市】舒可曼纯正白砂糖400g碳化工艺细白糖干爽带盖易保存,http://detail.tmall.com/item.htm?id=1498554568...,天猫超市,http://store.taobao.com/search.htm?user_number...,42000,6.6,6.19元/500g,38000
44466463444,【巧厨烘焙 】展艺精制白砂糖 幼砂糖西点细砂糖 家用调味品 400g,http://detail.tmall.com/item.htm?id=4446646344...,巧厨食品专营店,http://store.taobao.com/search.htm?user_number...,25000,7.5,9.38元/500g,26000
15918515574,【天猫超市】舒可曼纯正白砂糖800g大包装碳化糖细白糖细砂幼砂糖,http://detail.tmall.com/item.htm?id=1591851557...,天猫超市,http://store.taobao.com/search.htm?user_number...,17000,12.5,7.82元/500g,11000


In [88]:
# 尾
df.tail(3)

Unnamed: 0_level_0,goodsName,goodsURL,shopName,shopURL,monthly_sales,price,price_ave,comments
goodsID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
37369322311,烘焙原料 舒可曼糖霜 糖粉细砂糖 蛋糕面包饼干装饰原装糖粉250g,http://detail.tmall.com/item.htm?id=3736932231...,芝选食品专营店,http://store.taobao.com/search.htm?user_number...,264,6.5,13.00元/500g,810
39998369382,亿龙源优质一级白砂糖1250g 精制烘焙细糖白糖沙糖大袋装包邮批发,http://detail.tmall.com/item.htm?id=3999836938...,亿龙源旗舰店,http://store.taobao.com/search.htm?user_number...,257,18.8,7.52元/500g,654
41323698923,【天猫超市】川崎元贞糖500g无糖食品烘焙代糖替代木糖醇白砂糖,http://detail.tmall.com/item.htm?id=4132369892...,天猫超市,http://store.taobao.com/search.htm?user_number...,250,29.0,29.00元/500g,463


In [89]:
# sort+head
df.nlargest(5,'price')

Unnamed: 0_level_0,goodsName,goodsURL,shopName,shopURL,monthly_sales,price,price_ave,comments
goodsID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
536613533939,阿鹏哥白砂糖8斤烘焙原料手工甘蔗中颗细砂糖袋4000g包邮白糖批发,http://detail.tmall.com/item.htm?id=5366135339...,特鹏食品专营店,http://store.taobao.com/search.htm?user_number...,963,39.2,4.90元/500g,863
41323698923,【天猫超市】川崎元贞糖500g无糖食品烘焙代糖替代木糖醇白砂糖,http://detail.tmall.com/item.htm?id=4132369892...,天猫超市,http://store.taobao.com/search.htm?user_number...,250,29.0,29.00元/500g,463
36005505455,蜜福堂木糖醇食品 木糖醇代糖 无糖食品烘焙蛋糕点饼干白砂糖500g,http://detail.tmall.com/item.htm?id=3600550545...,美联食品专营店,http://store.taobao.com/search.htm?user_number...,311,28.0,28.00元/500g,2805
537815368957,【天猫超市】SUGARMAN/舒可曼一级白砂糖1000g*2包装白糖烹饪调味,http://detail.tmall.com/item.htm?id=5378153689...,天猫超市,http://store.taobao.com/search.htm?user_number...,677,26.8,6.23元/500g,294
525290382664,【天猫超市】舒可曼白砂糖1000g 一级白糖砂糖调味品烹饪大包装,http://detail.tmall.com/item.htm?id=5252903826...,天猫超市,http://store.taobao.com/search.htm?user_number...,4489,25.8,12.90元/500g,2294


In [90]:
# sort+tail
df.nsmallest(4,'comments')

Unnamed: 0_level_0,goodsName,goodsURL,shopName,shopURL,monthly_sales,price,price_ave,comments
goodsID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
540394892624,【天猫超市】红棉食糖一级白砂糖454g/袋*2碳化糖白糖粗砂糖,http://detail.tmall.com/item.htm?id=5403948926...,天猫超市,http://store.taobao.com/search.htm?user_number...,498,13.0,9.91元/500g,83
531223828362,【天猫超市】舒可曼优质白砂糖800g 细砂糖白糖面包饼干烘焙原料,http://detail.tmall.com/item.htm?id=5312238283...,天猫超市,http://store.taobao.com/search.htm?user_number...,473,24.9,15.57元/500g,188
531959709443,阳光美橙 冰糖 白冰糖400g单晶冰糖白砂糖煲汤炖粥泡茶辅料包邮,http://detail.tmall.com/item.htm?id=5319597094...,阳光美橙旗舰店,http://store.taobao.com/search.htm?user_number...,272,9.6,12.00元/500g,193
520249810226,烘焙原料 taikoo太古白砂糖 细砂糖 蛋糕面包饼干打发用 原装1kg,http://detail.tmall.com/item.htm?id=5202498102...,范美焙亲食品专营店,http://store.taobao.com/search.htm?user_number...,294,15.0,7.50元/500g,251


In [91]:
# apply method
# Column-wise (the default): the lambda receives each selected column and returns its sum
df[['price','monthly_sales','comments']].apply(lambda x: sum(x))

price               776.1
monthly_sales    176368.0
comments         194859.0
dtype: float64

In [92]:
# Row-wise apply: axis=1 passes each row to the lambda, giving one sum per goodsID
df[['price','monthly_sales','comments']].apply(lambda x: sum(x), axis=1).head()

goodsID
14985545685    80006.6
44466463444    51007.5
15918515574    28012.5
39863630205    19866.8
43331131373     9832.0
dtype: float64

## 作业

### 要求
1. 用numpy的随机数模块随机生成一个26*4的数组，数据范围是（0,1），且服从卡方分布；选取其中大于0.5的全部数据；
2. 把该数组转换成DataFrame，行索引为26个字母，列索引为'one','two','three','four'；取出按two排序前5位的行；
3. 取出one列，并计算该列的和、中值和方差。

### 提示
1. 部分需要的函数不在Note里，请查询官方文档寻找合适的函数；
2. 作业提交到GitHub上，并把链接提交到https://github.com/xmucpp/cpp_courses/issues/2 。
