# Pandas

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Cap how many rows pandas renders so long frames are truncated in cell output.
pd.set_option("display.max_rows", 10)

## DataFrame 创建

### 从字典创建

In [5]:
# Column-oriented input: each key is a column label, each list holds that
# column's values.
temp_dict = {
    'score': [8.9, 8.2, 9.3],
    'category': ['悬疑', '动作', '爱情']
}
# Pin the column order explicitly. Older pandas sorted dict keys
# alphabetically, while current versions keep insertion order; a later cell
# relabels the columns purely by position, so the order must not depend on
# the installed pandas version.
temp_pd = pd.DataFrame(temp_dict, columns=['category', 'score'])
print(temp_pd)

  category  score
0       悬疑    8.9
1       动作    8.2
2       爱情    9.3


In [6]:
# Inspect the column labels; they are held in a pandas Index object.
temp_pd.columns

Index(['category', 'score'], dtype='object')

In [7]:
# Replace all column labels at once. The new labels are matched to the
# existing columns by position, so the list must follow the current column order.
temp_pd.columns = ["目录", "评分"]

In [8]:
# Display the frame to check the new column labels.
temp_pd

Unnamed: 0,目录,评分
0,悬疑,8.9
1,动作,8.2
2,爱情,9.3


In [12]:
# A single column label cannot be reassigned by position: item assignment on
# a pandas Index raises TypeError. The error is the point of this cell, so it
# is caught and printed instead of aborting a Restart & Run All.
try:
    temp_pd.columns[0] = "电影名"
except TypeError as err:
    print("TypeError:", err)

TypeError: Index does not support mutable operations

In [13]:
# Relabel the first column through the public API. Writing into
# `temp_pd.columns.values` would mutate the Index's backing array behind
# pandas' back; pandas treats an Index as immutable, so label lookups are not
# guaranteed to see such a change. `rename` returns a new frame with a fresh Index.
temp_pd = temp_pd.rename(columns={temp_pd.columns[0]: "电影名"})

In [14]:
# Display the frame to confirm the first column's new label.
temp_pd

Unnamed: 0,电影名,评分
0,悬疑,8.9
1,动作,8.2
2,爱情,9.3


In [15]:
# The column labels are a pandas Index, while its `.values` attribute is a
# plain NumPy array.
type(temp_pd.columns), type(temp_pd.columns.values)

(pandas.core.indexes.base.Index, numpy.ndarray)

In [16]:
# Bracket indexing with a column label selects that column; `.values` gives
# its data as a NumPy array.
temp_pd['评分'].values

array([8.9, 8.2, 9.3])

In [17]:
# Attribute-style access is equivalent here and supports tab completion, but
# it only works when the column name is a valid Python identifier that does
# not clash with an existing DataFrame attribute or method. Bracket indexing
# is the general form.
temp_pd.评分.values

array([8.9, 8.2, 9.3])

In [18]:
# Without `.values` the column comes back as a Series (index, name and dtype
# are shown alongside the data).
temp_pd.评分

0    8.9
1    8.2
2    9.3
Name: 评分, dtype: float64

In [19]:
# `.values` on the whole frame gives a 2-D NumPy array; because the columns
# mix strings and floats, the array dtype is `object`.
temp_pd.values

array([['悬疑', 8.9],
       ['动作', 8.2],
       ['爱情', 9.3]], dtype=object)

### 从 NumPy 数组创建

In [20]:
# Build a 1000x3 frame of uniform samples in [0, 1) from a NumPy array.
# A dedicated, seeded RandomState makes the values identical on every
# Restart & Run All without touching NumPy's global random state.
pos_pd = pd.DataFrame(
    np.random.RandomState(42).rand(1000, 3),
    columns=["x", "y", "z"],
)

In [21]:
# Preview the first rows instead of rendering all 1000.
pos_pd.head()

Unnamed: 0,x,y,z
0,0.508912,0.389847,0.346418
1,0.863006,0.483965,0.739297
2,0.621245,0.13776,0.632489
3,0.532062,0.263008,0.22338
4,0.260328,0.270906,0.928473


### 从文件读取

In [22]:
# Load the Douban movie spreadsheet. The path is relative, so the workbook
# must sit in the notebook's working directory.
movie_pd = pd.read_excel("豆瓣电影.xlsx")

In [23]:
# Preview the first rows of the loaded data.
movie_pd.head()

Unnamed: 0,电影名,评分,评价人数,链接,时间,演员
0,"""ExtremeChampionshipWrestling""",(评价人数不足),,https://movie.douban.com/subject/1963120/,2006-06-13 00:00:00,[]
1,"""TheXFiles""SE6.15Monday",9.4,52.0,https://movie.douban.com/subject/3158584/,1999-02-28 00:00:00,"['大卫·杜楚尼', '吉莲·安德森', '美国', 'Kim Manners', 'Arg..."
2,"""WWEMondayNightRAW""Episodedated11Decembe",(评价人数不足),,https://movie.douban.com/subject/1957739/,2006-12-11 00:00:00,[]
3,"""WWEMondayNightRAW""Episodedated11June200",(评价人数不足),,https://movie.douban.com/subject/2136118/,2007-06-11 00:00:00,['Kevin Dunn']
4,"""WWEMondayNightRAW""Episodedated13Novembe",(评价人数不足),,https://movie.douban.com/subject/1957743/,2006-11-13 00:00:00,[]


In [24]:
# Peek at five rows from further down the table, selected by position.
movie_pd.iloc[200:205]

Unnamed: 0,电影名,评分,评价人数,链接,时间,演员
200,2B青年的不醉人生,6.3,3969.0,https://movie.douban.com/subject/20491427/,2012-12-28 00:00:00,"['黄渤', '中国大陆', '黄渤', '18分钟', '喜剧']"
201,2PMSHOW,8.8,137.0,https://movie.douban.com/subject/25957143/,2011-07-09 00:00:00,"['玉泽演', '黄灿盛', '张祐荣', 'Jun. K', '尼坤']"
202,2人三足,6.8,1165.0,https://movie.douban.com/subject/1306657/,2002-01-17(香港),"['车婉婉', '张家辉', '朱茵', '罗兰', '吴浣仪']"
203,0.03,6.8,139.0,https://movie.douban.com/subject/26705642/,2016-11-25 00:00:00,"['Bianca Comparato', '朱奥·米格尔', '巴西', '恺撒·查隆', ..."
204,3-4X10月,7.3,1749.0,https://movie.douban.com/subject/1297290/,1990-09-15 00:00:00,"['石田百合子', '柳忧怜', '北野武', '邓肯', '布施绘里']"


In [25]:
# Summarise every column regardless of dtype: text columns get
# count/unique/top/freq, numeric columns get the usual statistics, and
# entries that do not apply to a column are NaN.
movie_pd.describe(include='all')

Unnamed: 0,电影名,评分,评价人数,链接,时间,演员
count,87081,87073,76243.00000,87081,87075,87075
unique,40810,83,,41815,26922,40803
top,哈尔的移动城堡,(评价人数不足),,https://movie.douban.com/subject/3254235/,美国,"['美国', 'Kevin Dunn', '英语']"
freq,6,9330,,6,1296,281
mean,,,15041.19893,,,
...,...,...,...,...,...,...
min,,,20.00000,,,
25%,,,278.00000,,,
50%,,,1442.00000,,,
75%,,,8151.00000,,,


In [26]:
# Restrict the summary to numeric columns only.
movie_pd.describe(include=["number"])

Unnamed: 0,评价人数
count,76243.0
mean,15041.19893
std,44238.241727
min,20.0
25%,278.0
50%,1442.0
75%,8151.0
max,762311.0


## 数据筛选
### 按行

In [27]:
# `.loc` with a single index label returns that row as a Series whose index
# is the column names.
movie_pd.loc[0]

电影名                "ExtremeChampionshipWrestling"
评分                                       (评价人数不足)
评价人数                                          NaN
链接      https://movie.douban.com/subject/1963120/
时间                            2006-06-13 00:00:00
演员                                             []
Name: 0, dtype: object

In [28]:
# `.loc` also accepts a label slice with a step; here it yields the rows
# labelled 0, 3, 6 and 9.
movie_pd.loc[0:10:3]

Unnamed: 0,电影名,评分,评价人数,链接,时间,演员
0,"""ExtremeChampionshipWrestling""",(评价人数不足),,https://movie.douban.com/subject/1963120/,2006-06-13 00:00:00,[]
3,"""WWEMondayNightRAW""Episodedated11June200",(评价人数不足),,https://movie.douban.com/subject/2136118/,2007-06-11 00:00:00,['Kevin Dunn']
6,"""WWEMondayNightRAW""Episodedated18June200",(评价人数不足),,https://movie.douban.com/subject/2136121/,2007-06-18 00:00:00,['Kevin Dunn']
9,"""WWEMondayNightRAW""Episodedated27Novembe",(评价人数不足),,https://movie.douban.com/subject/1957741/,2006-11-27 00:00:00,[]


In [29]:
# Passing a list of labels to `.loc` picks exactly those rows, in the order given.
movie_pd.loc[[1, 100, 1000]]

Unnamed: 0,电影名,评分,评价人数,链接,时间,演员
1,"""TheXFiles""SE6.15Monday",9.4,52.0,https://movie.douban.com/subject/3158584/,1999-02-28 00:00:00,"['大卫·杜楚尼', '吉莲·安德森', '美国', 'Kim Manners', 'Arg..."
100,17岁之风景,7.2,90.0,https://movie.douban.com/subject/1760143/,2006-04-04 00:00:00,"['Tasuku Emoto', 'Mansaku Fuwa', 'Ichir Hariu'..."
1000,NearEqual森山大道,8.3,599.0,https://movie.douban.com/subject/3922263/,森山大道,"['荒木経惟', '西井一夫', '日本', '藤井謙二郎', '84分钟']"


In [30]:
# A bare slice inside `[]` selects rows (here the first two), whereas a
# column label inside `[]` selects a column.
movie_pd[0:2]

Unnamed: 0,电影名,评分,评价人数,链接,时间,演员
0,"""ExtremeChampionshipWrestling""",(评价人数不足),,https://movie.douban.com/subject/1963120/,2006-06-13 00:00:00,[]
1,"""TheXFiles""SE6.15Monday",9.4,52.0,https://movie.douban.com/subject/3158584/,1999-02-28 00:00:00,"['大卫·杜楚尼', '吉莲·安德森', '美国', 'Kim Manners', 'Arg..."


### 按列

In [31]:
# List the available column labels before selecting by column.
movie_pd.columns

Index(['电影名', '评分', '评价人数', '链接', '时间', '演员'], dtype='object')

In [32]:
# A list of labels inside `[]` selects several columns and returns a
# DataFrame; `.head()` keeps the preview short.
movie_pd[['电影名', '评分']].head()

Unnamed: 0,电影名,评分
0,"""ExtremeChampionshipWrestling""",(评价人数不足)
1,"""TheXFiles""SE6.15Monday",9.4
2,"""WWEMondayNightRAW""Episodedated11Decembe",(评价人数不足)
3,"""WWEMondayNightRAW""Episodedated11June200",(评价人数不足)
4,"""WWEMondayNightRAW""Episodedated13Novembe",(评价人数不足)


In [33]:
# Select one column as a Series. Its dtype is `object` because the column
# mixes numeric ratings with the placeholder text "(评价人数不足)".
movie_pd.评分

0        (评价人数不足)
1             9.4
2        (评价人数不足)
3        (评价人数不足)
4        (评价人数不足)
           ...   
87076         5.9
87077         5.7
87078         7.5
87079         8.2
87080         5.7
Name: 评分, Length: 87081, dtype: object

In [34]:
# Attribute access and a single label both return a Series; a one-element
# list of labels returns a DataFrame.
type(movie_pd.评分), type(movie_pd["评分"]),type(movie_pd[["评分"]])

(pandas.core.series.Series,
 pandas.core.series.Series,
 pandas.core.frame.DataFrame)