# Review

### 准备工作

In [4]:
from io import StringIO

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

### Series

#### 创建 Series

In [2]:
# 1. Create a Series from a plain Python list
s1 = Series(data=[1, 2, 3, 4, 5])

In [3]:
s1

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [4]:
# Confirm that the object is a pandas Series
type(s1)

pandas.core.series.Series

In [5]:
# The data held by the Series, exposed as a NumPy array
s1.values

array([1, 2, 3, 4, 5], dtype=int64)

In [6]:
# No index was supplied at creation, so this shows the automatically generated RangeIndex
s1.index

RangeIndex(start=0, stop=5, step=1)

<hr>

In [11]:
# 2. Create a Series from a NumPy array
# arr1 holds the even numbers from 0 up to and including 10
arr1 = np.arange(0, 11, step=2)
s2 = Series(data=arr1)

In [12]:
s2

0     0
1     2
2     4
3     6
4     8
5    10
dtype: int32

In [13]:
# A Series built from a NumPy array is still a pandas Series
type(s2)

pandas.core.series.Series

In [14]:
# The data held by the Series, exposed as a NumPy array
s2.values

array([ 0,  2,  4,  6,  8, 10])

In [15]:
# No index was supplied at creation, so this shows the automatically generated RangeIndex
s2.index

RangeIndex(start=0, stop=6, step=1)

<hr>

In [20]:
# 3. Supply a custom index at creation time.
# The number of index labels must equal the number of values.
s3 = Series(data=[1, 3, 5, 7, 9], index=list('ABCDE'))

In [21]:
s3

A    1
B    3
C    5
D    7
E    9
dtype: int64

#### 使用 Series

In [27]:
# 1. Fetch a single element of a Series by its index
s2[3]

6

In [28]:
# With a custom index, the index label itself is the lookup key
s3['B']

3

### DataFrame

#### 创建 DataFrame

In [33]:
# 1. Create a DataFrame from a Python dict.
# Each key becomes a column name; each list holds that column's values.
dict1 = {
    '学号': ['0001', '0002', '0003'],
    '姓名': ['张三', '李四', '王五'],
    '性别': ['男', '女', '男'],
}
df1 = DataFrame(data=dict1)
# Same data, but with caller-supplied row labels
df2 = DataFrame(data=dict1, index=[1, 2, 3])

In [31]:
df1

Unnamed: 0,学号,姓名,性别
0,1,张三,男
1,2,李四,女
2,3,王五,男


In [34]:
df2

Unnamed: 0,学号,姓名,性别
1,1,张三,男
2,2,李四,女
3,3,王五,男


In [22]:
# Nested-dict form of the input: each outer key becomes a column name, and
# each inner dict maps a row-index label to that column's value for the row.
dict2 = {
    '学号': {'A': '0001', 'B': '0002', 'C': '0003'},
    '姓名': {'A': '张三', 'B': '李四', 'C': '王五'},
    '性别': {'A': '男', 'B': '女', 'C': '男'},
}
df8 = DataFrame(data=dict2)

In [23]:
df8

Unnamed: 0,学号,姓名,性别
A,1,张三,男
B,2,李四,女
C,3,王五,男


<hr>

In [5]:
# 2. Create a DataFrame from Series objects: each Series becomes one row
s4 = Series(data=['0001', '张三', '男'])
s5 = Series(data=['0002', '李四', '女'])
s6 = Series(data=['0003', '王五', '男'])
df3 = DataFrame(data=[s4, s5, s6])
# Assign string row labels and descriptive column names after construction
df3.index = ['1', '2', '3']
df3.columns = ['学号', '姓名', '性别']

In [6]:
df3

Unnamed: 0,学号,姓名,性别
1,1,张三,男
2,2,李四,女
3,3,王五,男


<hr>

In [87]:
# 3. Create a new DataFrame from an existing one; the columns argument
# selects which columns are carried over.
df4 = DataFrame(data=df3, columns=['学号', '姓名'])

In [88]:
df4

Unnamed: 0,学号,姓名
1,1,张三
2,2,李四
3,3,王五


<hr>

In [91]:
# 4. TODO

#### 使用 DataFrame

In [79]:
# 1. Read one column of the DataFrame; every column is a Series
df3['学号']

1    0001
2    0002
3    0003
Name: 学号, dtype: object

In [80]:
# Confirm that a single selected column is a pandas Series
type(df3['性别'])

pandas.core.series.Series

<hr>

In [82]:
# 2. Read a single value from the DataFrame.
# One .loc[row label, column name] lookup replaces the chained form
# df3['姓名']['3'], which first extracts the column and then indexes into it.
df3.loc['3', '姓名']

'王五'

<hr>

In [83]:
# 3. Iterate over the DataFrame row by row.
# Each item produced by iterrows() is a 2-tuple: the row's index label
# followed by the row's data as a Series.
dash_line = '-------------------------------------'
star_line = '*************************************'
for row in df3.iterrows():
    print(row)
    # Run the same inspections in order, each one preceded by a divider
    for caption, detail in (
        ('type(row): ', type(row)),
        ('len(row): ', len(row)),
        ('row[0]: ', row[0]),
        ('type(row[0]): ', type(row[0])),
        ('row[1]: ', row[1]),
        ('type(row[1]): ', type(row[1])),
    ):
        print(dash_line)
        print(caption + str(detail))
    print(dash_line)
    # Individual fields of the row are reachable by column name
    print(row[1]['姓名'])
    print(star_line)

('1', 学号    0001
姓名      张三
性别       男
Name: 1, dtype: object)
-------------------------------------
type(row): <class 'tuple'>
-------------------------------------
len(row): 2
-------------------------------------
row[0]: 1
-------------------------------------
type(row[0]): <class 'str'>
-------------------------------------
row[1]: 学号    0001
姓名      张三
性别       男
Name: 1, dtype: object
-------------------------------------
type(row[1]): <class 'pandas.core.series.Series'>
-------------------------------------
张三
*************************************
('2', 学号    0002
姓名      李四
性别       女
Name: 2, dtype: object)
-------------------------------------
type(row): <class 'tuple'>
-------------------------------------
len(row): 2
-------------------------------------
row[0]: 2
-------------------------------------
type(row[0]): <class 'str'>
-------------------------------------
row[1]: 学号    0002
姓名      李四
性别       女
Name: 2, dtype: object
-------------------------------------
type(ro

<hr>

In [7]:
# 4. Transpose the DataFrame: rows become columns and columns become rows
df5 = df3.transpose()

In [8]:
df5

Unnamed: 0,1,2,3
学号,0001,0002,0003
姓名,张三,李四,王五
性别,男,女,男


#### I/O 操作

官方文档：<https://pandas.pydata.org/pandas-docs/stable/reference/io.html>

In [9]:
# 1.1 Build a DataFrame from the data currently on the clipboard
# NOTE(review): the result depends on what was copied before running this cell,
# so it cannot be reproduced from the notebook alone
df6 = pd.read_clipboard()

In [10]:
df6

Unnamed: 0,Dec 2019,Dec 2018,Change,Programming Language,Ratings,Change.1
0,1,1,,Java,17.253%,+1.32%
1,2,2,,C,16.086%,+1.80%
2,3,3,,Python,10.308%,+1.93%
3,4,4,,C++,6.196%,-1.37%
4,5,6,change,C#,4.801%,+1.35%


In [12]:
# 1.2 Copy a DataFrame to the clipboard
df6.to_clipboard()

<hr>

In [15]:
# 2.1 Write a DataFrame to a CSV file
# NOTE(review): the backslash-separated relative path looks Windows-specific —
# confirm before running on another platform
df6.to_csv('..\\test\\df6_1.csv')

# Write again, this time without the index column
df6.to_csv('..\\test\\df6_2.csv', index=False)

In [18]:
# 2.2 Build a DataFrame by reading a CSV file
# (this is the file that was written with index=False)
df7 = pd.read_csv('..\\test\\df6_2.csv')

In [19]:
df7

Unnamed: 0,Dec 2019,Dec 2018,Change,Programming Language,Ratings,Change.1
0,1,1,,Java,17.253%,+1.32%
1,2,2,,C,16.086%,+1.80%
2,3,3,,Python,10.308%,+1.93%
3,4,4,,C++,6.196%,-1.37%
4,5,6,change,C#,4.801%,+1.35%


<hr>

In [36]:
# 3.1 Serialize a DataFrame to a JSON string
json_str1 = df7.to_json()

In [37]:
# The resulting JSON resembles the nested Python dict that can be used to
# create a DataFrame: column name -> {row label -> value}
json_str1

'{"Dec 2019":{"0":1,"1":2,"2":3,"3":4,"4":5},"Dec 2018":{"0":1,"1":2,"2":3,"3":4,"4":6},"Change":{"0":null,"1":null,"2":null,"3":null,"4":"change"},"Programming Language":{"0":"Java","1":"C","2":"Python","3":"C++","4":"C#"},"Ratings":{"0":"17.253%","1":"16.086%","2":"10.308%","3":"6.196%","4":"4.801%"},"Change.1":{"0":"+1.32%","1":"+1.80%","2":"+1.93%","3":"-1.37%","4":"+1.35%"}}'

In [38]:
# 3.2 Convert suitably structured JSON data back into a DataFrame.
# The JSON text is wrapped in StringIO so that read_json receives a file-like
# object; newer pandas releases deprecate passing a literal JSON string.
df9 = pd.read_json(StringIO(json_str1))

In [39]:
df9

Unnamed: 0,Dec 2019,Dec 2018,Change,Programming Language,Ratings,Change.1
0,1,1,,Java,17.253%,+1.32%
1,2,2,,C,16.086%,+1.80%
2,3,3,,Python,10.308%,+1.93%
3,4,4,,C++,6.196%,-1.37%
4,5,6,change,C#,4.801%,+1.35%


<hr>

In [26]:
# 4.1 Render a DataFrame as an HTML table, returned as a string
html_str = df9.to_html()

In [27]:
html_str

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Dec 2019</th>\n      <th>Dec 2018</th>\n      <th>Change</th>\n      <th>Programming Language</th>\n      <th>Ratings</th>\n      <th>Change.1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>1</td>\n      <td>None</td>\n      <td>Java</td>\n      <td>17.253%</td>\n      <td>+1.32%</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>2</td>\n      <td>None</td>\n      <td>C</td>\n      <td>16.086%</td>\n      <td>+1.80%</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3</td>\n      <td>3</td>\n      <td>None</td>\n      <td>Python</td>\n      <td>10.308%</td>\n      <td>+1.93%</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4</td>\n      <td>4</td>\n      <td>None</td>\n      <td>C++</td>\n      <td>6.196%</td>\n      <td>-1.37%</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>5</td>\n    

In [29]:
# Write the HTML to files instead: once with the index column, once without
# NOTE(review): the backslash-separated relative paths look Windows-specific — confirm
df9.to_html('..\\test\\df9_1.html')
df9.to_html('..\\test\\df9_2.html', index=False)

<hr>

In [50]:
# 5.1 Write a DataFrame to Excel files: once with the index column, once without
# NOTE(review): writing the legacy .xls format needs an engine that newer pandas
# releases may no longer provide — confirm the pandas version or consider .xlsx
df9.to_excel('..\\test\\df9_1.xls')
df9.to_excel('..\\test\\df9_2.xls', index=False)

In [51]:
# 5.2 Build a DataFrame by reading an Excel file
# (this is the file that was written with index=False)
df10 = pd.read_excel('..\\test\\df9_2.xls')

In [52]:
df10

Unnamed: 0,Dec 2019,Dec 2018,Change,Programming Language,Ratings,Change.1
0,1,1,,Java,17.253%,+1.32%
1,2,2,,C,16.086%,+1.80%
2,3,3,,Python,10.308%,+1.93%
3,4,4,,C++,6.196%,-1.37%
4,5,6,change,C#,4.801%,+1.35%
