In [1]:
import numpy as np
import pandas as pd

# DataFrame

## 01.构建

### 1.1 字典类

#### 1.1.1 数组、列表或元组构成的字典构造DataFrame

In [2]:
# Column sources: a list, a tuple and an ndarray, each holding four values.
data = dict(
    a=[1, 2, 3, 4],
    b=(5, 6, 7, 8),
    c=np.arange(9, 13),
)
# Every dict key becomes a column label; rows get the default 0..3 index.
df = pd.DataFrame(data=data)
df

Unnamed: 0,a,b,c
0,1,5,9
1,2,6,10
2,3,7,11
3,4,8,12


In [3]:
# Inspect the row labels through the `index` attribute.
df.index

RangeIndex(start=0, stop=4, step=1)

In [4]:
# Inspect the column labels through the `columns` attribute.
df.columns

Index(['a', 'b', 'c'], dtype='object')

In [5]:
# Inspect the underlying data as a 2D ndarray through the `values` attribute.
df.values

array([[ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11],
       [ 4,  8, 12]], dtype=int64)

In [6]:
# Supply explicit row labels through the `index` argument.
df2 = pd.DataFrame(
    data=data,
    index=['A', 'B', 'C', 'D'],
)
df2

Unnamed: 0,a,b,c
A,1,5,9
B,2,6,10
C,3,7,11
D,4,8,12


In [7]:
# Supply explicit column labels through the `columns` argument.
# 'd' has no matching key in `data`, so that column comes out empty (NaN).
df3 = pd.DataFrame(
    data=data,
    index=['A', 'B', 'C', 'D'],
    columns=['a', 'b', 'c', 'd'],
)
df3

Unnamed: 0,a,b,c,d
A,1,5,9,
B,2,6,10,
C,3,7,11,
D,4,8,12,


#### 1.1.2 Series构成的字典构造DataFrame

In [8]:
# The two Series have different lengths (3 and 2). They are aligned on their
# index, and the missing position in 'b' is filled with NaN.
df = pd.DataFrame(
    data={
        'a': pd.Series(np.arange(0, 3)),
        'b': pd.Series(np.arange(3, 5)),
    }
)
df

Unnamed: 0,a,b
0,0,3.0
1,1,4.0
2,2,


#### 1.1.3 字典构成的字典构造DataFrame（字典嵌套）

In [9]:
# Outer keys become column labels, inner keys become row labels.
# '英语' has no entry for '王五', so that cell is NaN.
data = {
    '语文': {
        '张三': 78,
        '李四': 87,
        '王五': 92,
    },
    '数学': {
        '张三': 93,
        '李四': 67,
        '王五': 84,
    },
    '英语': {
        '张三': 83,
        '李四': 99,
    },
}

df = pd.DataFrame(data=data)
df

Unnamed: 0,语文,数学,英语
张三,78,93,83.0
李四,87,67,99.0
王五,92,84,


### 1.2 列表类

#### 1.2.1 2D ndarray 构造DataFrame

In [10]:
# 0..11 laid out as 4 rows x 3 columns; with no labels given, both axes
# receive default integer labels.
arr = np.arange(12).reshape((4, 3))
df = pd.DataFrame(data=arr)
df

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11


#### 1.2.2 字典构成的列表构造DataFrame

In [11]:
# Each dict is one row and its keys become column labels.
# The last row has no '王五' key, so that cell is NaN.
data = [
    {'张三': 78, '李四': 87, '王五': 92},
    {'张三': 93, '李四': 67, '王五': 84},
    {'张三': 83, '李四': 99},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,张三,李四,王五
0,78,87,92.0
1,93,67,84.0
2,83,99,


#### 1.2.3 Series构成的列表构造DataFrame

In [12]:
# Each Series becomes one row; the shorter second Series leaves its last
# cell as NaN.
data = [
    pd.Series(np.arange(start, stop))
    for start, stop in ((0, 3), (3, 5))
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1,2
0,0.0,1.0,2.0
1,3.0,4.0,


## 02.基本用法

### 2.1 转置

In [13]:
# 3x3 frame of 0..8 with lowercase row labels and uppercase column labels.
df = pd.DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=['a', 'b', 'c'],
    columns=['A', 'B', 'C'],
)
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [14]:
# Transpose: row labels and column labels swap places.
df.T

Unnamed: 0,a,b,c
A,0,3,6
B,1,4,7
C,2,5,8


### 2.2 通过列索引获取列数据

In [15]:
# Selecting a single column label with [] returns a Series, as the printed type confirms.
print(type(df['A']))
df['A']

<class 'pandas.core.series.Series'>


a    0
b    3
c    6
Name: A, dtype: int32

### 2.3 增加列

In [16]:
# Assigning to a label that does not exist yet appends it as a new column (modifies df in place).
df['D']=[100,101,102]
df

Unnamed: 0,A,B,C,D
a,0,1,2,100
b,3,4,5,101
c,6,7,8,102


### 2.4 删除列

In [17]:
# `del` removes the column from df in place.
del df['D']
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


## 03.作业练习

In [18]:
import numpy as np
import pandas as pd

1.创建表格

In [19]:
# Exercise 1: one list per column, seven students each.
# '体育' is a placeholder column filled with zeros.
data = {
    "姓名": ['张三', '李四', '王五', '小明', '小红', '小刚', '小亮'],
    "语文": [89, 78, 79, 89, 90, 87, 83],
    "数学": [59, 83, 85, 92, 67, 81, 77],
    "英语": [84, 97, 88, 83, 67, 73, 71],
    "体育": [0] * 7,
}

df = pd.DataFrame(data=data)
df

Unnamed: 0,姓名,语文,数学,英语,体育
0,张三,89,59,84,0
1,李四,78,83,97,0
2,王五,79,85,88,0
3,小明,89,92,83,0
4,小红,90,67,67,0
5,小刚,87,81,73,0
6,小亮,83,77,71,0


2.进行转置

In [20]:
# Exercise 2: transpose so that each student becomes a column.
df.T

Unnamed: 0,0,1,2,3,4,5,6
姓名,张三,李四,王五,小明,小红,小刚,小亮
语文,89,78,79,89,90,87,83
数学,59,83,85,92,67,81,77
英语,84,97,88,83,67,73,71
体育,0,0,0,0,0,0,0


3.删除“体育”列

In [21]:
# Exercise 3: remove the '体育' column from df in place.
del df['体育']
df

Unnamed: 0,姓名,语文,数学,英语
0,张三,89,59,84
1,李四,78,83,97
2,王五,79,85,88
3,小明,89,92,83
4,小红,90,67,67
5,小刚,87,81,73
6,小亮,83,77,71


In [22]:
# Append a new '综合' column, one value per existing row (7 values for 7 rows).
df['综合'] = [97, 87, 78, 76, 84, 88, 91]
df

Unnamed: 0,姓名,语文,数学,英语,综合
0,张三,89,59,84,97
1,李四,78,83,97,87
2,王五,79,85,88,78
3,小明,89,92,83,76
4,小红,90,67,67,84
5,小刚,87,81,73,88
6,小亮,83,77,71,91


In [30]:
# 3x4 frame of 0..11 used by the indexing examples in the following cells.
df = pd.DataFrame(
    data=np.arange(12).reshape((3, 4)),
    index=['a', 'b', 'c'],
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11


In [33]:
# Column selection: a single label inside [] picks that column.
df['A']

a    0
b    4
c    8
Name: A, dtype: int32

In [38]:
# Element access: read one cell with a single .loc[row_label, column_label]
# lookup instead of chained indexing (df['A']['b']), which first materialises
# the whole column Series and then indexes into it.
df.loc['b', 'A']

4

In [39]:
# Row selection: .loc with a single row label returns that row as a Series.
df.loc['a']

A    0
B    1
C    2
D    3
Name: a, dtype: int32

In [45]:
# A slice inside [] selects rows by position (here only the first row),
# unlike df['A'], where a single label selects a column.
df[0:1]

Unnamed: 0,A,B,C,D
a,0,1,2,3


In [46]:
# Display the whole frame.
df

Unnamed: 0,A,B,C,D
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11


In [54]:
# Label-based slice 'a':'b' includes both endpoints, so rows a and b of
# column B are overwritten in place.
df.loc['a':'b','B']= [100, 101]
df

Unnamed: 0,A,B,C,D
a,0,100,2,3
b,4,101,6,7
c,8,9,10,11


In [51]:
# Positional selection with iloc: rows 0-1 and columns 1-2 (end positions are excluded).
# NOTE(review): the stored output for this cell shows B = 1, 5, i.e. it was produced
# before the `.loc` assignment in the preceding cell (execution count 51 < 54).
# A fresh top-to-bottom run would display B = 100, 101 here.
df.iloc[0:2,1:3]

Unnamed: 0,B,C
a,1,2
b,5,6
