# 1.创建DataFrame

### DataFrame可以看作是Series组成的字典

### (1)参数传入一个字典，键是列名，值是Series或列表

#### (i) 值是Series

In [2]:
import pandas as pd

In [4]:
# One Series per future column; none is given an explicit index, so all three
# share the default integer index 0..4.
s_id = pd.Series(data=["01", "02", "03", "04", "05"])      # IDs stay strings so the leading zero survives
s_class = pd.Series(data=["C2", "C1", "C2", "C3", "C1"])   # class label of each student
s_grade = pd.Series(data=[92, 67, 70, 88, 76])             # integer scores

In [9]:
# Each dict key becomes a column label and each Series becomes that column's data.
df1 = pd.DataFrame(
    data={
        "学号": s_id,
        "班级": s_class,
        "成绩": s_grade,
    }
)
df1

Unnamed: 0,学号,班级,成绩
0,01,C2,92
1,02,C1,67
2,03,C2,70
3,04,C3,88
4,05,C1,76


#### (ii) 值是列表

In [10]:
# Plain Python lists are accepted as column values too; the frame then gets
# the default integer row index.
l_id = ["01", "02", "03", "04", "05"]
l_class = ["C2", "C1", "C2", "C3", "C1"]
l_grade = [92, 67, 70, 88, 76]
df2 = pd.DataFrame(
    data={
        "学号": l_id,
        "班级": l_class,
        "成绩": l_grade,
    }
)
df2

Unnamed: 0,学号,班级,成绩
0,01,C2,92
1,02,C1,67
2,03,C2,70
3,04,C3,88
4,05,C1,76


#### (iii) 标签索引

In [11]:
# Row labels shared by all three Series. Defining them once guarantees the
# Series carry identical indexes; with three hand-typed copies, a typo in one
# of them would silently produce extra rows filled with NaN when the
# DataFrame aligns the columns on their labels.
names = ["小明", "小红", "小杰", "小丽", "小华"]
s_id = pd.Series(["01", "02", "03", "04", "05"], index=names)
s_class = pd.Series(["C2", "C1", "C2", "C3", "C1"], index=names)
s_grade = pd.Series([92, 67, 70, 88, 76], index=names)
# The Series' labels become the row index of the resulting DataFrame.
df3 = pd.DataFrame({"学号": s_id, "班级": s_class, "成绩": s_grade})
df3

Unnamed: 0,学号,班级,成绩
小明,01,C2,92
小红,02,C1,67
小杰,03,C2,70
小丽,04,C3,88
小华,05,C1,76


### (2)嵌套字典

In [16]:
# Nested dict: outer keys are the column names, inner keys are the row labels.
df4 = pd.DataFrame(
    data={
        "学号": {"小明": "01", "小红": "02", "小杰": "03", "小丽": "04", "小华": "05"},
        "班级": {"小明": "C2", "小红": "C1", "小杰": "C2", "小丽": "C3", "小华": "C1"},
        "成绩": {"小明": 92, "小红": 67, "小杰": 70, "小丽": 88, "小华": 76},
    }
)
df4

Unnamed: 0,学号,班级,成绩
小明,01,C2,92
小红,02,C1,67
小杰,03,C2,70
小丽,04,C3,88
小华,05,C1,76


# 2.获得DataFrame的索引、列名和值

### (1)获取索引 ：

In [17]:
# Row labels of the frame, returned as an Index object.
df4.index

Index(['小明', '小红', '小杰', '小丽', '小华'], dtype='object')

### (2)获取列名 ：

In [18]:
# Column labels of the frame, also returned as an Index object.
df4.columns

Index(['学号', '班级', '成绩'], dtype='object')

### (3)获取值 : 返回NumPy数组

In [19]:
# Underlying data as a 2-D NumPy array. The dtype is object here because the
# columns mix strings and integers.
# NOTE(review): the pandas documentation recommends DataFrame.to_numpy() over
# .values for new code.
df4.values

array([['01', 'C2', 92],
       ['02', 'C1', 67],
       ['03', 'C2', 70],
       ['04', 'C3', 88],
       ['05', 'C1', 76]], dtype=object)

# 3.对DataFrame进行转置

In [20]:
# Transpose: the row labels become the column labels and vice versa.
df4.T

Unnamed: 0,小明,小红,小杰,小丽,小华
学号,01,02,03,04,05
班级,C2,C1,C2,C3,C1
成绩,92,67,70,88,76


# 4.提取DataFrame中的数据

### (1)提取列

#### (i)用方括号索引(类似字典按键取值，返回Series)

In [21]:
# Bracket indexing with a single column label returns that column as a Series.
df4["班级"]

小明    C2
小红    C1
小杰    C2
小丽    C3
小华    C1
Name: 班级, dtype: object

In [22]:
# Same bracket lookup (single quotes work equally well); this numeric column
# comes back with dtype int64.
df4['成绩']

小明    92
小红    67
小杰    70
小丽    88
小华    76
Name: 成绩, dtype: int64

#### (ii)用属性名获取(列名可作为DataFrame的属性访问，返回Series)

In [24]:
# Attribute-style access: yields the same Series as df4["成绩"].
df4.成绩

小明    92
小红    67
小杰    70
小丽    88
小华    76
Name: 成绩, dtype: int64

In [25]:
# Attribute-style access: yields the same Series as df4["班级"].
df4.班级

小明    C2
小红    C1
小杰    C2
小丽    C3
小华    C1
Name: 班级, dtype: object

##### Tip:若列名含有空格等特殊字符(即不是合法的Python标识符)，或与DataFrame已有的属性/方法同名，则不能使用属性名获取列，只能用方括号索引，如 df["列名"]

#### (iii)提取任意列

In [26]:
# Indexing with a list of column labels returns a DataFrame holding just
# those columns, in the listed order.
df4[["学号", "成绩"]]

Unnamed: 0,学号,成绩
小明,01,92
小红,02,67
小杰,03,70
小丽,04,88
小华,05,76


### (2)提取行

#### (i)提取单行

In [29]:
# .loc selects by row label; a single label returns that row as a Series
# indexed by the column names.
df4.loc["小丽"]

学号    04
班级    C3
成绩    88
Name: 小丽, dtype: object

In [30]:
# .iloc selects by zero-based integer position; position 3 is the same row
# as the label "小丽".
df4.iloc[3]

学号    04
班级    C3
成绩    88
Name: 小丽, dtype: object

#### (ii)提取连续多行

In [31]:
# Label slices with .loc include BOTH endpoints, so this returns three rows.
df4.loc["小红":"小丽"]

Unnamed: 0,学号,班级,成绩
小红,02,C1,67
小杰,03,C2,70
小丽,04,C3,88


In [32]:
# Positional slices with .iloc exclude the stop position, like ordinary
# Python slicing: 1:3 returns positions 1 and 2 only.
df4.iloc[1:3]

Unnamed: 0,学号,班级,成绩
小红,02,C1,67
小杰,03,C2,70


#### (iii)提取不连续多行

In [34]:
# A list of labels selects non-contiguous rows, in the order listed.
df4.loc[["小红","小丽"]]

Unnamed: 0,学号,班级,成绩
小红,02,C1,67
小丽,04,C3,88


In [35]:
# A list of positions works the same way; the result follows the list order,
# so position 3 comes before position 1.
df4.iloc[[3,1]]

Unnamed: 0,学号,班级,成绩
小丽,04,C3,88
小红,02,C1,67


### (3)提取元素

#### (i)提取单个元素

In [36]:
# Row label plus column label returns a single scalar value.
df4.loc["小杰","学号"]

'03'

In [37]:
# Row position plus column position returns the same scalar.
df4.iloc[2,0]

'03'

#### (ii)提取相邻元素

In [38]:
# Slice rows and columns by label in one call; both label slices are
# end-inclusive.
df4.loc["小红":"小杰","班级":"成绩"]

Unnamed: 0,班级,成绩
小红,C1,67
小杰,C2,70


In [39]:
# Positional equivalent of the label-based block selection; stop positions
# are exclusive, so 1:3 covers positions 1 and 2 on each axis.
df4.iloc[1:3,1:3]

Unnamed: 0,班级,成绩
小红,C1,67
小杰,C2,70


#### (iii)提取整行/列相邻元素

In [45]:
# A bare ":" keeps every row; 0:2 keeps the first two columns.
df4.iloc[:,0:2]

Unnamed: 0,学号,班级
小明,01,C2
小红,02,C1
小杰,03,C2
小丽,04,C3
小华,05,C1


In [41]:
# A bare ":" in the column slot keeps every column for the selected
# (end-inclusive) row range.
df4.loc["小明":"小杰",:]

Unnamed: 0,学号,班级,成绩
小明,01,C2,92
小红,02,C1,67
小杰,03,C2,70


#### (iv)提取不相邻元素

In [43]:
# A list of row positions can be combined with a column slice to pick
# non-adjacent rows from the first two columns.
df4.iloc[[1,3],0:2]

Unnamed: 0,学号,班级
小红,02,C1
小丽,04,C3


# 5.根据条件筛选DataFrame的行

#### Why？
#### Because each row represents one instance, and each column represents one attribute of those instances.
#### Filtering rows that meet specified conditions is equivalent to selecting qualifying instances from existing ones.

In [46]:
# The comparison builds a boolean Series; indexing the frame with it keeps
# only the rows where the value is True.
df4[df4["成绩"]>80]

Unnamed: 0,学号,班级,成绩
小明,01,C2,92
小丽,04,C3,88


In [47]:
# Same filter, written with attribute access for the column.
df4[df4.成绩>80]

Unnamed: 0,学号,班级,成绩
小明,01,C2,92
小丽,04,C3,88


In [50]:
# Combine conditions with & (element-wise AND). Each comparison needs its own
# parentheses because & binds more tightly than > and ==.
df4[(df4["成绩"]>80) & (df4["班级"]=="C3")] 

Unnamed: 0,学号,班级,成绩
小丽,04,C3,88


# 6.获得DataFrame的前N行

In [51]:
# With no argument head() uses pandas' documented default of n=5, which is
# the whole frame here because df4 has only five rows.
df4.head()

Unnamed: 0,学号,班级,成绩
小明,01,C2,92
小红,02,C1,67
小杰,03,C2,70
小丽,04,C3,88
小华,05,C1,76


In [52]:
# Pass n to limit the result to the first n rows.
df4.head(2)

Unnamed: 0,学号,班级,成绩
小明,01,C2,92
小红,02,C1,67
