In [1]:
import pandas as pd
import numpy as np

In [2]:
# Demo frame: the integers 0..23 laid out as 6 rows x 4 columns (A-D),
# indexed by six consecutive days starting on 2017-01-01.
dates = pd.date_range('20170101', periods=6)
df1 = pd.DataFrame(
    data=np.arange(24).reshape(6, 4),
    index=dates,
    columns=list('ABCD'),
)
df1

Unnamed: 0,A,B,C,D
2017-01-01,0,1,2,3
2017-01-02,4,5,6,7
2017-01-03,8,9,10,11
2017-01-04,12,13,14,15
2017-01-05,16,17,18,19
2017-01-06,20,21,22,23


In [3]:
df1['A']    # select a single DataFrame column; the result is a Series

2017-01-01     0
2017-01-02     4
2017-01-03     8
2017-01-04    12
2017-01-05    16
2017-01-06    20
Freq: D, Name: A, dtype: int64

In [4]:
df1.A    # attribute access: returns the same Series as df1['A']

2017-01-01     0
2017-01-02     4
2017-01-03     8
2017-01-04    12
2017-01-05    16
2017-01-06    20
Freq: D, Name: A, dtype: int64

In [5]:
df1[0:2]    # rows at positions 0 and 1 (integer slices are half-open: the end position is excluded)

Unnamed: 0,A,B,C,D
2017-01-01,0,1,2,3
2017-01-02,4,5,6,7


In [7]:
df1['20170102' : '20170104']    # date-label slice: closed interval, both endpoints are included

Unnamed: 0,A,B,C,D
2017-01-02,4,5,6,7
2017-01-03,8,9,10,11
2017-01-04,12,13,14,15


In [8]:
df1.loc['20170102']    # select by label: the row labelled 2017-01-02, returned as a Series

A    4
B    5
C    6
D    7
Name: 2017-01-02 00:00:00, dtype: int64

In [9]:
df1.loc['20170102', ['A', 'C']]    # one row label plus a list of column labels: columns A and C of that row

A    4
C    6
Name: 2017-01-02 00:00:00, dtype: int64

In [10]:
df1.loc[:, ['A', 'C']]    # all rows, columns A and C only

Unnamed: 0,A,C
2017-01-01,0,2
2017-01-02,4,6
2017-01-03,8,10
2017-01-04,12,14
2017-01-05,16,18
2017-01-06,20,22


In [11]:
df1.iloc[2]    # select by position: position 2 is the third row (positions start at 0)

A     8
B     9
C    10
D    11
Name: 2017-01-03 00:00:00, dtype: int64

In [12]:
df1.iloc[1:3, 2:4]    # positional slices are half-open: rows at positions 1-2, columns at positions 2-3 (C and D)

Unnamed: 0,C,D
2017-01-02,6,7
2017-01-03,10,11


In [14]:
df1.iloc[[1, 2, 4], [1, 3]]    # lists of positions pick non-contiguous rows and columns

Unnamed: 0,B,D
2017-01-02,5,7
2017-01-03,9,11
2017-01-05,17,19


In [16]:
df1.A    # column A, shown again before building a boolean mask from it

2017-01-01     0
2017-01-02     4
2017-01-03     8
2017-01-04    12
2017-01-05    16
2017-01-06    20
Freq: D, Name: A, dtype: int64

In [17]:
df1.A > 6    # element-wise comparison: yields a boolean Series with the same index

2017-01-01    False
2017-01-02    False
2017-01-03     True
2017-01-04     True
2017-01-05     True
2017-01-06     True
Freq: D, Name: A, dtype: bool

In [18]:
df1[df1.A > 6]    # boolean indexing: keep only the rows where column A is greater than 6

Unnamed: 0,A,B,C,D
2017-01-03,8,9,10,11
2017-01-04,12,13,14,15
2017-01-05,16,17,18,19
2017-01-06,20,21,22,23


## 没有自定义 index 和 columns 时进行数据选择（使用默认的整数标签）

In [2]:
# The values 0..23 arranged as 4 rows x 6 columns. No index/columns are passed,
# so the frame gets default integer labels (0..3 for rows, 0..5 for columns).
df2 = pd.DataFrame(data=np.arange(24).reshape((4, 6)))
df2

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23


In [4]:
df2[0 : 2]    # rows in the half-open range [0, 2); note: positions start at 0

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11


In [19]:
df2.loc[0 : 2]    # rows labelled 0, 1 and 2; note: .loc slices by label, so the end label is included

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17


In [20]:
df2[2]      # the column labelled 2 (the third column); note: the default labels start at 0

0     2
1     8
2    14
3    20
Name: 2, dtype: int64

In [7]:
df2.loc[2]   # the row labelled 2; note: .loc selects by label

0    12
1    13
2    14
3    15
4    16
5    17
Name: 2, dtype: int64

In [21]:
df2.iloc[2]    # select by position: the row at position 2 (same row as df2.loc[2] here, because the default labels equal the positions)

0    12
1    13
2    14
3    15
4    16
5    17
Name: 2, dtype: int64

In [16]:
df2.loc[2, [0, 2]]    # the row labelled 2, restricted to the columns labelled 0 and 2

0    12
2    14
Name: 2, dtype: int64

In [15]:
df2.loc[[0, 2], 2]   # the column labelled 2, restricted to the rows labelled 0 and 2

0     2
2    14
Name: 2, dtype: int64

In [17]:
df2.loc[:, [0, 2]]   # all rows of the columns labelled 0 and 2

Unnamed: 0,0,2
0,0,2
1,6,8
2,12,14
3,18,20


In [18]:
df2.loc[[0, 2], :]    # all columns of the rows labelled 0 and 2

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
2,12,13,14,15,16,17


In [23]:
df2.iloc[[0, 2], :]    # select by position: the rows at positions 0 and 2, all columns

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
2,12,13,14,15,16,17
