In [2]:
import pandas as pd
import numpy as np

# Six consecutive daily timestamps starting 2013-01-01; used as the row index.
dates = pd.date_range('20130101', periods=6)
# 6x4 frame of standard-normal samples with columns A-D. The samples come from
# a seeded generator so that Restart & Run All rebuilds exactly the same frame;
# an unseeded np.random.randn call yields different numbers on every run.
# NOTE: re-run the notebook after this change so saved outputs match the seed.
df = pd.DataFrame(
    np.random.default_rng(0).standard_normal((6, 4)),
    index=dates,
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,1.832368,1.177498,0.638396,-0.182525
2013-01-02,1.485888,-0.521183,-0.54646,-1.124263
2013-01-03,-0.731605,0.124034,1.148756,-0.423043
2013-01-04,1.593673,2.411398,-0.979668,1.477386
2013-01-05,-0.239526,-0.492755,0.707605,-0.589724
2013-01-06,0.632588,-2.243278,-0.957501,0.143698


In [3]:
# Select a single column, which yields a Series; equivalent to df.A.
df['A']

2013-01-01    1.832368
2013-01-02    1.485888
2013-01-03   -0.731605
2013-01-04    1.593673
2013-01-05   -0.239526
2013-01-06    0.632588
Freq: D, Name: A, dtype: float64

In [4]:
# Slice rows with []: positions 0 up to (but not including) 3.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,1.832368,1.177498,0.638396,-0.182525
2013-01-02,1.485888,-0.521183,-0.54646,-1.124263
2013-01-03,-0.731605,0.124034,1.148756,-0.423043


In [5]:
# Slice rows by index label; as the output shows, both end labels are included.
df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,1.485888,-0.521183,-0.54646,-1.124263
2013-01-03,-0.731605,0.124034,1.148756,-0.423043
2013-01-04,1.593673,2.411398,-0.979668,1.477386


In [7]:
# Extract one row by its index label; the result is a Series keyed by column.
df.loc[dates[0]]

A    1.832368
B    1.177498
C    0.638396
D   -0.182525
Name: 2013-01-01 00:00:00, dtype: float64

In [8]:
df.loc[:, ['A', 'B']] # Select multiple columns by label, keeping every row

Unnamed: 0,A,B
2013-01-01,1.832368,1.177498
2013-01-02,1.485888,-0.521183
2013-01-03,-0.731605,0.124034
2013-01-04,1.593673,2.411398
2013-01-05,-0.239526,-0.492755
2013-01-06,0.632588,-2.243278


In [9]:
# Label slice on rows (both endpoints included) combined with a column list.
df.loc['20130102':'20130104', ['A', 'B']]

Unnamed: 0,A,B
2013-01-02,1.485888,-0.521183
2013-01-03,-0.731605,0.124034
2013-01-04,1.593673,2.411398


In [10]:
# A single row label plus a column list reduces the result to a Series.
df.loc['20130102', ['A', 'B']]

A    1.485888
B   -0.521183
Name: 2013-01-02 00:00:00, dtype: float64

In [11]:
# Row label and column label together return a single scalar value.
df.loc[dates[0], 'A']

1.8323683347112978

In [12]:
# Scalar lookup by label through .at; returns the same value as the .loc lookup.
df.at[dates[0], 'A']

1.8323683347112978

In [13]:
# Select a row by integer position: position 3 is the fourth row.
df.iloc[3]

A    1.593673
B    2.411398
C   -0.979668
D    1.477386
Name: 2013-01-04 00:00:00, dtype: float64

In [14]:
# Integer-position slices on both axes; the end positions are excluded.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,1.593673,2.411398
2013-01-05,-0.239526,-0.492755


In [15]:
# Pick specific rows and columns with lists of integer positions.
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2013-01-02,1.485888,-0.54646
2013-01-03,-0.731605,1.148756
2013-01-05,-0.239526,0.707605


In [16]:
# Slice rows by integer position and keep all columns.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,1.485888,-0.521183,-0.54646,-1.124263
2013-01-03,-0.731605,0.124034,1.148756,-0.423043


In [17]:
# Slice columns by integer position and keep all rows.
df.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,1.177498,0.638396
2013-01-02,-0.521183,-0.54646
2013-01-03,0.124034,1.148756
2013-01-04,2.411398,-0.979668
2013-01-05,-0.492755,0.707605
2013-01-06,-2.243278,-0.957501


In [18]:
# Integer row and column positions together return a single scalar value.
df.iloc[1, 1]

-0.5211834570661475

In [19]:
# Scalar lookup by position through .iat; returns the same value as .iloc[1, 1].
df.iat[1, 1]

-0.5211834570661475

In [20]:
# Boolean indexing: keep only the rows whose value in column A is positive.
df[df.A > 0]

Unnamed: 0,A,B,C,D
2013-01-01,1.832368,1.177498,0.638396,-0.182525
2013-01-02,1.485888,-0.521183,-0.54646,-1.124263
2013-01-04,1.593673,2.411398,-0.979668,1.477386
2013-01-06,0.632588,-2.243278,-0.957501,0.143698


In [21]:
# Select the values of the DataFrame that satisfy the condition; in the output,
# cells that fail it come back blank (missing).
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,1.832368,1.177498,0.638396,
2013-01-02,1.485888,,,
2013-01-03,,0.124034,1.148756,
2013-01-04,1.593673,2.411398,,1.477386
2013-01-05,,,0.707605,
2013-01-06,0.632588,,,0.143698


In [22]:
# Derive a separate frame from df that carries an extra string column E;
# df itself is not modified.
df2 = df.assign(E=['one', 'one', 'two', 'three', 'four', 'three'])
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,1.832368,1.177498,0.638396,-0.182525,one
2013-01-02,1.485888,-0.521183,-0.54646,-1.124263,one
2013-01-03,-0.731605,0.124034,1.148756,-0.423043,two
2013-01-04,1.593673,2.411398,-0.979668,1.477386,three
2013-01-05,-0.239526,-0.492755,0.707605,-0.589724,four
2013-01-06,0.632588,-2.243278,-0.957501,0.143698,three


In [23]:
# Filter with isin(): keep the rows whose E value is one of the listed strings.
df2[df2['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2013-01-03,-0.731605,0.124034,1.148756,-0.423043,two
2013-01-05,-0.239526,-0.492755,0.707605,-0.589724,four


In [24]:
# Integer Series 1..6 labelled by six consecutive days starting 2013-01-02,
# i.e. shifted one day later than the index used for df.
s1 = pd.Series(
    data=list(range(1, 7)),
    index=pd.date_range(start='20130102', periods=6),
)
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [26]:
# Assignment: add a Series as a new column. Values are matched to df by index
# label; in the output, 2013-01-01 (absent from s1) is left missing and s1's
# 2013-01-07 entry does not appear.
df['F'] = s1
df

Unnamed: 0,A,B,C,D,F
2013-01-01,1.832368,1.177498,0.638396,-0.182525,
2013-01-02,1.485888,-0.521183,-0.54646,-1.124263,1.0
2013-01-03,-0.731605,0.124034,1.148756,-0.423043,2.0
2013-01-04,1.593673,2.411398,-0.979668,1.477386,3.0
2013-01-05,-0.239526,-0.492755,0.707605,-0.589724,4.0
2013-01-06,0.632588,-2.243278,-0.957501,0.143698,5.0


In [27]:
# Overwrite individual cells: once by label (.at) and once by position (.iat).
df.at[dates[0], 'A'] = 0
df.iat[0, 1] = 0
# Replace column D wholesale with a NumPy array of 5s. Plain column assignment
# is used instead of df.loc[:, 'D'] = ...: on pandas 2.x the .loc form writes
# into the existing float column (giving 5.0), whereas replacing the column
# adopts the array's integer dtype on every pandas version, matching the
# integer 5s in the recorded output.
df['D'] = np.array([5] * len(df))
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.638396,5,
2013-01-02,1.485888,-0.521183,-0.54646,5,1.0
2013-01-03,-0.731605,0.124034,1.148756,5,2.0
2013-01-04,1.593673,2.411398,-0.979668,5,3.0
2013-01-05,-0.239526,-0.492755,0.707605,5,4.0
2013-01-06,0.632588,-2.243278,-0.957501,5,5.0


In [28]:
# Work on a copy so df is untouched, then overwrite every positive entry with
# its negation; the resulting frame has no positive values left.
df2 = df.copy()
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.638396,-5,
2013-01-02,-1.485888,-0.521183,-0.54646,-5,-1.0
2013-01-03,-0.731605,-0.124034,-1.148756,-5,-2.0
2013-01-04,-1.593673,-2.411398,-0.979668,-5,-3.0
2013-01-05,-0.239526,-0.492755,-0.707605,-5,-4.0
2013-01-06,-0.632588,-2.243278,-0.957501,-5,-5.0
