In [1]:
import numpy as np
import pandas as pd

#### Pandas现在支持三种类型的多轴索引
- .loc     基于标签
- .iloc    基于整数
- .ix     基于标签和整数

In [2]:
df = pd.DataFrame(np.random.randn(8,4),
                 index=['a','b','c','d','e','f','g','h'],
                  columns = ['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
a,1.637541,0.525852,-0.815922,0.237627
b,-0.215777,0.285644,0.196529,-0.374995
c,-1.575076,0.76656,0.597478,-0.423306
d,0.425084,0.580469,2.106732,-1.132376
e,0.973325,-0.138681,0.33715,-0.442461
f,1.01581,0.896994,0.753502,0.296023
g,0.767961,0.913784,1.264717,-1.011307
h,1.906192,0.077434,1.120128,0.354334


##### .loc()
- 需要两个参数，第一个表示行，第二个表示列
- 输入一个参数时选择行

In [3]:
print(df.loc[:,'C'])

a   -0.815922
b    0.196529
c    0.597478
d    2.106732
e    0.337150
f    0.753502
g    1.264717
h    1.120128
Name: C, dtype: float64


In [4]:
print(df.loc[['b','e'],['A','C']])

          A         C
b -0.215777  0.196529
e  0.973325  0.337150


In [5]:
print(df.loc['a':'e'])

          A         B         C         D
a  1.637541  0.525852 -0.815922  0.237627
b -0.215777  0.285644  0.196529 -0.374995
c -1.575076  0.766560  0.597478 -0.423306
d  0.425084  0.580469  2.106732 -1.132376
e  0.973325 -0.138681  0.337150 -0.442461


In [6]:
print(df.loc['b'] > 0)

A    False
B     True
C     True
D    False
Name: b, dtype: bool


In [7]:
print(df.loc['b'][df.loc['b'] > 0])

B    0.285644
C    0.196529
Name: b, dtype: float64


In [8]:
print(df.loc[:,'A'] > 0)

a     True
b    False
c    False
d     True
e     True
f     True
g     True
h     True
Name: A, dtype: bool


##### .iloc
纯整数索引

In [9]:
df = pd.DataFrame(np.random.randn(8,4),
                 columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
0,0.874386,0.86523,0.472691,-0.388515
1,1.575397,-0.023746,2.142885,1.563591
2,-1.143848,0.03552,1.959298,0.111515
3,-0.7691,1.700512,-0.450816,-0.642036
4,-1.348551,1.896085,-1.177693,-0.205088
5,1.186869,-1.152398,-0.128733,0.53567
6,2.055234,1.083284,0.02062,0.373573
7,1.489464,1.630541,-0.860682,1.003008


In [10]:
print(df.iloc[:4])

          A         B         C         D
0  0.874386  0.865230  0.472691 -0.388515
1  1.575397 -0.023746  2.142885  1.563591
2 -1.143848  0.035520  1.959298  0.111515
3 -0.769100  1.700512 -0.450816 -0.642036


In [11]:
print(df.iloc[[2,3],1:3])

          B         C
2  0.035520  1.959298
3  1.700512 -0.450816


In [12]:
print(df.iloc[[1,3,5],[1,2,3]])

          B         C         D
1 -0.023746  2.142885  1.563591
3  1.700512 -0.450816 -0.642036
5 -1.152398 -0.128733  0.535670


##### .ix
除了基于纯标签和整数之外，Pandas还提供了一种使用.ix()运算符进行选择和子集化对象的混合方法

In [13]:
df = pd.DataFrame(np.random.randn(8,4),
                 columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
0,0.76283,-0.612935,1.527079,0.328054
1,1.352134,-1.275036,0.056576,-0.313414
2,1.147115,-0.991426,-0.702798,-1.007665
3,1.126891,1.251847,1.998576,-0.560872
4,-0.837773,-0.430303,-0.488772,-0.87968
5,-0.505453,-0.863307,-1.078854,1.053906
6,-1.16701,2.365442,1.409395,-0.537281
7,0.798214,1.605708,0.815651,-0.380213


In [14]:
print(df.ix[:4])

          A         B         C         D
0  0.762830 -0.612935  1.527079  0.328054
1  1.352134 -1.275036  0.056576 -0.313414
2  1.147115 -0.991426 -0.702798 -1.007665
3  1.126891  1.251847  1.998576 -0.560872
4 -0.837773 -0.430303 -0.488772 -0.879680


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


In [15]:
print(df.ix[:,'B'])

0   -0.612935
1   -1.275036
2   -0.991426
3    1.251847
4   -0.430303
5   -0.863307
6    2.365442
7    1.605708
Name: B, dtype: float64


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


In [16]:
print(df.ix[:,[2,3]])
print(df.ix[:,['C','D']])

          C         D
0  1.527079  0.328054
1  0.056576 -0.313414
2 -0.702798 -1.007665
3  1.998576 -0.560872
4 -0.488772 -0.879680
5 -1.078854  1.053906
6  1.409395 -0.537281
7  0.815651 -0.380213
          C         D
0  1.527079  0.328054
1  0.056576 -0.313414
2 -0.702798 -1.007665
3  1.998576 -0.560872
4 -0.488772 -0.879680
5 -1.078854  1.053906
6  1.409395 -0.537281
7  0.815651 -0.380213


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


In [17]:
df

Unnamed: 0,A,B,C,D
0,0.76283,-0.612935,1.527079,0.328054
1,1.352134,-1.275036,0.056576,-0.313414
2,1.147115,-0.991426,-0.702798,-1.007665
3,1.126891,1.251847,1.998576,-0.560872
4,-0.837773,-0.430303,-0.488772,-0.87968
5,-0.505453,-0.863307,-1.078854,1.053906
6,-1.16701,2.365442,1.409395,-0.537281
7,0.798214,1.605708,0.815651,-0.380213


In [18]:
print(df['A'])
print(df[['A','B']])
print(df[2:4])

0    0.762830
1    1.352134
2    1.147115
3    1.126891
4   -0.837773
5   -0.505453
6   -1.167010
7    0.798214
Name: A, dtype: float64
          A         B
0  0.762830 -0.612935
1  1.352134 -1.275036
2  1.147115 -0.991426
3  1.126891  1.251847
4 -0.837773 -0.430303
5 -0.505453 -0.863307
6 -1.167010  2.365442
7  0.798214  1.605708
          A         B         C         D
2  1.147115 -0.991426 -0.702798 -1.007665
3  1.126891  1.251847  1.998576 -0.560872


##### 可以使用属性运算符.来选择列

In [19]:
print(df.C)

0    1.527079
1    0.056576
2   -0.702798
3    1.998576
4   -0.488772
5   -1.078854
6    1.409395
7    0.815651
Name: C, dtype: float64


### 增删改查

In [20]:
df = pd.DataFrame({'a': [1, 2, 3], 'b': ['a', 'b', 'c'],'c': ["A","B","C"]})
print(df)

   a  b  c
0  1  a  A
1  2  b  B
2  3  c  C


In [21]:
# 选择某行
print(df.loc[1,:])

a    2
b    b
c    B
Name: 1, dtype: object


In [24]:
#选择所有行
print(df.loc[::, 'c'])

0    A
1    B
2    C
Name: c, dtype: object


In [25]:
df.loc[:,'a'] >2

0    False
1    False
2     True
Name: a, dtype: bool

#### isin

In [26]:
s = pd.Series(np.arange(5), index=np.arange(5)[::-1], dtype='int64')
s

4    0
3    1
2    2
1    3
0    4
dtype: int64

In [27]:
s.isin([2,4,6])

4    False
3    False
2     True
1    False
0     True
dtype: bool