## 算术和数据对齐

In [1]:
import pandas as pd
# First operand for the alignment demo: four floats labelled a, c, d, e.
s1 = pd.Series(
    [7.3, -2.5, 3.4, 1.5],
    index=list('acde'),
)
s1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

In [2]:
# Second operand: five values labelled a, c, e, f, g (the integer 4 is
# stored as 4.0 because the other values are floats).
s2 = pd.Series(
    [-2.1, 3.6, -1.5, 4, 3.1],
    index=list('acefg'),
)
s2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

In [9]:
# Addition aligns the two Series on their index labels; a label present in
# only one operand (d, f, g here) yields NaN in the result.
s1+s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [11]:
import numpy as np
# 3x3 float frame holding 0.0 .. 8.0: columns b, c, d; rows Ohio, Texas, Colorado.
df1 = pd.DataFrame(
    np.arange(9.).reshape(3, 3),
    index=['Ohio', 'Texas', 'Colorado'],
    columns=['b', 'c', 'd'],
)
df1

Unnamed: 0,b,c,d
Ohio,0.0,1.0,2.0
Texas,3.0,4.0,5.0
Colorado,6.0,7.0,8.0


In [12]:
# 4x3 float frame holding 0.0 .. 11.0: columns b, d, e; rows Utah, Ohio, Texas, Oregon.
df2 = pd.DataFrame(
    np.arange(12.).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
df2

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [13]:
# Adding two frames aligns on both row and column labels. The result covers
# the union of labels and is NaN wherever either operand lacks the cell.
df1+df2

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregon,,,,
Texas,9.0,,12.0,
Utah,,,,


In [14]:
# fill_value=0 stands in for a cell that is missing from one operand only;
# cells missing from both frames (e.g. Colorado/e) remain NaN.
df1.add(df2,fill_value=0)

Unnamed: 0,b,c,d,e
Colorado,6.0,7.0,8.0,
Ohio,3.0,1.0,6.0,5.0
Oregon,9.0,,10.0,11.0
Texas,9.0,4.0,12.0,8.0
Utah,0.0,,1.0,2.0


### DataFrame和Series之间的运算

In [15]:
# 3x4 float array holding 0.0 .. 11.0 in row-major order.
arr = np.arange(12.).reshape(3, 4)
arr

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [16]:
# First row of the 2-D array (a 1-D array of four values).
arr[0]

array([0., 1., 2., 3.])

In [17]:
# Broadcasting: the first row is subtracted from every row of arr.
arr - arr[0]

array([[0., 0., 0., 0.],
       [4., 4., 4., 4.],
       [8., 8., 8., 8.]])

In [18]:
# 4x3 float frame holding 0.0 .. 11.0: columns b, d, e; rows Utah, Ohio, Texas, Oregon.
frame = pd.DataFrame(
    np.arange(12.).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
frame

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [21]:
# Take the first row by position; the result is a Series named after the row
# label ('Utah') and indexed by the frame's column labels.
series = frame.iloc[0]
series

b    0.0
d    1.0
e    2.0
Name: Utah, dtype: float64

In [22]:
# The Series index is matched against the frame's columns and the
# subtraction is then applied to every row.
frame-series

Unnamed: 0,b,d,e
Utah,0.0,0.0,0.0
Ohio,3.0,3.0,3.0
Texas,6.0,6.0,6.0
Oregon,9.0,9.0,9.0


In [23]:
# Integer Series 0, 1, 2 labelled b, e, f; only b and e match frame's columns.
series2 = pd.Series(
    range(3),
    index=list('bef'),
)
series2

b    0
e    1
f    2
dtype: int64

In [24]:
# Labels found in only one operand (column d of frame, label f of series2)
# appear in the result as all-NaN columns.
frame+series2

Unnamed: 0,b,d,e,f
Utah,0.0,,3.0,
Ohio,3.0,,6.0,
Texas,6.0,,9.0,
Oregon,9.0,,12.0,


In [25]:
# Column 'd' as a Series indexed by the frame's row labels.
series3 = frame['d']
series3

Utah       1.0
Ohio       4.0
Texas      7.0
Oregon    10.0
Name: d, dtype: float64

In [26]:
# Redisplay frame for reference.
frame

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [28]:
# axis=0 matches series3 against the row index, so the 'd' value of each row
# is subtracted from every column in that row.
frame.sub(series3,axis=0)

Unnamed: 0,b,d,e
Utah,-1.0,0.0,1.0
Ohio,-1.0,0.0,1.0
Texas,-1.0,0.0,1.0
Oregon,-1.0,0.0,1.0


### 函数应用和映射

In [32]:
# 4x3 frame of np.random.randn draws: columns b, d, e; rows Utah, Ohio, Texas, Ore.
# NOTE(review): no seed is set in the visible cells, so the values change on
# every run and the recorded outputs below cannot be reproduced exactly.
frame = pd.DataFrame(np.random.randn(4,3),columns=list('bde'),index=['Utah','Ohio','Texas','Ore'])
frame

Unnamed: 0,b,d,e
Utah,-0.427037,1.409723,-1.338799
Ohio,1.890271,0.289196,-0.24304
Texas,-0.158083,0.659819,0.332092
Ore,0.561596,-1.137378,-0.968764


In [33]:
# NumPy ufuncs operate element-wise on a DataFrame and keep its labels.
np.abs(frame)

Unnamed: 0,b,d,e
Utah,0.427037,1.409723,1.338799
Ohio,1.890271,0.289196,0.24304
Texas,0.158083,0.659819,0.332092
Ore,0.561596,1.137378,0.968764


In [34]:
def f(x):
    """Return the spread of ``x``: its maximum minus its minimum."""
    return x.max() - x.min()
# By default apply calls f once per column, giving one value per column label.
frame.apply(f)

b    2.317309
d    2.547101
e    1.670891
dtype: float64

In [35]:
# axis=1 calls f once per row instead, giving one value per row label.
frame.apply(f,axis=1)

Utah     2.748522
Ohio     2.133312
Texas    0.817902
Ore      1.698975
dtype: float64

In [37]:
def f(x):
    """Summarise ``x`` by its extremes.

    Returns a two-element Series labelled 'min' and 'max' that holds
    ``x.min()`` and ``x.max()`` respectively.
    """
    lowest, highest = x.min(), x.max()
    return pd.Series([lowest, highest], index=['min', 'max'])

# Because f returns a Series, apply assembles the per-column results into a
# DataFrame whose rows are the returned labels ('min', 'max').
frame.apply(f)

Unnamed: 0,b,d,e
min,-0.427037,-1.137378,-1.338799
max,1.890271,1.409723,0.332092


In [38]:
def format(x):
    """Render a number as a string with exactly two decimal places.

    NOTE(review): this name shadows the built-in ``format``; it is kept
    because other cells in this notebook call it by this name.
    """
    return '%.2f' % x
# applymap calls the function on every element of the frame, so each cell
# becomes a two-decimal string.
# NOTE(review): DataFrame.applymap is deprecated in newer pandas releases in
# favour of DataFrame.map - confirm the target pandas version before changing.
frame.applymap(format)

Unnamed: 0,b,d,e
Utah,-0.43,1.41,-1.34
Ohio,1.89,0.29,-0.24
Texas,-0.16,0.66,0.33
Ore,0.56,-1.14,-0.97


In [39]:
# Series.map applies the function element-wise to a single column; the
# result holds strings, hence dtype object.
frame['e'].map(format)

Utah     -1.34
Ohio     -0.24
Texas     0.33
Ore      -0.97
Name: e, dtype: object

### 排序和排名

In [40]:
# Integers 0..3 with a deliberately unsorted index (d, a, b, c).
obj = pd.Series(
    range(4),
    index=list('dabc'),
)
obj

d    0
a    1
b    2
c    3
dtype: int64

In [41]:
# Sort by index label (a, b, c, d); each value stays with its label.
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [42]:
# 2x4 integer frame holding 0..7 with unsorted row labels (three, one) and
# unsorted column labels (d, a, b, c); then show it with rows sorted by label.
frame = pd.DataFrame(
    np.arange(8).reshape(2, 4),
    index=['three', 'one'],
    columns=list('dabc'),
)
frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [43]:
# axis=1 sorts the columns by label (a, b, c, d); row order is unchanged.
frame.sort_index(axis=1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [44]:
# Same column sort, but in descending label order (d, c, b, a).
frame.sort_index(axis=1,ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [47]:
# Small integer frame with columns 'b' and 'a' and the default 0..3 row index,
# used for the sort-by-value demos.
frame = pd.DataFrame(dict(
    b=[4, 7, -3, 2],
    a=[0, 1, 0, 1],
))
frame

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [48]:
# Sort the rows by the values in column 'b' (ascending).
# DataFrame.sort_index(by=...) was deprecated and later removed from pandas;
# sort_values(by=...) is the supported call and gives the same row order
# without emitting a deprecation warning.
frame.sort_values(by='b')


Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [49]:
# Sort the rows by column 'a' first, then by column 'b' within equal 'a' values.
# DataFrame.sort_index(by=...) was deprecated and later removed from pandas;
# sort_values(by=...) is the supported call and gives the same row order
# without emitting a deprecation warning.
frame.sort_values(by=['a','b'])


Unnamed: 0,b,a
2,-3,0
0,4,0
3,2,1
1,7,1


In [50]:
# Sample containing ties (7 and 4 each appear twice) for the ranking demos.
obj = pd.Series(
    [7, -5, 7, 4, 2, 0, 4],
)

In [51]:
# Default ranking: tied values share the average of the ranks they span
# (the two 7s both get 6.5, the two 4s both get 4.5).
obj.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [52]:
# method='first' breaks ties by order of appearance: the first 7 gets rank 6,
# the second gets rank 7.
obj.rank(method='first')

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [53]:
# Rank from largest to smallest; method='max' gives every member of a tied
# group the highest rank in that group (both 7s get 2, both 4s get 4).
obj.rank(ascending=False,method='max')

0    2.0
1    7.0
2    2.0
3    4.0
4    5.0
5    6.0
6    4.0
dtype: float64

### 带有重复值的轴索引

In [55]:
# Integers 0..4 under an index that repeats the labels 'a' and 'b'.
obj = pd.Series(
    range(5),
    index=list('aabbc'),
)
obj

a    0
a    1
b    2
b    3
c    4
dtype: int64

In [56]:
# is_unique reports whether every index label occurs exactly once.
obj.index.is_unique

False

In [57]:
# Selecting a label that occurs more than once returns a Series holding
# every matching entry.
obj['a']

a    0
a    1
dtype: int64

In [58]:
# 'b' is also duplicated, so this likewise returns a two-element Series.
obj['b']

b    2
b    3
dtype: int64

In [60]:
# 4x3 frame of np.random.randn draws whose row index repeats 'a' and 'b';
# columns get the default integer labels 0, 1, 2.
# NOTE(review): no seed is set in the visible cells, so the values change on
# every run and the recorded outputs below cannot be reproduced exactly.
df = pd.DataFrame(np.random.randn(4,3),index=['a','a','b','b'])
df

Unnamed: 0,0,1,2
a,-0.140137,-0.902333,0.8087
a,-0.990744,-1.291059,-1.290603
b,0.459129,-0.738205,-0.883468
b,0.180628,0.366212,1.942733


In [62]:
# Label-based selection with a duplicated row label returns every matching
# row as a DataFrame.
df.loc['b']

Unnamed: 0,0,1,2
b,0.459129,-0.738205,-0.883468
b,0.180628,0.366212,1.942733
