# Series和DataFrame的简单数学运算

In [1]:
import numpy as np
import pandas as pd
from pandas import Series
from pandas import DataFrame

In [4]:
# Three integers labelled 'A', 'B', 'C'
s1 = Series(data=[1, 2, 3], index=list('ABC'))
s1

A    1
B    2
C    3
dtype: int64

In [5]:
# Four integers labelled 'B'..'E'; only 'B' and 'C' overlap with s1's labels
s2 = Series(data=[4, 5, 6, 7], index=list('BCDE'))
s2

B    4
C    5
D    6
E    7
dtype: int64

In [6]:
# Adding two Series aligns them on index labels; a label found in only one operand gives NaN
s1.add(s2)

A    NaN
B    6.0
C    8.0
D    NaN
E    NaN
dtype: float64

## DataFrame运算

In [7]:
# 2x2 frame holding 0..3, rows 'A'/'B', columns 'BJ'/'SH'
df1 = DataFrame(
    data=np.arange(4).reshape((2, 2)),
    index=list('AB'),
    columns=['BJ', 'SH'],
)
df1

Unnamed: 0,BJ,SH
A,0,1
B,2,3


In [8]:
# 3x3 frame holding 0..8; it has one more row ('C') and one more column ('GZ') than df1
df2 = DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=list('ABC'),
    columns=['BJ', 'SH', 'GZ'],
)
df2

Unnamed: 0,BJ,SH,GZ
A,0,1,2
B,3,4,5
C,6,7,8


In [9]:
# Frames are aligned on both row and column labels before adding; unmatched cells become NaN
df1.add(df2)

Unnamed: 0,BJ,GZ,SH
A,0.0,,2.0
B,5.0,,7.0
C,,,


In [10]:
# 3x3 frame with a single missing value at row 'B', column 'GZ'
df3 = DataFrame(
    data=[
        [1, 2, 3],
        [4, 5, np.nan],
        [7, 8, 9],
    ],
    index=list('ABC'),
    columns=['BJ', 'SH', 'GZ'],
)
df3

Unnamed: 0,BJ,SH,GZ
A,1,2,3.0
B,4,5,
C,7,8,9.0


In [11]:
# Totals down each column (one value per column); the NaN in GZ is skipped
df3.sum(axis=0)

BJ    12.0
SH    15.0
GZ    12.0
dtype: float64

In [12]:
# Totals across each row (one value per row label)
df3.sum(axis='columns')

A     6.0
B     9.0
C    24.0
dtype: float64

# Series和DataFrame的排序
## Series的排序

In [2]:
# Ten draws from the standard normal distribution with the default 0..9 index.
# NOTE(review): no seed is set, so the values differ on every run.
s1 = Series(data=np.random.randn(10))
s1

0   -0.587737
1    0.449568
2   -0.116989
3   -1.436653
4    1.545281
5    2.263602
6   -1.786431
7   -0.156642
8    0.866232
9   -1.021229
dtype: float64

In [4]:
# Sort by value, largest first; each value keeps its original index label
s2 = s1.sort_values(ascending=False)
s2

5    2.263602
4    1.545281
8    0.866232
1    0.449568
2   -0.116989
7   -0.156642
0   -0.587737
9   -1.021229
3   -1.436653
6   -1.786431
dtype: float64

## DataFrame的排序

In [6]:
# 5x8 frame of standard-normal draws with columns 'A'..'H'.
# NOTE(review): no seed is set, so the values differ on every run.
df1 = DataFrame(
    data=np.random.randn(5, 8),
    columns=list('ABCDEFGH'),
)
df1

Unnamed: 0,A,B,C,D,E,F,G,H
0,-0.111706,0.650607,-1.507147,2.236373,0.081369,0.384915,-0.546167,-1.371941
1,-1.157665,0.828504,-0.514134,-0.97485,0.930559,-1.880653,0.039765,1.049065
2,-0.514379,0.98566,0.966726,-0.743508,-0.122355,-0.974603,0.324743,0.202711
3,1.065124,0.490932,1.435294,0.00992,1.167809,0.108557,0.696401,-2.300613
4,-1.326682,-1.732278,2.60367,-0.331107,-1.171604,-0.192988,0.03989,1.378903


In [7]:
# Sort only column A in ascending order; the result is a Series named A
df1.loc[:, 'A'].sort_values()

4   -1.326682
1   -1.157665
2   -0.514379
0   -0.111706
3    1.065124
Name: A, dtype: float64

In [9]:
# Reorder whole rows by ascending value of column A; the result is bound to df2
df2 = df1.sort_values(by='A')
df2

Unnamed: 0,A,B,C,D,E,F,G,H
4,-1.326682,-1.732278,2.60367,-0.331107,-1.171604,-0.192988,0.03989,1.378903
1,-1.157665,0.828504,-0.514134,-0.97485,0.930559,-1.880653,0.039765,1.049065
2,-0.514379,0.98566,0.966726,-0.743508,-0.122355,-0.974603,0.324743,0.202711
0,-0.111706,0.650607,-1.507147,2.236373,0.081369,0.384915,-0.546167,-1.371941
3,1.065124,0.490932,1.435294,0.00992,1.167809,0.108557,0.696401,-2.300613


In [10]:
# Put the rows back in ascending index-label order
df2.sort_index(axis=0)

Unnamed: 0,A,B,C,D,E,F,G,H
0,-0.111706,0.650607,-1.507147,2.236373,0.081369,0.384915,-0.546167,-1.371941
1,-1.157665,0.828504,-0.514134,-0.97485,0.930559,-1.880653,0.039765,1.049065
2,-0.514379,0.98566,0.966726,-0.743508,-0.122355,-0.974603,0.324743,0.202711
3,1.065124,0.490932,1.435294,0.00992,1.167809,0.108557,0.696401,-2.300613
4,-1.326682,-1.732278,2.60367,-0.331107,-1.171604,-0.192988,0.03989,1.378903


# 重命名DataFrame的index

In [4]:
# 3x3 frame holding 0..8 with upper-case city codes as row labels and 'A'..'C' as columns
df1 = DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=['BJ', 'SH', 'GZ'],
    columns=list('ABC'),
)
df1

Unnamed: 0,A,B,C
BJ,0,1,2
SH,3,4,5
GZ,6,7,8


In [5]:
# Replace the row labels in place by assigning a new list to .index
df1.index = ['bj','sh','gz']
df1

Unnamed: 0,A,B,C
bj,0,1,2
sh,3,4,5
gz,6,7,8


In [8]:
# Index.map returns a new Index; the result is not assigned, so df1 is unchanged
df1.index.map(str.upper)

Index(['BJ', 'SH', 'GZ'], dtype='object')

In [9]:
# Display df1 again: its row labels are still lower-case
df1

Unnamed: 0,A,B,C
bj,0,1,2
sh,3,4,5
gz,6,7,8


In [11]:
# rename accepts one function per axis and returns a new frame:
# row labels are upper-cased, column labels are lower-cased
df1.rename(index=str.upper,columns=str.lower)

Unnamed: 0,a,b,c
BJ,0,1,2
SH,3,4,5
GZ,6,7,8


In [12]:
# Display df1 again: rename did not modify it
df1

Unnamed: 0,A,B,C
bj,0,1,2
sh,3,4,5
gz,6,7,8


In [13]:
# rename also accepts dict mappings; only the listed labels ('bj' and 'A') are changed
df1.rename(index={'bj':'beijing'},columns={'A':'a'})

Unnamed: 0,a,B,C
beijing,0,1,2
sh,3,4,5
gz,6,7,8
