In [1]:
import pandas as pd
# Build a Series from a plain list; with no index given, pandas supplies a
# default RangeIndex (0..n-1).
obj = pd.Series([4, 5, 6, -7])

# Show the Series itself, then its index labels, then its underlying values.
for view in (obj, obj.index, obj.values):
    print(view)

0    4
1    5
2    6
3   -7
dtype: int64
RangeIndex(start=0, stop=4, step=1)
[ 4  5  6 -7]


In [2]:
# The labels are passed as a list, not a set: a set has no defined order, so a
# set index pairs labels with values arbitrarily and the pairing can change
# from one run to the next.
obj2 = pd.Series([4, 5, 6, 3], index=['d', 'c', 'b', 'a'])
print(obj2)
obj2['c'] = 6  # assign through the label, much like a dict key
print(obj2)
print('c' in obj2)  # membership is tested against the index labels -> True
print('f' in obj2)  # -> False

# A dict converts directly to a Series: keys become the index, values the data.
sdata = {'beijing': 35000, 'shanghai': 70000, 'guangzhou': 16000, 'shenzhen': 5000}
obj3 = pd.Series(sdata)
print(obj3)
obj3.index = ['bj', 'sh', 'gz', 'sz']  # replace the labels with their abbreviations
print(obj3)

d    4
c    5
b    6
a    3
dtype: int64
d    4
c    6
b    6
a    3
dtype: int64
True
False
beijing      35000
shanghai     70000
guangzhou    16000
shenzhen      5000
dtype: int64
bj    35000
sh    70000
gz    16000
sz     5000
dtype: int64


In [5]:
# Column-oriented input: each keyword names a column and holds that column's values.
data = dict(
    city=['shanghai', 'guangzhou', 'beijing', 'shenzhen'],
    year=[2016, 2017, 2018, 2019],
    pop=[1.5, 1.8, 1.3, 2.0],
)
frame = pd.DataFrame(data)
print(frame)

        city  year  pop
0   shanghai  2016  1.5
1  guangzhou  2017  1.8
2    beijing  2018  1.3
3   shenzhen  2019  2.0


In [7]:
# Build the frame again from the same dict, this time with an explicit column order.
column_order = ['year', 'city', 'pop']
frame2 = pd.DataFrame(data, columns=column_order)
print(frame2)

# Pulling a single column out of the 2-D table yields a 1-D Series.
print(frame2['city'])
print(frame2['year'])

   year       city  pop
0  2016   shanghai  1.5
1  2017  guangzhou  1.8
2  2018    beijing  1.3
3  2019   shenzhen  2.0
0     shanghai
1    guangzhou
2      beijing
3     shenzhen
Name: city, dtype: object
0    2016
1    2017
2    2018
3    2019
Name: year, dtype: int64


In [8]:
# Add a new boolean column to the DataFrame: True only where the city is 'beijing'.
is_beijing = frame2['city'].eq('beijing')
frame2['cap'] = is_beijing
print(frame2)

   year       city  pop    cap
0  2016   shanghai  1.5  False
1  2017  guangzhou  1.8  False
2  2018    beijing  1.3   True
3  2019   shenzhen  2.0  False


In [11]:
# Nested dict: the outer keys become the columns, the inner keys the row labels.
pop = {
    'beijing': {2008: 1.5, 2009: 2.0},
    'shanghai': {2008: 2.0, 2009: 3.6},
}
frame3 = pd.DataFrame(pop)
print(frame3)
# Transpose: swap rows and columns.
print(frame3.transpose())

      beijing  shanghai
2008      1.5       2.0
2009      2.0       3.6
          2008  2009
beijing    1.5   2.0
shanghai   2.0   3.6


In [14]:
obj4 = pd.Series({'b': 4.5, 'd': 7.2, 'c': -5.3, 'a': 3.6})
# Rearrange the data to follow the new label order; a label that obj4 does not
# have ('e' here) shows up as NaN.
obj5 = obj4.reindex(list('abcde'))
print(obj5)

a    3.6
b    4.5
c   -5.3
d    7.2
e    NaN
dtype: float64


In [16]:
# Same reindex as before, but labels missing from obj4 are filled with 0 instead of NaN.
new_labels = list('abcde')
obj6 = obj4.reindex(new_labels, fill_value=0)
print(obj6)

a    3.6
b    4.5
c   -5.3
d    7.2
e    0.0
dtype: float64


In [19]:
obj7 = pd.Series({0: 'blue', 2: 'yellow', 4: 'pink'})
full_range = range(6)
# Without a fill method, the labels obj7 lacks (1, 3, 5) come out as NaN.
print(obj7.reindex(full_range))
# 'ffill' fills each gap with the value of the preceding label.
print(obj7.reindex(full_range, method='ffill'))

0      blue
1       NaN
2    yellow
3       NaN
4      pink
5       NaN
dtype: object
0      blue
1      blue
2    yellow
3    yellow
4      pink
5      pink
dtype: object


In [25]:
from numpy import nan as NA
# Drop the entries whose value is missing.
data = pd.Series([1, NA, 2])
without_missing = data.dropna()
print(without_missing)

0    1.0
2    2.0
dtype: float64


In [31]:
# Dropping missing values from a DataFrame.
# 1. Only some cells of a row / column are missing.
rows = [
    [1., 6.5, 3],
    [1., NA, NA],
    [NA, NA, NA],
]
data = pd.DataFrame(rows)
data[4] = NA  # add a column labelled 4 that is entirely NaN
print(data)
print(data.dropna())                   # default: every row containing any NaN is dropped
print(data.dropna(how='all'))          # drop only the rows that are entirely NaN
print(data.dropna(how='all', axis=1))  # drop only the columns that are entirely NaN
data.fillna(0)                # fills a copy with 0 and returns it; data itself is not modified
data.fillna(0, inplace=True)  # with inplace=True the fill is applied to data itself
print(data)

     0    1    2   4
0  1.0  6.5  3.0 NaN
1  1.0  NaN  NaN NaN
2  NaN  NaN  NaN NaN
Empty DataFrame
Columns: [0, 1, 2, 4]
Index: []
     0    1    2   4
0  1.0  6.5  3.0 NaN
1  1.0  NaN  NaN NaN
     0    1    2
0  1.0  6.5  3.0
1  1.0  NaN  NaN
2  NaN  NaN  NaN
     0    1    2    4
0  1.0  6.5  3.0  0.0
1  1.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0


In [38]:
import numpy as np
# Draw the sample from a seeded generator so the values - and every table
# printed below - are the same on each run.
SEED = 42
rng = np.random.default_rng(SEED)
data3 = pd.Series(rng.standard_normal(10),
                  index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                         [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
# What a hierarchical (two-level) index is useful for:
print(data3)
# unstack() moves the inner level into the columns, turning the 1-D Series into
# a 2-D table; (c, 3) and (d, 1) have no data, so those cells are NaN.
wide = data3.unstack()
print(wide)
# stack() converts back to 1-D. dropna() discards the NaN padding in case
# stack() keeps it, so the round trip reproduces data3 either way.
print(wide.stack().dropna())

a  1    0.483451
   2   -0.008558
   3    0.402170
b  1   -0.105652
   2   -1.403142
   3   -0.091159
c  1    1.223893
   2   -1.952023
d  2   -1.181098
   3    1.774148
dtype: float64
          1         2         3
a  0.483451 -0.008558  0.402170
b -0.105652 -1.403142 -0.091159
c  1.223893 -1.952023       NaN
d       NaN -1.181098  1.774148
a  1    0.483451
   2   -0.008558
   3    0.402170
b  1   -0.105652
   2   -1.403142
   3   -0.091159
c  1    1.223893
   2   -1.952023
d  2   -1.181098
   3    1.774148
dtype: float64
