# 索引的重建

In [1]:
import  numpy as np
import pandas as pd

In [4]:
# Sorting a Series by rebuilding its index.
# Start from 5 random integers in [10, 69) labelled in a deliberately unsorted order.
ser1 = pd.Series(
    np.random.randint(low=10, high=69, size=5),
    index=["d", "c", "a", "e", "b"],
)
ser1

d    57
c    41
a    46
e    50
b    14
dtype: int32

In [8]:
# Reindex with the labels in alphabetical order: the values follow their labels,
# so the returned Series is sorted by index.
ser1.reindex(index=["a", "b", "c", "d", "e"])

a    46
b    14
c    41
d    57
e    50
dtype: int32

In [18]:
# "Deleting" from a Series via reindex: only the listed labels are kept,
# the remaining entries are simply left out of the result.
ser1.reindex(index=["a", "b", "c"])

a    46
b    14
c    41
dtype: int32

In [11]:
# Rebuilding the index of a DataFrame.
# 5x6 frame of random integers in [10, 69); rows are labelled in unsorted order,
# columns are named a0..a5.
pan1 = pd.DataFrame(
    np.random.randint(low=10, high=69, size=(5, 6)),
    index=["d", "c", "a", "e", "b"],
    columns=["a{}".format(i) for i in range(6)],
)
pan1

Unnamed: 0,a0,a1,a2,a3,a4,a5
d,26,60,23,10,42,60
c,53,49,63,12,42,35
a,60,65,42,20,60,26
e,49,66,59,15,39,17
b,29,61,16,49,19,47


In [12]:
# Sort the DataFrame rows in one step by reindexing
# along the row axis with the labels in alphabetical order.
pan1.reindex(["a", "b", "c", "d", "e"], axis="index")

Unnamed: 0,a0,a1,a2,a3,a4,a5
a,60,65,42,20,60,26
b,29,61,16,49,19,47
c,53,49,63,12,42,35
d,26,60,23,10,42,60
e,49,66,59,15,39,17


In [15]:
# Reorder the DataFrame columns in one step:
# reindex with the column names in descending order (a5, a4, ..., a0).
pan1.reindex(columns=["a{}".format(i) for i in reversed(range(6))])

Unnamed: 0,a5,a4,a3,a2,a1,a0
d,60,42,10,23,60,26
c,35,42,12,63,49,53
a,26,60,20,42,65,60
e,17,39,15,59,66,49
b,47,19,49,16,61,29


In [24]:
# Deleting from a DataFrame via reindex -- rows:
# only rows "d" and "c" are kept in the returned frame.
pan1.reindex(["d", "c"], axis=0)

Unnamed: 0,a0,a1,a2,a3,a4,a5
d,26,60,23,10,42,60
c,53,49,63,12,42,35


In [26]:
# Deleting from a DataFrame via reindex -- columns:
# only the listed columns are kept in the returned frame.
# (The list literal is passed directly; wrapping it in list() only made a redundant copy.)
pan1.reindex(columns=["a2", "a3", "a4"])

Unnamed: 0,a2,a3,a4
d,23,10,42
c,63,12,42
a,42,20,60
e,59,15,39
b,16,49,19


In [30]:
# Display pan1 again: none of the reindex results above were assigned back,
# so the original frame is unchanged.
pan1

Unnamed: 0,a0,a1,a2,a3,a4,a5
d,26,60,23,10,42,60
c,53,49,63,12,42,35
a,60,65,42,20,60,26
e,49,66,59,15,39,17
b,29,61,16,49,19,47


In [37]:
# Adding to a DataFrame -- a new column.
# Assigning to a column label that does not exist yet appends that column;
# one value is supplied per row. Note: this modifies pan1 in place.
pan1["a7"] = list(range(12, 17))
pan1

Unnamed: 0,a0,a1,a2,a3,a4,a5,a7
d,26,60,23,10,42,60,12
c,53,49,63,12,42,35,13
a,60,65,42,20,60,26,14
e,49,66,59,15,39,17,15
b,29,61,16,49,19,47,16


#### drop删除

In [41]:
# Deleting from a Series with drop().
# Sample data: 10 random integers in [10, 69) labelled r0..r9.
ser3 = pd.Series(
    np.random.randint(low=10, high=69, size=10),
    index=["r{}".format(i) for i in range(10)],
)
ser3

r0    55
r1    13
r2    42
r3    10
r4    11
r5    51
r6    46
r7    34
r8    22
r9    55
dtype: int32

In [43]:
# Drop the entries labelled r0, r1 and r2; a new Series without them is returned.
ser3.drop(labels=["r0", "r1", "r2"])

r3    10
r4    11
r5    51
r6    46
r7    34
r8    22
r9    55
dtype: int32

In [44]:
# Deleting DataFrame data with drop().
# Sample data: 5x6 frame of random integers in [10, 69), rows d/c/a/e/b, columns a0..a5.
pan4 = pd.DataFrame(
    np.random.randint(low=10, high=69, size=(5, 6)),
    index=["d", "c", "a", "e", "b"],
    columns=["a{}".format(i) for i in range(6)],
)
pan4

Unnamed: 0,a0,a1,a2,a3,a4,a5
d,60,16,40,31,36,39
c,44,26,62,65,65,32
a,40,30,10,24,57,60
e,43,18,60,17,44,17
b,18,56,14,67,43,41


In [45]:
# Drop rows (equivalent to axis=0): remove the rows labelled d, c and a.
pan4.drop(index=["d", "c", "a"])

Unnamed: 0,a0,a1,a2,a3,a4,a5
e,43,18,60,17,44,17
b,18,56,14,67,43,41


In [46]:
# Drop columns (equivalent to axis=1): remove the columns a1, a2 and a3.
pan4.drop(columns=["a1", "a2", "a3"])

Unnamed: 0,a0,a4,a5
d,60,36,39
c,44,65,32
a,40,57,60
e,43,44,17
b,18,43,41


In [47]:
# "Deleting" with fancy (label-list) indexing: the following cells select
# only the wanted rows/columns, leaving the rest out of the result.
# Sample data: 5x6 frame of random integers in [10, 69), rows d/c/a/e/b, columns a0..a5.
pan5 = pd.DataFrame(
    np.random.randint(low=10, high=69, size=(5, 6)),
    index=["d", "c", "a", "e", "b"],
    columns=["a{}".format(i) for i in range(6)],
)
pan5

Unnamed: 0,a0,a1,a2,a3,a4,a5
d,37,66,34,63,25,57
c,68,15,51,35,62,11
a,15,50,19,21,46,34
e,12,46,64,47,31,65
b,48,47,65,48,29,62


In [53]:
# Select rows d, c and a (all columns) by passing a list of row labels to .loc.
pan5.loc[["d", "c", "a"]]

Unnamed: 0,a0,a1,a2,a3,a4,a5
d,37,66,34,63,25,57
c,68,15,51,35,62,11
a,15,50,19,21,46,34


In [55]:
# Select columns a0 and a1 (all rows) by passing a list of column labels.
pan5[["a0", "a1"]]

Unnamed: 0,a0,a1
d,37,66
c,68,15
a,15,50
e,12,46
b,48,47


In [56]:
# Fancy indexing down to specific values: column a1 restricted to rows c and a.
# The result is a Series named "a1" with one entry per selected row.
pan5["a1"].loc[["c", "a"]]

c    15
a    50
Name: a1, dtype: int32

In [58]:
# Fancy indexing of a rectangular region: rows d, c, a crossed with columns a0, a1.
pan5.loc[["d", "c", "a"], ["a0", "a1"]]

Unnamed: 0,a0,a1
d,37,66
c,68,15
a,15,50


##### 布尔值做索引

In [59]:
# Boolean indexing.
# Sample data: 5x6 frame of random integers in [10, 69), rows a..e, columns b0..b5.
pan6 = pd.DataFrame(
    np.random.randint(low=10, high=69, size=(5, 6)),
    index=["a", "b", "c", "d", "e"],
    columns=["b{}".format(i) for i in range(6)],
)
pan6

Unnamed: 0,b0,b1,b2,b3,b4,b5
a,68,66,32,23,33,33
b,65,43,54,25,21,41
c,47,45,65,62,22,43
d,22,11,41,30,12,68
e,53,40,17,58,38,42


In [66]:
# Boolean mask over the rows: True where column b0 lies strictly between 30 and 50.
pan6["b0"].gt(30) & pan6["b0"].lt(50)

a    False
b    False
c     True
d    False
e    False
Name: b0, dtype: bool

In [85]:
# Keep the rows whose b0 value satisfies the condition 30 < b0 < 50.
pan6.loc[pan6["b0"].gt(30) & pan6["b0"].lt(50)]

Unnamed: 0,b0,b1,b2,b3,b4,b5
c,47,45,65,62,22,43


In [89]:
# Keep the columns whose value in row "b" satisfies the condition 30 < value < 50.
pan6.loc[:, pan6.loc["b"].gt(30) & pan6.loc["b"].lt(50)]

Unnamed: 0,b1,b5
a,66,33
b,43,41
c,45,43
d,11,68
e,40,42


In [92]:
# Row mask on its own: one boolean per row, True where 30 < b0 < 50.
pan6["b0"].gt(30) & pan6["b0"].lt(50)

a    False
b    False
c     True
d    False
e    False
Name: b0, dtype: bool

In [91]:
# Column mask on its own: one boolean per column, True where 30 < (value in row "b") < 50.
pan6.loc["b"].gt(30) & pan6.loc["b"].lt(50)

b0    False
b1     True
b2    False
b3    False
b4    False
b5     True
Name: b, dtype: bool

In [90]:
# Select a region with two boolean masks at once:
# rows where 30 < b0 < 50, columns where 30 < (value in row "b") < 50.
pan6.loc[
    pan6["b0"].gt(30) & pan6["b0"].lt(50),
    pan6.loc["b"].gt(30) & pan6.loc["b"].lt(50),
]

Unnamed: 0,b1,b5
c,45,43


In [115]:
# Second sample frame for boolean indexing:
# 5x6 random integers in [10, 69), rows a..e, columns b0..b5.
pan8 = pd.DataFrame(
    np.random.randint(low=10, high=69, size=(5, 6)),
    index=["a", "b", "c", "d", "e"],
    columns=["b{}".format(i) for i in range(6)],
)
pan8

Unnamed: 0,b0,b1,b2,b3,b4,b5
a,41,24,65,54,20,45
b,27,16,45,66,61,48
c,55,54,65,59,34,48
d,59,34,30,51,65,30
e,53,45,24,33,16,56


In [137]:
# Rows whose value in column b2 satisfies 25 < b2 < 60.
pan8.loc[pan8["b2"].gt(25) & pan8["b2"].lt(60)]

Unnamed: 0,b0,b1,b2,b3,b4,b5
b,27,16,45,66,61,48
d,59,34,30,51,65,30


In [133]:
# Columns whose value in row "b" is greater than 45 (all rows are kept).
pan8.loc[:, pan8.loc["b"].gt(45)]

Unnamed: 0,b3,b4,b5
a,54,20,45
b,66,61,48
c,59,34,48
d,51,65,30
e,33,16,56


In [138]:
# Rows where 25 < b2 < 60, restricted to the columns whose value in row "b" exceeds 45.
# Both boolean masks are passed to a single .loc[rows, cols] call rather than chaining
# [] and .loc, which avoids building an intermediate frame and matches the
# two-mask selection used on pan6.
pan8.loc[(pan8["b2"] > 25) & (pan8["b2"] < 60), pan8.loc["b"] > 45]

Unnamed: 0,b3,b4,b5
b,66,61,48
d,51,65,30


##### 字符串Series的向量处理

In [97]:
# Sample string Series (with one missing value at label "d")
# used by the vectorised .str examples below.
ser4 = pd.Series(
    data=["awdw", "zadwrww111", "wweeee777", np.nan, "1rrett"],
    index=["a", "b", "c", "d", "e"],
)
ser4

a          awdw
b    zadwrww111
c     wweeee777
d           NaN
e        1rrett
dtype: object

In [98]:
# Count the non-missing entries of the string Series.
# Note: count() skips NaN, so this is not the Series length (ser4 has 5 entries, one is NaN).
ser4.count()

4

In [99]:
# Length of each individual string (element-wise); the missing entry stays NaN.
ser4.str.len()

a     4.0
b    10.0
c     9.0
d     NaN
e     6.0
dtype: float64

In [100]:
# str.join(sep) inserts the separator between the characters of each string
# (it joins, it does not split); the missing entry stays NaN.
ser4.str.join(",")

a                a,w,d,w
b    z,a,d,w,r,w,w,1,1,1
c      w,w,e,e,e,e,7,7,7
d                    NaN
e            1,r,r,e,t,t
dtype: object

In [101]:
# Convert every string to upper case.
ser4.str.upper()

a          AWDW
b    ZADWRWW111
c     WWEEEE777
d           NaN
e        1RRETT
dtype: object

In [110]:
# Pad every string to a width of 20 characters: strings that are too short
# are filled on the right-hand side with the character "m".
res = ser4.str.pad(width=20, side="right", fillchar="m")
res

a    awdwmmmmmmmmmmmmmmmm
b    zadwrww111mmmmmmmmmm
c    wweeee777mmmmmmmmmmm
d                     NaN
e    1rrettmmmmmmmmmmmmmm
dtype: object

In [111]:
# Replace every padding character "m" with "-".
# regex=False makes the literal (non-regex) match explicit, so the result does not
# depend on the installed pandas version's default for `regex`.
res.str.replace("m", "-", regex=False)

a    awdw----------------
b    zadwrww111----------
c    wweeee777-----------
d                     NaN
e    1rrett--------------
dtype: object

In [112]:
# split() cuts each string at the given separator and returns a list of pieces;
# consecutive "m" separators yield empty strings in between.
res.str.split(pat="m")

a    [awdw, , , , , , , , , , , , , , , , ]
b          [zadwrww111, , , , , , , , , , ]
c         [wweeee777, , , , , , , , , , , ]
d                                       NaN
e      [1rrett, , , , , , , , , , , , , , ]
dtype: object

In [114]:
# strip() removes leading and trailing whitespace from each string.
# ser4 has no surrounding whitespace, so the displayed result equals the input.
ser4.str.strip()

a          awdw
b    zadwrww111
c     wweeee777
d           NaN
e        1rrett
dtype: object