In [2]:
import numpy as np
import pandas as pd

In [3]:
# Build a 3x3 demo frame holding 0..8, with string row labels and column labels A-C.
df1 = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=['one', 'two', 'three'],
    columns=['A', 'B', 'C'],
)
df1

Unnamed: 0,A,B,C
one,0,1,2
two,3,4,5
three,6,7,8


1.通过属性的方式去修改索引  
语法：  
DataFrame.index/columns = 值

In [4]:
# Replace the row index by assigning a new list of labels to the .index attribute.
# This modifies df1 itself.
df1.index = ['NO1','NO2','NO3']
df1

Unnamed: 0,A,B,C
NO1,0,1,2
NO2,3,4,5
NO3,6,7,8


In [5]:
# Replace the column index by assigning a new list of labels to the .columns attribute.
# This modifies df1 itself.
df1.columns = ['NOA','NOB','NOC']
df1

Unnamed: 0,NOA,NOB,NOC
NO1,0,1,2
NO2,3,4,5
NO3,6,7,8


2.使用rename的方式修改  
语法：  
DataFrame.rename(mapper=None, index=None, columns=None, axis=None, inplace=False)  
index:用于修改行索引的映射（字典或函数）  
columns：用于修改列索引的映射（字典或函数）  
axis:轴方向
inplace：是否对原数据进行修改

In [7]:
# Rename the row index by passing a function to rename().
def df_rename(x):
    """Return the label ``x`` with an underscore appended."""
    return x+'_'
# inplace is not set to True here, so df1 itself is unchanged; only the returned
# frame carries the renamed row labels.
df1.rename(index = df_rename)

Unnamed: 0,NOA,NOB,NOC
NO1_,0,1,2
NO2_,3,4,5
NO3_,6,7,8


In [18]:
# rename() combined with a dict mapping.
# Dict format: {'old label': 'new label'}
# inplace=True modifies df1 itself, so df1 now has columns A, B and C.
df1.rename(columns={'NOA':'A','NOB':'B','NOC':'C'}, inplace=True)

3.将某一列数据设置为索引：  
语法：  
DataFrame.set_index(keys, drop = True, inplace = False)  
keys:需要设置为索引的列名  
drop:是否在原数据中删除该列  
inplace:是否对原数据进行修改

In [19]:
# Use column 'A' as the row index; with drop=True the column 'A' is removed from the
# columns of the returned frame.
df1.set_index('A',drop=True)

Unnamed: 0_level_0,B,C
A,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,2
3,4,5
6,7,8


In [20]:
# Use column 'A' as the row index; with drop=False the column 'A' is also kept as a
# regular column of the returned frame.
df1.set_index('A',drop=False)

Unnamed: 0_level_0,A,B,C
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,1,2
3,3,4,5
6,6,7,8


4.将某条数据设置为索引  
DataFrame.set_axis(labels, axis = 0, inplace = False)  
labels:新索引的值  


In [28]:
# Use the values of column 'B' as the new column labels (axis = 1 targets the columns).
df1.set_axis(df1['B'],axis = 1)

B,1,4,7
NO1,0,1,2
NO2,3,4,5
NO3,6,7,8


#### 添加数据

DataFrame.append(other, ignore_index = False, verify_integrity = False)  
other:需要添加的数据  
ignore_index:是否忽略原有的行索引并重新生成从0开始的整数索引。当添加的数据为字典时必须为True  
verify_integrity：是否禁止出现重复索引，如果设置为True，那么在出现重复索引时会报错。  
注意：DataFrame.append 在 pandas 1.4 中被弃用，并在 pandas 2.0 中移除，新版本中请使用 pd.concat 代替。

In [31]:
# np.random.randint draws random integers; np.random.rand draws floats uniformly
# from [0, 1) (not from a normal distribution); np.random.randn draws from the
# standard normal distribution.
# 16 uniform samples are reshaped to 4x4 and scaled by 100. No seed is set, so the
# values differ between runs.
df2 = pd.DataFrame(np.random.rand(16).reshape(4,4)*100)
df2

Unnamed: 0,0,1,2,3
0,96.968523,84.652119,22.483794,78.636988
1,63.774104,14.243372,60.359393,44.791847
2,85.408744,34.903808,24.53237,58.289586
3,86.569151,42.250721,19.444005,94.002093


In [32]:
# Select the first two rows (positions 0 and 1) and all columns, by position.
df2.iloc[0:2,:]

Unnamed: 0,0,1,2,3
0,96.968523,84.652119,22.483794,78.636988
1,63.774104,14.243372,60.359393,44.791847


In [33]:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported equivalent and takes the same two options.
# Like append, concat does not modify df2: assign the result to a variable if it
# needs to be kept.
pd.concat(
    [df2, df2.iloc[0:2, :]],
    # ignore_index=True discards the original row labels and renumbers the rows 0..n-1.
    ignore_index=True,
    # verify_integrity=True raises an error if the resulting index has duplicates.
    verify_integrity=True,
)

Unnamed: 0,0,1,2,3
0,96.968523,84.652119,22.483794,78.636988
1,63.774104,14.243372,60.359393,44.791847
2,85.408744,34.903808,24.53237,58.289586
3,86.569151,42.250721,19.444005,94.002093
4,96.968523,84.652119,22.483794,78.636988
5,63.774104,14.243372,60.359393,44.791847


In [45]:
# One row of data keyed by column label: an integer label 0 and a string label 'B'.
data_dict = {0: 1, 'B': 5}
data_dict

{0: 1, 'B': 5}

In [46]:
# DataFrame.append was removed in pandas 2.0; wrap the dict in a one-row DataFrame
# and concatenate it instead. Columns missing on either side are filled with NaN,
# and ignore_index=True renumbers the rows 0..n-1.
pd.concat([df2, pd.DataFrame([data_dict])], ignore_index=True)

Unnamed: 0,0,1,2,3,B
0,96.968523,84.652119,22.483794,78.636988,
1,63.774104,14.243372,60.359393,44.791847,
2,85.408744,34.903808,24.53237,58.289586,
3,86.569151,42.250721,19.444005,94.002093,
4,1.0,,,,5.0


使用insert在指定位置添加数据  
DataFrame.insert(loc, column, value, allow_duplicates: 'bool' = False)  
loc:int型数据，表示需要将数据添加到第几列
column：需要添加的列名  
value：需要添加的值  
allow_duplicates:是否允许列名重复

In [52]:
# insert() modifies df2 in place and returns nothing.
# NOTE: because allow_duplicates = True, re-running this cell inserts another column
# named 'A' each time; the duplicated 'A' column in the recorded output presumably
# comes from running the cell twice.
df2.insert(2,'A',[1,2,3,4],allow_duplicates = True)
df2

Unnamed: 0,0,1,A,A.1,2,3
0,96.968523,84.652119,1,1,22.483794,78.636988
1,63.774104,14.243372,2,2,60.359393,44.791847
2,85.408744,34.903808,3,3,24.53237,58.289586
3,86.569151,42.250721,4,4,19.444005,94.002093


#### 数据拼接

1.pd.concat(objs, axis=0, join='outer', ignore_index = False,verify_integrity = False)   
objs:需要合并的数据，如果是多个对象，需要放在列表（或元组）中传入  
axis：轴，数据合并的方式  
join：{'inner','outer'},其他轴上索引的处理方式，默认为‘outer’可以理解为并集，‘inner’可以理解为交集  
ignore_index：是否忽略原有的索引并重新生成从0开始的整数索引  
verify_integrity:是否禁止出现重复索引

In [65]:
# 4x4 frame holding the integers 1..16, with columns A-D and the default row index.
df3 = pd.DataFrame(
    np.arange(1, 17).reshape(4, 4),
    columns=['A', 'B', 'C', 'D'],
)
df3

Unnamed: 0,A,B,C,D
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12
3,13,14,15,16


In [66]:
# 4x4 frame of float ones with columns C-F; it shares columns C and D with df3.
df4 = pd.DataFrame(
    np.ones((4, 4)),
    columns=['C', 'D', 'E', 'F'],
)
df4

Unnamed: 0,C,D,E,F
0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0


In [67]:
# Concatenate along axis 0: df4 is stacked below df3. The columns are unioned and
# cells with no source value become NaN. The original row labels are kept, so the
# labels 0-3 appear twice.
df5 = pd.concat([df3, df4], axis=0)
df5

Unnamed: 0,A,B,C,D,E,F
0,1.0,2.0,3.0,4.0,,
1,5.0,6.0,7.0,8.0,,
2,9.0,10.0,11.0,12.0,,
3,13.0,14.0,15.0,16.0,,
0,,,1.0,1.0,1.0,1.0
1,,,1.0,1.0,1.0,1.0
2,,,1.0,1.0,1.0,1.0
3,,,1.0,1.0,1.0,1.0


In [72]:
# Concatenate along axis 0 again (rows stacked), this time spelling out the options.
# NOTE(review): the original comment said "along axis 1", but the code passes
# axis = 0; pass axis = 1 instead to place the frames side by side.
df6 = pd.concat([df3,df4],
                axis = 0,# with axis = 0, join applies to the column labels
                # how the labels on the other axis are combined
                join = 'outer', # 'inner' = intersection, 'outer' = union
                ignore_index = True
                )
df6

Unnamed: 0,A,B,C,D,E,F
0,1.0,2.0,3.0,4.0,,
1,5.0,6.0,7.0,8.0,,
2,9.0,10.0,11.0,12.0,,
3,13.0,14.0,15.0,16.0,,
4,,,1.0,1.0,1.0,1.0
5,,,1.0,1.0,1.0,1.0
6,,,1.0,1.0,1.0,1.0
7,,,1.0,1.0,1.0,1.0


2.pd.merge(left, right, how = 'inner', on = None, left_on = None, right_on = None, left_index = False, right_index = False, sort = False, suffixes)  
left:参与合并的左侧DataFrame/Series对象  
right：参与合并的右侧DataFrame/Series对象  
how:连接方式，默认‘inner’，可选参数：   
'left'：使用左侧的DataFrame的键，左连接  
'right'：使用右侧的DataFrame的键，右连接  
'outer'：使用2个DataFrame全部的键，外连接  
'inner'：使用2个DataFrame键的交集，内连接  
on:用于连接的列名。必须存在于2个DataFrame对象中  
left_on:左侧DataFrame中用作连接键的列名  
right_on:右侧DataFrame中用作连接键的列名  
left_index:以左侧DataFrame的行索引作为连接键
right_index:以右侧DataFrame的行索引作为连接键
sort:是否排序
suffixes：用于追加到重叠列名的末尾，默认是（_x,_y)

In [75]:
# Table with a 'key' column plus value columns A and B, used in the merge examples.
left = pd.DataFrame(
    dict(
        key=['k0', 'k1', 'k2'],
        A=['A0', 'A1', 'A2'],
        B=['B0', 'B1', 'B2'],
    )
)
left

Unnamed: 0,key,A,B
0,k0,A0,B0
1,k1,A1,B1
2,k2,A2,B2


In [77]:
# Table with a 'key' column plus value columns C and D; it has one more row than the
# three-row table it is merged with.
right = pd.DataFrame(
    dict(
        key=['k0', 'k1', 'k2', 'K3'],
        C=['C0', 'C1', 'C2', 'C3'],
        D=['D0', 'D1', 'D2', 'D3'],
    )
)
right

Unnamed: 0,key,C,D
0,k0,C0,D0
1,k1,C1,D1
2,k2,C2,D2
3,K3,C3,D3


In [79]:
# Outer join: keep the keys of both frames. No `on` is given, so the shared column
# 'key' is used; rows whose key is missing on one side get NaN in that side's columns.
pd.merge(right,left,how = 'outer')

Unnamed: 0,key,C,D,A,B
0,k0,C0,D0,A0,B0
1,k1,C1,D1,A1,B1
2,k2,C2,D2,A2,B2
3,K3,C3,D3,,


In [80]:
# Right join: keep the keys of the second argument. The second argument here is the
# frame named `left`, so only its keys k0-k2 appear in the result.
pd.merge(right,left,how = 'right')

Unnamed: 0,key,C,D,A,B
0,k0,C0,D0,A0,B0
1,k1,C1,D1,A1,B1
2,k2,C2,D2,A2,B2


In [86]:
# Three-row frame with columns A and B and the default integer row index.
left1 = pd.DataFrame(
    dict(
        A=['A0', 'A1', 'A2'],
        B=['B0', 'B1', 'B2'],
    )
)
left1

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2


In [85]:
# Four-row frame with columns C and D and the default integer row index.
right1 = pd.DataFrame(
    dict(
        C=['C0', 'C1', 'C2', 'C3'],
        D=['D0', 'D1', 'D2', 'D3'],
    )
)
right1

Unnamed: 0,C,D
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


In [84]:
# Use merge to combine two DataFrames that share no columns but do share row labels.
pd.merge(left1,right1,
         # join on the row index of both frames instead of on a column
         left_index = True,right_index = True)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2


In [91]:
# Three-row frame with columns A and B, labelled with the strings 'one'/'two'/'three'.
left2 = pd.DataFrame(
    dict(
        A=['A0', 'A1', 'A2'],
        B=['B0', 'B1', 'B2'],
    ),
    index=['one', 'two', 'three'],
)
left2

Unnamed: 0,A,B
one,A0,B0
two,A1,B1
three,A2,B2


In [92]:
# Four-row frame with columns C and D and the default integer row index 0..3.
right2 = pd.DataFrame(
    dict(
        C=['C0', 'C1', 'C2', 'C3'],
        D=['D0', 'D1', 'D2', 'D3'],
    )
)
right2

Unnamed: 0,C,D
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


In [93]:
# Merge two DataFrames on their row indexes when the indexes have no labels in common.
# left2 is labelled 'one'/'two'/'three' while right2 uses 0..3, so the default inner
# join matches nothing and the result is an empty frame with columns A, B, C, D.
# (The original cell ended with a full-width comma "，" after True, which is a
# SyntaxError; it has been removed.)
pd.merge(left2, right2,
         # join on the row index of both frames instead of on a column
         left_index=True,
         right_index=True)

Unnamed: 0,A,B,C,D


3.DataFrame.join(other, on = None, how = 'left', lsuffix = '', rsuffix = '', sort = False)   
on:用于连接的列名
how:数据连接方式，可选参数：{'left', 'right', 'outer', 'inner'}
lsuffix:左侧重叠的列名添加后缀
rsuffix:右侧重叠的列名添加后缀
sort:是否根据连接键对合并的数据进行排序

In [100]:
# Two-row frame with columns a and b and the default integer row index.
left3 = pd.DataFrame(
    dict(
        a=['a0', 'a1'],
        b=['b0', 'b1'],
    )
)
left3

Unnamed: 0,a,b
0,a0,b0
1,a1,b1


In [102]:
# Two-row frame with columns c and d, labelled 'No1' and 'No2'.
right3 = pd.DataFrame(
    dict(
        c=['c0', 'c1'],
        d=['d0', 'd1'],
    ),
    index=['No1', 'No2'],
)
right3

Unnamed: 0,c,d
No1,c0,d0
No2,c1,d1


In [103]:
# When join() combines two frames that have neither row labels nor column names in
# common, the columns coming from `other` are filled with NaN.
right3.join(left3)

Unnamed: 0,c,d,a,b
No1,c0,d0,,
No2,c1,d1,,


#### 删除数据

1.使用del关键字

In [104]:
# Store the joined result under its own name so it can be used in the deletion examples.
data =right3.join(left3)
data

Unnamed: 0,c,d,a,b
No1,c0,d0,,
No2,c1,d1,,


In [105]:
# del followed by the data to remove: this deletes column 'a' from `data` in place.
del data['a']

In [108]:
# Display the frame to confirm that column 'a' is gone.
data

Unnamed: 0,c,d,b
No1,c0,d0,
No2,c1,d1,


2.DataFrame.drop(labels = None, axis = 0, index = None, columns = None,  inplace = False)  
labels:要删除的行或者列，如果需要删除多个，就需要传入一个列表    
axis：数据处理的方向  
index：需要删除的行  
columns：需要删除的列    
inplace:是否修改原数据  

In [109]:
# Display the frame as it stands before calling drop().
data

Unnamed: 0,c,d,b
No1,c0,d0,
No2,c1,d1,


In [110]:
# Drop columns 'c' and 'd'. inplace is left at its default, so a new frame is
# returned and `data` itself is not modified.
data.drop(columns=['c', 'd'])

Unnamed: 0,b
No1,
No2,
