In [32]:
import pandas as pd
import numpy as np
test_df = pd.DataFrame([
    {"a": 1, 'b': 2, 'c': 3},
    {"a": 11, 'b': 22, 'c': 33},
    {"a": 111, 'b': 22, 'c': 333},
    {"a": 111, 'b': 44, 'c': 444},
    {"a": 5, 'b': 55, },
])
test_df

Unnamed: 0,a,b,c
0,1,2,3.0
1,11,22,33.0
2,111,22,333.0
3,111,44,444.0
4,5,55,


In [8]:
# 条件获取行/列数据
test_df.loc[test_df['a']==111, ['b', 'c']]

Unnamed: 0,b,c
2,222,333
3,444,444


In [9]:
# 修改列名称
test_df.rename({'c': 'cc', 'b': 'bb'}, axis=1)

Unnamed: 0,a,bb,cc
0,1,2,3
1,11,22,33
2,111,222,333
3,111,444,444


In [33]:
# 处理空值
print(test_df.fillna({'c': 0}))
print(test_df.fillna(method="ffill"))   # 使用该列上一个值填充当前nan值

     a   b      c
0    1   2    3.0
1   11  22   33.0
2  111  22  333.0
3  111  44  444.0
4    5  55    0.0
     a   b      c
0    1   2    3.0
1   11  22   33.0
2  111  22  333.0
3  111  44  444.0
4    5  55  444.0


In [14]:
# 设置列数据类型
test_df['b'].astype(str)

0     2
1    22
2    22
3    44
4    55
Name: b, dtype: object

In [16]:
# 统计
test_df.loc[test_df['c']!=np.nan].groupby(['a']).sum().reset_index()

Unnamed: 0,a,b,c
0,1,2,3.0
1,5,55,0.0
2,11,22,33.0
3,111,66,777.0


In [24]:
# 字段赋值
test_df.loc[:, 'd'] = test_df['a'] + test_df['c']
test_df.loc[:, 'e'] = test_df.apply(lambda v: v['a'] + v['c'], axis=1)
test_df.loc[:, 'f'] = test_df['a'].apply(lambda v: v * v)
test_df.loc[:, 'f'] = test_df['f'].astype(str)
test_df.loc[:, 'g'] = test_df['f'].str.slice(0, 2)
test_df


Unnamed: 0,a,b,c,d,e,f,g
0,1,2,3.0,4.0,4.0,1,1
1,11,22,33.0,44.0,44.0,121,12
2,111,22,333.0,444.0,444.0,12321,12
3,111,44,444.0,555.0,555.0,12321,12
4,5,55,,,,25,25


In [29]:
# 获取单列所有数值
print(test_df['f'].unique().tolist())   # 去重
print(test_df['f'].tolist())

['1', '121', '12321', '25']
['1', '121', '12321', '12321', '25']


In [36]:
# DataFrame 关联 merge
test2_df = pd.DataFrame([
    {"a": 5, 'b': 55, },
    {"a": 5, 'b': 55, },
])
pd.merge(test_df, test2_df, on=['a'], how='outer', suffixes=['', '_y'])

Unnamed: 0,a,b,c,b_y
0,1,2,3.0,
1,11,22,33.0,
2,111,22,333.0,
3,111,44,444.0,
4,5,55,,55.0
5,5,55,,55.0


In [37]:
# DataFrame 添加行
test_df.append([
{"a": 6, 'b': 66, 'c': 666},
{"a": 7, 'b': 77, },
], ignore_index=True)

Unnamed: 0,a,b,c
0,1,2,3.0
1,11,22,33.0
2,111,22,333.0
3,111,44,444.0
4,5,55,
5,6,66,666.0
6,7,77,


In [38]:
# DataFrame 合并
test3_df = pd.DataFrame([
    {"a": 8, 'b': 88, 'c': 888 },
    {"a": 9, 'b': 99, },
])
pd.concat([test_df, test3_df])

Unnamed: 0,a,b,c
0,1,2,3.0
1,11,22,33.0
2,111,22,333.0
3,111,44,444.0
4,5,55,
0,8,88,888.0
1,9,99,


In [43]:
# 判断 DataFrame 是否为空
print(test_df.empty)
print(pd.DataFrame({"a": [], "b": []}).empty)

False
True
