In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a 5x3 frame of standard-normal draws on a sparse letter index.
row_labels = ["a", "c", "e", "f", "h"]
column_labels = ["one", "two", "three"]
samples = np.random.randn(len(row_labels), len(column_labels))

# Add a constant string column and a boolean flag derived from column "one".
df = pd.DataFrame(samples, index=row_labels, columns=column_labels).assign(
    four="bar",
    five=lambda frame: frame["one"] > 0,
)

# Reindexing onto the denser a..h index creates all-NaN rows for b, d and g.
df = df.reindex(list("abcdefgh"))
df

Unnamed: 0,one,two,three,four,five
a,-0.438646,0.364397,1.831542,bar,False
b,,,,,
c,1.976154,-0.255192,0.29976,bar,True
d,,,,,
e,-1.053312,0.032852,-1.627264,bar,False
f,-0.738551,-0.214429,-0.013392,bar,False
g,,,,,
h,-0.769934,0.943905,-0.233727,bar,False


In [3]:
# 1. Check whether there are missing values
# df.isna()   # alternative: element-wise mask of missing values

# df.notna()  # alternative: element-wise mask of present values

# info() prints the non-null count and dtype of every column.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, a to h
Data columns (total 5 columns):
one      5 non-null float64
two      5 non-null float64
three    5 non-null float64
four     5 non-null object
five     5 non-null object
dtypes: float64(3), object(2)
memory usage: 384.0+ bytes


In [4]:
# 2. Fill NaN values
# Replace the missing entries of column "two" with zero; df itself is not modified.
two_zero_filled = df["two"].fillna(value=0)
two_zero_filled

a    0.364397
b    0.000000
c   -0.255192
d    0.000000
e    0.032852
f   -0.214429
g    0.000000
h    0.943905
Name: two, dtype: float64

In [5]:
# Fill the gaps in column "two" with the mean of its non-missing values.
two_column = df["two"]
two_column.fillna(two_column.mean())

a    0.364397
b    0.174306
c   -0.255192
d    0.174306
e    0.032852
f   -0.214429
g    0.174306
h    0.943905
Name: two, dtype: float64

In [6]:
# 3. Drop NaN values
# Remove every row that contains at least one missing value.
df.dropna(axis="index")

Unnamed: 0,one,two,three,four,five
a,-0.438646,0.364397,1.831542,bar,False
c,1.976154,-0.255192,0.29976,bar,True
e,-1.053312,0.032852,-1.627264,bar,False
f,-0.738551,-0.214429,-0.013392,bar,False
h,-0.769934,0.943905,-0.233727,bar,False


In [7]:
# Remove every column that contains at least one missing value.
# Every column here has NaN in rows b, d and g, so only the index remains.
df.dropna(axis="columns")

a
b
c
d
e
f
g
h


In [8]:
# 4. Replace values
# Sample frame: integers, strings using "." as a placeholder, and one real NaN.
# Note: this rebinds `df`, replacing the frame used in the cells above.
d = {
    "a": [0, 1, 2, 3],
    "b": ["a", "b", ".", "."],
    "c": ["a", "b", np.nan, "d"],
}
df = pd.DataFrame(data=d)
df

Unnamed: 0,a,b,c
0,0,a,a
1,1,b,b
2,2,.,
3,3,.,d


In [9]:
# Alternative: turn the "." placeholders into missing values.
# df.replace(".", np.nan)

# Replace missing values with the string "0"; a new frame is returned.
# np.nan is used because the np.NAN alias was removed in NumPy 2.0
# and raises AttributeError there.
df.replace(np.nan, "0")

Unnamed: 0,a,b,c
0,0,a,a
1,1,b,b
2,2,.,0
3,3,.,d
