In [3]:
import pandas as pd
import numpy as np

In [4]:
# Small example frame used throughout the null-handling demos below:
# col1 has 5 missing entries out of 8, col2 is the complete sequence 1..8.
toy_df = pd.DataFrame(
    {
        "col1": [100, np.nan, np.nan, np.nan, np.nan, 600, np.nan, 800],
        "col2": list(range(1, 9)),
    }
)

In [3]:
# Bare last expression: the notebook renders the frame so the NaN positions in col1 are visible.
toy_df

Unnamed: 0,col1,col2
0,100.0,1
1,,2
2,,3
3,,4
4,,5
5,600.0,6
6,,7
7,800.0,8


### This will return a dataframe with boolean values indicating whether a value was null or not

In [4]:
# Element-wise mask: True where a cell is missing, False otherwise
# (isna is the same method as isnull under its other name).
toy_df.isna()

Unnamed: 0,col1,col2
0,False,False
1,True,False
2,True,False
3,True,False
4,True,False
5,False,False
6,True,False
7,False,False


### This will return the count of null values in each column

In [5]:
# Summing the boolean mask column by column counts the missing cells per column.
toy_df.isna().sum()

col1    5
col2    0
dtype: int64

### This will fill in null values with the last non-null value before them in the same column (forward fill)

In [6]:
# Forward fill: each NaN takes the last non-null value above it in its column.
# DataFrame.ffill() is used because the `method=` keyword of fillna()
# (method="pad") is deprecated since pandas 2.1; the result is the same.
toy_df.ffill()

Unnamed: 0,col1,col2
0,100.0,1
1,100.0,2
2,100.0,3
3,100.0,4
4,100.0,5
5,600.0,6
6,600.0,7
7,800.0,8


### This will fill in null values with the value in the dataframe right before it, but only up to a limit of 1 consecutive null per gap

In [7]:
# Forward fill, but fill at most `limit` consecutive NaNs after each valid
# value; any further NaNs in the same gap are left untouched.
# DataFrame.ffill() replaces fillna(method="pad"), whose `method=` keyword
# is deprecated since pandas 2.1.
toy_df.ffill(limit=1)

Unnamed: 0,col1,col2
0,100.0,1
1,100.0,2
2,,3
3,,4
4,,5
5,600.0,6
6,600.0,7
7,800.0,8


### This will also forward-fill missing values with the last non-null value before them (`ffill` and `pad` are two names for the same method)

In [8]:
# Forward fill via the dedicated method. fillna(method="ffill") gives the
# same result, but its `method=` keyword is deprecated since pandas 2.1.
toy_df.ffill()

Unnamed: 0,col1,col2
0,100.0,1
1,100.0,2
2,100.0,3
3,100.0,4
4,100.0,5
5,600.0,6
6,600.0,7
7,800.0,8


### This will drop rows with null values in them

In [5]:
# axis="index" (same as axis=0) drops every row that contains at least one null.
toy_df.dropna(axis="index")

Unnamed: 0,col1,col2
0,100.0,1
5,600.0,6
7,800.0,8


### This will drop columns with null values in them

In [6]:
# axis="columns" (same as axis=1) drops every column that contains at least one null.
toy_df.dropna(axis="columns")

Unnamed: 0,col2
0,1
1,2
2,3
3,4
4,5
5,6
6,7
7,8


### This will drop a column if the number of null values in it exceeds half the number of records in the dataframe

In [7]:
# `thresh` is the minimum number of NON-null values a column needs in order
# to be kept. Half the row count is rounded UP here: truncating with int()
# would, for an odd number of rows, keep a column whose nulls already exceed
# half of the records (e.g. 5 rows, 3 nulls -> int(2.5) == 2 non-nulls suffices).
# For the 8-row toy_df both forms give thresh == 4.
toy_df.dropna(thresh = (toy_df.shape[0] + 1) // 2, axis = 1)

Unnamed: 0,col2
0,1
1,2
2,3
3,4
4,5
5,6
6,7
7,8


### This will fill all the null values (in every column of the dataframe) with the mean of col1

In [8]:
# The mean of col1 is a single scalar, so it is used as the fill value for
# every null in the frame, not only for the nulls in col1.
toy_df.fillna(value=toy_df.loc[:, "col1"].mean())

Unnamed: 0,col1,col2
0,100.0,1
1,500.0,2
2,500.0,3
3,500.0,4
4,500.0,5
5,600.0,6
6,500.0,7
7,800.0,8


### This will interpolate the null values and fill them in

In [9]:
# Linear interpolation (the default method, spelled out here): missing values
# are placed on a straight line between the surrounding known values.
toy_df.interpolate(method="linear")

Unnamed: 0,col1,col2
0,100.0,1
1,200.0,2
2,300.0,3
3,400.0,4
4,500.0,5
5,600.0,6
6,700.0,7
7,800.0,8


### This will replace all the null values in toy_df with 0

In [10]:
# Swap every NaN in the frame for 0; returns a new frame, toy_df itself is unchanged.
toy_df.replace(to_replace=np.nan, value=0)

Unnamed: 0,col1,col2
0,100.0,1
1,0.0,2
2,0.0,3
3,0.0,4
4,0.0,5
5,600.0,6
6,0.0,7
7,800.0,8


In [11]:
# Rebind toy_df to a second example frame: each column mixes finite numbers,
# +inf, -inf and one NaN, to show how infinities interact with null checks.
toy_df = pd.DataFrame(
    {
        "col1": [1, 2, np.inf, -np.inf, np.nan],
        "col2": [100, np.nan, np.inf, -np.inf, 200],
    }
)

In [12]:
# Bare last expression: render the new frame to show where inf, -inf and NaN sit.
toy_df

Unnamed: 0,col1,col2
0,1.0,100.0
1,2.0,
2,inf,inf
3,-inf,-inf
4,,200.0


In [13]:
# Per-column count of missing cells; at this point only NaN is counted, inf / -inf are not.
toy_df.isna().sum()

col1    1
col2    1
dtype: int64

### This will ensure that pandas counts infinity and negative infinity as null values

In [14]:
# Convert +inf / -inf to NaN explicitly so that isnull()/isna() counts them.
# The global option pd.options.mode.use_inf_as_na is deprecated since
# pandas 2.1 (the documented replacement is converting inf to NaN up front),
# and it silently changed null detection for the whole session.
toy_df = toy_df.replace([np.inf, -np.inf], np.nan)

### Now when we count the number of null values, pandas also counts the infinity and negative infinity values


In [16]:
# Same per-column null count as before, re-run to show the infinities now being included.
toy_df.isna().sum()

col1    3
col2    3
dtype: int64