# #️⃣ Quick Solutions for NaN Values

#### » Create a dataframe containing NaN values 

In [1]:
import numpy as np
import pandas as pd
# Three equal-length float vectors; np.nan marks the missing entries.
vector1 = np.array([5, 3, 6, np.nan, 7, 2, 8, np.nan, 4, np.nan])
vector2 = np.array([10, 1, 5, 3, 2, 8, np.nan, 6, np.nan, 11])
vector3 = np.array([np.nan, 7, 6, np.nan, 7, np.nan, 9, np.nan, 1, np.nan])

# One column per vector, labelled V1..V3.
df = pd.DataFrame(dict(V1=vector1, V2=vector2, V3=vector3))
df

Unnamed: 0,V1,V2,V3
0,5.0,10.0,
1,3.0,1.0,7.0
2,6.0,5.0,6.0
3,,3.0,
4,7.0,2.0,7.0
5,2.0,8.0,
6,8.0,,9.0
7,,6.0,
8,4.0,,1.0
9,,11.0,


#### » Display a boolean mask of the dataframe showing which values are NaN

In [2]:
# Element-wise mask: True wherever the value is missing.
df.isna()

Unnamed: 0,V1,V2,V3
0,False,False,True
1,False,False,False
2,False,False,False
3,True,False,True
4,False,False,False
5,False,False,True
6,False,True,False
7,True,False,True
8,False,True,False
9,True,False,True


#### » Display the number of NaN values in each column

In [3]:
# Summing the boolean mask down the rows counts the NaN values per column.
df.isna().sum(axis=0)

V1    3
V2    2
V3    5
dtype: int64

#### » Display the number of non-NaN values in each column

In [4]:
# count() reports the number of non-missing cells in each column.
df.count()

V1    7
V2    8
V3    5
dtype: int64

#### » Display the total number of NaN values

In [5]:
# First sum: NaN count per column; second sum: grand total over all columns.
df.isna().sum(axis=0).sum()

np.int64(10)

#### » Display the rows that contain at least one NaN value

In [6]:
# Keep a row when any of its columns holds a missing value.
df.loc[df.isna().any(axis="columns")]

Unnamed: 0,V1,V2,V3
0,5.0,10.0,
3,,3.0,
5,2.0,8.0,
6,8.0,,9.0
7,,6.0,
8,4.0,,1.0
9,,11.0,


#### » Display the rows that do not contain any NaN value

In [7]:
# Keep a row only when every one of its columns holds a value.
df.loc[df.notna().all(axis="columns")]

Unnamed: 0,V1,V2,V3
1,3.0,1.0,7.0
2,6.0,5.0,6.0
4,7.0,2.0,7.0


#### » Same as above, checking each column explicitly

In [17]:
# Combine one "has a value" mask per column; a row survives only if all agree.
df[
    df["V1"].notna()
    & df["V2"].notna()
    & df["V3"].notna()
]

Unnamed: 0,V1,V2,V3
1,3.0,1.0,7.0
2,6.0,5.0,6.0
4,7.0,2.0,7.0


## Directly Cleaning NaN Values

#### » Drop the NaN values (not permanent)

In [11]:
# dropna() returns a new frame without the NaN rows; the result is not
# assigned here, so `df` is displayed below still containing its NaN rows.
df.dropna(axis=0, how="any")
df

Unnamed: 0,V1,V2,V3
0,5.0,10.0,
1,3.0,1.0,7.0
2,6.0,5.0,6.0
3,,3.0,
4,7.0,2.0,7.0
5,2.0,8.0,
6,8.0,,9.0
7,,6.0,
8,4.0,,1.0
9,,11.0,


#### » Drop the NaN values (permanent)

In [12]:
# Make the drop permanent by rebinding `df` to the cleaned frame.
# Reassignment is preferred over inplace=True: it yields the same rows,
# keeps the call chainable, and makes the state change explicit to the reader.
df = df.dropna()
df

Unnamed: 0,V1,V2,V3
1,3.0,1.0,7.0
2,6.0,5.0,6.0
4,7.0,2.0,7.0


## Filling NaN Values with the Mean Value of a Column

In [13]:
# Rebuild the original frame from the vectors, since the NaN rows were dropped above.
df = pd.DataFrame(dict(V1=vector1, V2=vector2, V3=vector3))
df

Unnamed: 0,V1,V2,V3
0,5.0,10.0,
1,3.0,1.0,7.0
2,6.0,5.0,6.0
3,,3.0,
4,7.0,2.0,7.0
5,2.0,8.0,
6,8.0,,9.0
7,,6.0,
8,4.0,,1.0
9,,11.0,


In [43]:
# Select the whole V1 column as a Series.
df.loc[:, "V1"]

0    5.0
1    3.0
2    6.0
3    NaN
4    7.0
5    2.0
6    8.0
7    NaN
8    4.0
9    NaN
Name: V1, dtype: float64

#### » Display the average of the V1 column

In [44]:
# NaN entries are skipped, so the mean covers only the present values.
df["V1"].mean(skipna=True)

np.float64(5.0)

#### » Fill the NaN values in column V1 with the mean

In [45]:
# Returns a new Series with each NaN replaced by the column mean; df is not modified.
df["V1"].fillna(value=df["V1"].mean())

0    5.0
1    3.0
2    6.0
3    5.0
4    7.0
5    2.0
6    8.0
7    5.0
8    4.0
9    5.0
Name: V1, dtype: float64

#### » Fill the NaN values in every column with that column's mean

In [19]:
# Rebuild the frame so this cell does not depend on earlier mutations of df.
df = pd.DataFrame({"V1":vector1,"V2":vector2,"V3":vector3})
# df.mean() gives one mean per column (NaN skipped). fillna aligns that Series
# on the column labels, so each column's gaps are filled with its own mean.
# This built-in replaces the per-column apply(lambda ...) and returns the same
# frame; df itself is left unchanged.
df.fillna(df.mean())

Unnamed: 0,V1,V2,V3
0,5.0,10.0,6.0
1,3.0,1.0,7.0
2,6.0,5.0,6.0
3,5.0,3.0,6.0
4,7.0,2.0,7.0
5,2.0,8.0,6.0
6,8.0,5.75,9.0
7,5.0,6.0,6.0
8,4.0,5.75,1.0
9,5.0,11.0,6.0


## Filling NaN Values of a Column with 0 

In [46]:
# Returns a new Series with each NaN in V2 replaced by 0; df is not modified.
df["V2"].fillna(value=0)

0    10.0
1     1.0
2     5.0
3     3.0
4     2.0
5     8.0
6     0.0
7     6.0
8     0.0
9    11.0
Name: V2, dtype: float64