## <font color="maroon"><h4 align="center">Handling Missing Data - fillna, interpolate, dropna</h4></font>

In [None]:
import pandas as pd
# Load the weather data; parse_dates asks pandas to parse the 'day' column as dates.
df = pd.read_csv("weather_data.csv", parse_dates=['day'])
# Only the last expression of a cell is displayed, so the type check has to be
# printed explicitly or its result is silently discarded.
print(type(df.day[0]))
df

In [None]:
# Use the 'day' column as the index.
# Rebinding instead of inplace=True, plus the guard, keeps this cell re-runnable:
# once 'day' has become the index it is no longer a column, and calling
# set_index('day') a second time would raise KeyError.
if 'day' in df.columns:
    df = df.set_index('day')
df

## <font color="blue">fillna</font>

<font color="purple">**Fill all NaN with one specific value**</font>

In [None]:
# Replace every NaN in the frame, regardless of column, with the same scalar.
new_df = df.fillna(value=0)
new_df

<font color="purple">**Fill NaN per column using a dict keyed by column name**</font>

In [None]:
# Per-column replacement values: each key is a column name, each value is what
# NaN in that column is replaced with.
fill_values = {
    'temperature': 0,
    'windspeed': 0,
    'event': 'No Event',
}
new_df = df.fillna(value=fill_values)
new_df

<font color="purple">**Use a fill method (forward fill / backward fill) to determine how to fill NaN values**</font>

In [None]:
# Forward fill: each NaN takes the last valid value above it in the same column.
# DataFrame.ffill() is used because the `method` keyword of fillna() is
# deprecated in recent pandas (2.1+).
new_df = df.ffill()
new_df

In [None]:
# Backward fill: each NaN takes the next valid value below it in the same column.
# DataFrame.bfill() is used because the `method` keyword of fillna() is
# deprecated in recent pandas (2.1+).
new_df = df.bfill()
new_df

<font color="purple">**Use of axis**</font>

In [None]:
# axis is either "index" or "columns".
# With axis="columns" the fill runs horizontally: each NaN takes the next valid
# value to its right in the same row.
# DataFrame.bfill() is used because the `method` keyword of fillna() is
# deprecated in recent pandas (2.1+).
new_df = df.bfill(axis="columns")
new_df

<font color="purple">**limit parameter**</font>

In [None]:
# limit=1 forward-fills at most one consecutive NaN; any further NaN in the
# same gap is left untouched.
# DataFrame.ffill() is used because the `method` keyword of fillna() is
# deprecated in recent pandas (2.1+).
new_df = df.ffill(limit=1)
new_df

### <font color="blue">interpolate</font>

In [None]:
# Linear interpolation (the default method, spelled out here): missing values
# are estimated from their neighbours, treating rows as equally spaced.
new_df = df.interpolate(method="linear")
new_df

In [None]:
# Time-based interpolation: uses the datetime index, so the size of the gap
# between dates is taken into account when estimating a missing value.
interpolation_method = "time"
new_df = df.interpolate(method=interpolation_method)
new_df

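**Notice that with `method="time"` the temperature on 2017-01-04 is 29, instead of the 30 produced by plain linear interpolation, because time-based interpolation takes the actual gap between dates into account.**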

**There are many other interpolation methods, such as quadratic, piecewise_polynomial, cubic, etc. Search for "pandas DataFrame interpolate" to see the complete documentation.**

### <font color="blue">dropna</font>

In [None]:
# Drop every row that contains at least one NaN (how="any" is the default,
# spelled out here for clarity).
new_df = df.dropna(how="any")
new_df

In [None]:
# Drop only the rows in which every value is NaN.
new_df = df.dropna(axis="index", how='all')
new_df

In [None]:
# thresh is the minimum number of non-NaN values a row needs in order to be kept.
min_valid_values = 1
new_df = df.dropna(axis="index", thresh=min_valid_values)
new_df

### <font color="blue">Inserting Missing Dates</font>

In [None]:
# Insert the dates that are missing from the index, then fill the new rows.
# reindex() returns a new DataFrame and does not modify df, so its result must
# be used; otherwise the missing dates are never added before interpolating.
# date_range() already returns a DatetimeIndex, so no extra conversion is needed.
# ISO dates (YYYY-MM-DD) avoid any day/month ambiguity.
idx = pd.date_range("2017-01-01", "2017-01-11")
dd = df.reindex(idx).interpolate(method='time')
dd

In [None]:
# Forward-fill whatever is still NaN after interpolation.
# DataFrame.ffill() is used because the `method` keyword of fillna() is
# deprecated in recent pandas (2.1+).
dd.ffill()


In [None]:
import numpy as np

# Replace every occurrence of the listed values, in any column, with NaN.
values_to_blank = [7.00, 8.00]
dd.replace(to_replace=values_to_blank, value=np.nan)

In [None]:
# Replace values per column: each key is a column name, each value lists the
# entries of that column that should become NaN.
import numpy as np

per_column_targets = {"windspeed": [7.00, 8.00]}
dd.replace(to_replace=per_column_targets, value=np.nan)

In [None]:
# Replace specific values with specific replacements, given as a mapping of
# old value -> new value.
import numpy as np

replacements = {
    7.00: np.nan,
    np.nan: 'Sunny',
}
dd.replace(replacements)

In [None]:
# With regex=True the first argument is treated as a regular expression:
# every ASCII letter found in a string value is replaced with the empty string.
dd.replace(to_replace='[A-Za-z]', value="", regex=True)

In [None]:
# The same regex replacement, restricted to chosen columns: each key is a
# column name, each value is the pattern to replace in that column.
column_patterns = {"event": '[A-Za-z]'}
dd.replace(to_replace=column_patterns, value='', regex=True)

In [None]:
# Sample data for replacing one list of values with another list of values:
# one (score, student) record per row.
grade_records = [
    ('exceptional', 'rob'),
    ('average', 'maya'),
    ('good', 'parthiv'),
    ('poor', 'tom'),
    ('average', 'julian'),
    ('exceptional', 'erica'),
]
df = pd.DataFrame(grade_records, columns=['score', 'student'])
df

In [None]:
# The two lists are matched by position: the i-th label is replaced with the
# i-th rank.
score_labels = ['poor', 'average', 'good', 'exceptional']
score_ranks = [1, 2, 3, 4]
df.replace(to_replace=score_labels, value=score_ranks)