Handling Missing Data

In [None]:
import pandas as pd
# Load the weather CSV with default parsing (no date handling requested);
# the following cell inspects what type the `day` values come back as.
df = pd.read_csv(filepath_or_buffer="weather_data.csv")
df

In [None]:
# Inspect the Python type of the `day` value stored under index label 0.
type(df["day"][0])

In [None]:
# Reload the same file, this time asking pandas to parse the `day` column
# as dates while reading.
df = pd.read_csv(filepath_or_buffer="weather_data.csv", parse_dates=["day"])
df

In [None]:
# Re-check the type of the `day` value at index label 0 after reloading
# with parse_dates.
type(df["day"][0])

In [None]:
# Promote the `day` column to the row index, modifying `df` in place.
# The time-based interpolation and date reindexing cells further down
# operate on this index.
df.set_index(keys="day", inplace=True)
df

In [None]:
# Fill every NaN, in every column, with the same scalar 0.
new_df = df.fillna(value=0)
new_df

In [None]:
# Fill NaNs with a replacement chosen per column: the mapping keys are
# column names and the values are what each column's NaNs become.
per_column_fill = {"temperature": 0, "windspeed": 0, "event": "no event"}
new_df = df.fillna(value=per_column_fill)
new_df

In [None]:
# Propagate the last valid value forward (down each column) into the NaNs
# that follow it.
# DataFrame.ffill() is used instead of fillna(method="ffill") because the
# `method` keyword of fillna is deprecated since pandas 2.1.
new_df = df.ffill()
new_df

In [None]:
# Propagate the next valid value backward (up each column) into the NaNs
# that precede it.
# DataFrame.bfill() is used instead of fillna(method="bfill") because the
# `method` keyword of fillna is deprecated since pandas 2.1.
new_df = df.bfill()
new_df

In [None]:
# Back-fill along the column axis: within each row, a NaN takes the next
# valid value found in a column to its right.
# DataFrame.bfill() is used instead of fillna(method="bfill", ...) because
# the `method` keyword of fillna is deprecated since pandas 2.1.
new_df = df.bfill(axis="columns")
new_df

In [None]:
# Back-fill down each column, but fill at most one consecutive NaN per gap
# (limit=1); any further NaNs in a longer run are left as NaN.
# DataFrame.bfill() is used instead of fillna(method="bfill", ...) because
# the `method` keyword of fillna is deprecated since pandas 2.1.
new_df = df.bfill(limit=1)
new_df

In [None]:
# Fill NaNs by linear interpolation, which is the default method: the index
# is ignored and rows are treated as equally spaced.
new_df = df.interpolate(method="linear")
new_df

In [None]:
# Fill NaNs by interpolating with respect to the time index, so the length
# of the interval between index entries is taken into account.
# This relies on `df` having the `day` dates as its index.
new_df = df.interpolate(method="time", axis=0)
new_df

In [None]:
# Remove every row that contains at least one missing value
# (the dropna defaults, spelled out explicitly).
new_df = df.dropna(axis="index", how="any")
new_df

In [None]:
# Remove every column that contains at least one missing value.
new_df = df.dropna(axis=1, how="any")
new_df

In [None]:
# Remove only the rows in which every value is missing; rows with at
# least one real value are kept.
new_df = df.dropna(axis="index", how="all")
new_df

In [None]:
# Keep a row only if it has at least 2 non-missing values; rows with
# fewer are dropped.
new_df = df.dropna(axis="index", thresh=2)
new_df

In [None]:
# Drop rows that are missing a value in any of the listed columns;
# missing values in other columns do not cause a row to be dropped.
required_columns = ["temperature", "windspeed"]
new_df = df.dropna(subset=required_columns)
new_df

In [None]:
# Insert missing dates: build a complete daily range and reindex onto it,
# so any date not already in the index appears as a row of NaNs.
# NOTE(review): the range is hard-coded; assumes the data's dates fall
# within 2018-01-01..2018-01-11 -- confirm against the CSV.
dt = pd.date_range(start="2018-01-01", end="2018-01-11")
idx = pd.DatetimeIndex(dt)
new_df = df.reindex(index=idx)
new_df