In [5]:
import pandas as pd
import numpy as np


data = {
    'A': [1, 2, np.nan, 4, 5],
    'B': [np.nan, 2, 3, 4, 5],
    'C': [1, 2, 3, 4, np.nan]
}

df = pd.DataFrame(data)


In [6]:
from data_cleaner.missing_value_handler import MissingValueHandler

# Initialize the class
handler = MissingValueHandler(df)

# Identify missing values
missing_values = handler.identify_missing()
print("Missing Values:\n", missing_values)

# Fill missing values with average
df_imputed_mean = handler.impute_mean()
print("After Mean Imputation:\n", df_imputed_mean)

# Fill missing values with median
df_imputed_median = handler.impute_median()
print("After Median Imputation:\n", df_imputed_median)

# Filling missing values with a constant value (e.g. 0)
df_imputed_constant = handler.impute_constant(0)
print("After Constant Imputation:\n", df_imputed_constant)

# Delete rows or columns containing missing values
df_dropped = handler.drop_missing(axis=0, how='any')
print("After Dropping Rows with Any Missing Values:\n", df_dropped)


Missing Values:
        A      B      C
0  False   True  False
1  False  False  False
2   True  False  False
3  False  False  False
4  False  False   True
After Mean Imputation:
      A    B    C
0  1.0  3.5  1.0
1  2.0  2.0  2.0
2  3.0  3.0  3.0
3  4.0  4.0  4.0
4  5.0  5.0  2.5
After Median Imputation:
      A    B    C
0  1.0  3.5  1.0
1  2.0  2.0  2.0
2  3.0  3.0  3.0
3  4.0  4.0  4.0
4  5.0  5.0  2.5
After Constant Imputation:
      A    B    C
0  1.0  3.5  1.0
1  2.0  2.0  2.0
2  3.0  3.0  3.0
3  4.0  4.0  4.0
4  5.0  5.0  2.5
After Dropping Rows with Any Missing Values:
      A    B    C
0  1.0  3.5  1.0
1  2.0  2.0  2.0
2  3.0  3.0  3.0
3  4.0  4.0  4.0
4  5.0  5.0  2.5
