In [None]:
import pandas as pd
import numpy as np

# Read the comma-separated iris data from the working directory.
df = pd.read_csv("iris.csv", delimiter=",")

# The four measurement columns. Any entry that cannot be parsed as a number
# becomes NaN so the imputation steps further down can treat it as missing.
numeric_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Independent numeric-only working copy (every other column is left out),
# so the fill strategies below never modify `df` itself.
df_numeric = df.loc[:, numeric_columns].copy()

# 1. Per-column imputation: each column gets its own replacement value.
#    sepal_length -> column mean, sepal_width -> column median,
#    petal_length / petal_width -> constant 0.
strategy_1_fill = {
    'sepal_length': df_numeric['sepal_length'].mean(),
    'sepal_width': df_numeric['sepal_width'].median(),
    'petal_length': 0,
    'petal_width': 0,
}
# fillna returns a new frame, so df_numeric is left unchanged.
df2 = df_numeric.fillna(value=strategy_1_fill)

print("Dataset after strategy 1:")
print(df2.head())
print("\nMissing values after strategy 1:")
print(df2.isnull().sum())

# 2. Uniform imputation: every column's gaps are filled with that column's
#    own median. The medians come back as a Series keyed by column name, and
#    fillna aligns on those keys; the result is a new frame.
column_medians = df_numeric.median()
df3 = df_numeric.fillna(column_medians)

print("\nDataset after strategy 2:")
print(df3.head())
print("\nMissing values after strategy 2:")
print(df3.isnull().sum())

# Find and print the special row: any row that holds a negative measurement.
# All numeric columns are checked (not only 'sepal_width' and 'petal_length'),
# so an invalid value in any measurement is caught. NaN < 0 evaluates to
# False, so rows that merely have missing values are not flagged here.
has_negative = (df[numeric_columns] < 0).any(axis=1)

special_row = df[has_negative]
print("\nSpecial row:")
print(special_row)

# Bonus: Handle the special row
# Option 1: Remove the row. Boolean indexing builds a new frame, so `df`
# itself is untouched; the mask computed above is reused.
df_clean = df[~has_negative]

# Option 2: Replace negative values with NaN and then impute.
# Masking happens first so the invalid values do not influence the medians.
for column in numeric_columns:
    df[column] = df[column].mask(df[column] < 0)

for column in numeric_columns:
    df[column] = df[column].fillna(df[column].median())

print("\nDataset after handling special row:")
print(df.head())
print("\nMissing values after handling special row:")
print(df[numeric_columns].isnull().sum())

# Print summary statistics to verify the changes
print("\nSummary statistics:")
print(df[numeric_columns].describe())