# 🧼 Clean Data Demo
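This notebook demonstrates how to clean messy data using pandas: it loads `dirty_data.csv`, fills missing values, standardizes text, removes duplicate rows, fixes column types, renames columns, and saves the result to `cleaned_data.csv`.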

In [None]:

import pandas as pd

# Read the raw CSV into the working frame `df`, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer="dirty_data.csv")
df.head(n=5)


## Step 1: Check for Nulls

In [None]:
# Count missing values per column (`isna` performs the same check as `isnull`).
df.isna().sum(axis=0)

## Step 2: Fill Nulls

In [None]:
# Per-column replacements for missing values: placeholder strings for `name`
# and `gender`, and the mean of the existing values for `age`.
null_replacements = {
    'name': 'Unknown',
    'age': df['age'].mean(),
    'gender': 'unknown',
}
# Only the columns listed above are filled; `df` is modified in place.
df.fillna(value=null_replacements, inplace=True)

## Step 3: Standardize Text

In [None]:
# Normalize `gender` in two steps: trim surrounding whitespace, then lowercase,
# so variants that differ only in padding or case end up identical.
gender_trimmed = df['gender'].str.strip()
df['gender'] = gender_trimmed.str.lower()

## Step 4: Remove Duplicates

In [None]:
# Drop rows that repeat an earlier row across all columns, keeping the first
# occurrence; `df` is modified in place.
# NOTE(review): this runs before the income/date conversion in Step 5, so rows
# that differ only in how those raw values are written would not be caught
# here — confirm the ordering is intended.
df.drop_duplicates(subset=None, keep='first', inplace=True)

## Step 5: Fix Column Types

In [None]:

# Convert `income` to float. Values are stringified and trimmed first so stray
# whitespace does not block parsing; anything that still is not a number
# becomes NaN instead of raising, matching how `signup_date` is coerced below.
income_text = df['income'].astype(str).str.strip()
# The trailing astype(float) keeps the column float64 even if every value
# happens to parse as an integer.
df['income'] = pd.to_numeric(income_text, errors='coerce').astype(float)

# Parse `signup_date` as datetimes; unparseable entries become NaT.
df['signup_date'] = pd.to_datetime(df['signup_date'], errors='coerce')


## Step 6: Rename Columns

In [None]:
# Old column name -> new column name; columns not listed keep their names.
column_renames = {'signup_date': 'Signup_Date', 'income': 'Income'}
df.rename(columns=column_renames, inplace=True)

## Step 7: Save Cleaned Data

In [None]:
# Write the cleaned frame to disk without the index column, then preview the
# first five rows of the final result.
df.to_csv(path_or_buf='cleaned_data.csv', index=False)
df.head(n=5)