## Load dataset

In [16]:
import pandas as pd
# Read the raw orders file into the working frame used by every later cell.
df = pd.read_csv(filepath_or_buffer="raw_data.csv")

## Detect missing values

In [12]:
# Per-column count of null cells; shown as the cell's output.
df.isna().sum()

order_id         0
customer_name    1
order_date       3
product          1
price            4
dtype: int64

## Handle missing values

In [13]:
# Forward-fill: each null cell takes the most recent non-null value above it
# in the same column. A null with no earlier value in its column stays null.
# Rebinding (rather than inplace=True) keeps the cell free of hidden in-place
# mutation and is the form pandas recommends.
df = df.ffill()

## Fix data types

In [14]:
# Convert to proper dtypes. errors='coerce' turns unparseable entries into
# NaT/NaN instead of raising, which creates *new* missing values at this point.
# The notebook's missing-value step has already run by now, so those nulls
# would otherwise be written to the cleaned file untouched; forward-fill the
# two coerced columns here so they follow the same policy.
# NOTE(review): confirm forward-filling dates/prices is the intended policy
# for unparseable values (as opposed to dropping those rows).
df['order_date'] = pd.to_datetime(df['order_date'], errors='coerce').ffill()
df['price'] = pd.to_numeric(df['price'], errors='coerce').ffill()

## Remove duplicates

In [15]:
# Drop rows that are exact duplicates across all columns, keeping the first
# occurrence. Rebinding instead of inplace=True avoids mutating the frame
# behind the reader's back and keeps the step chainable.
df = df.drop_duplicates()

## Standardize column names

In [17]:
# Normalise headers: trim surrounding whitespace, lowercase, and replace
# spaces with underscores.
# NOTE(review): earlier cells already look columns up by name
# (e.g. 'order_date'), so this step runs after those lookups - confirm
# whether it should come right after loading instead.
normalized_names = (
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(" ", "_")
)
df.columns = normalized_names

## Save cleaned dataset

In [18]:
# Persist the cleaned frame; index=False keeps the row index out of the file.
df.to_csv(path_or_buf="cleaned_data.csv", index=False)

## Create cleaning log

In [19]:
# One line per cleaning step performed by this notebook. The column-name
# standardisation step is included so the log covers every transformation
# applied before the file was saved.
log_entries = [
    "Missing values handled",
    "Duplicates removed",
    "Data types fixed",
    "Column names standardized",
]

# Explicit encoding so the log is written identically on every platform
# instead of depending on the locale's default.
with open("cleaning_log.txt", "w", encoding="utf-8") as f:
    f.writelines(f"{entry}\n" for entry in log_entries)