# 02 - Data Cleaning

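This notebook loads the raw NIFTY spot, futures, and options CSVs, cleans each dataset, merges them into a single table, and saves the cleaned files, the merged file, and a short cleaning report.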

In [None]:
import sys
# Extend the module search path so the data_utils import below can resolve
# from ../src. The path is relative — presumably to the notebook's working
# directory; confirm how the notebook is launched.
sys.path.append('../src')

# Project helpers used by the cleaning and merging cells.
from data_utils import clean_data, align_and_merge
import pandas as pd
import os

In [None]:
# Directory the raw CSVs are read from
data_dir = '../data'


def _read_raw(filename):
    """Load one raw CSV from ``data_dir`` into a DataFrame."""
    return pd.read_csv(f'{data_dir}/{filename}')


# One raw frame per instrument
spot_df = _read_raw('nifty_spot_5min.csv')
futures_df = _read_raw('nifty_futures_5min.csv')
options_df = _read_raw('nifty_options_5min.csv')

# Quick sanity check: (rows, columns) of each frame
print(
    f"Loaded: Spot {spot_df.shape}, "
    f"Futures {futures_df.shape}, "
    f"Options {options_df.shape}"
)

In [None]:
# Print the total number of null cells found in each raw frame
print("=== Missing Values ===")
for label, frame in (
    ("Spot", spot_df),
    ("Futures", futures_df),
    ("Options", options_df),
):
    # First sum gives per-column null counts, second sum totals them
    print(f"{label}:", frame.isnull().sum().sum())

In [None]:
# Apply the shared cleaning routine to each raw frame.
# Calls run in spot -> futures -> options order, each tagged with its label.
clean_spot, clean_futures, clean_options = [
    clean_data(raw, tag)
    for raw, tag in (
        (spot_df, "Spot"),
        (futures_df, "Futures"),
        (options_df, "Options"),
    )
]

In [None]:
# Write each cleaned frame to its own CSV under data_dir.
# index=False keeps the DataFrame index out of the output file.
for cleaned, filename in (
    (clean_spot, 'nifty_spot_5min_clean.csv'),
    (clean_futures, 'nifty_futures_5min_clean.csv'),
    (clean_options, 'nifty_options_5min_clean.csv'),
):
    cleaned.to_csv(f'{data_dir}/{filename}', index=False)

In [None]:
# Combine the three cleaned frames into one DataFrame
merged_df = align_and_merge(clean_spot, clean_futures, clean_options)

merged_shape = merged_df.shape
print(f"Merged Data Shape: {merged_shape}")

# Kept as the final expression so the notebook renders the first rows
merged_df.head()

In [None]:
# Save Merged Data
# NOTE(review): index=False discards the DataFrame index — confirm that
# align_and_merge returns any key it needs (e.g. the timestamp) as a regular
# column rather than as the index.
merged_df.to_csv(f'{data_dir}/nifty_merged_5min.csv', index=False)

# Save Cleaning Report: raw -> cleaned row counts per dataset, plus the
# (rows, columns) shape of the merged frame.
report = f"""Data Cleaning Report
====================
Spot: {len(spot_df)} -> {len(clean_spot)} rows
Futures: {len(futures_df)} -> {len(clean_futures)} rows
Options: {len(options_df)} -> {len(clean_options)} rows
Merged: {merged_df.shape}
"""
# Explicit encoding so the report file is written identically regardless of
# the platform's default locale encoding.
with open(f'{data_dir}/data_cleaning_report.txt', 'w', encoding='utf-8') as f:
    f.write(report)
print(report)