In [1]:
import pandas as pd

# TradeSphere trade ETL: read the raw trades CSV, derive helper columns,
# roll the trades up per counterparty, and write both tables back to disk.

raw_path = "/Users/yashashreeshinde/TradeSphere/data/trades.csv"
processed_path = "/Users/yashashreeshinde/TradeSphere/data/processed_trades.csv"
summary_path = "/Users/yashashreeshinde/TradeSphere/data/summary_metrics.csv"

# --- Extract -------------------------------------------------------------
print(" Loading raw trades...")
df = pd.read_csv(raw_path, parse_dates=['trade_date'])

# --- Transform -----------------------------------------------------------
print(" Cleaning and transforming...")

# Discard every row that has a missing value in any column.
df = df.dropna()

# Derived columns: notional expressed in millions, plus the calendar
# year and month of the trade date for later grouping.
df = df.assign(
    notional_mn=df['notional'] / 1e6,
    year=df['trade_date'].dt.year,
    month=df['trade_date'].dt.month,
)

# Both per-counterparty roll-ups share the same grouping.
by_counterparty = df.groupby('counterparty')

# Total notional (in millions) per counterparty.
exposure = (
    by_counterparty['notional_mn']
    .sum()
    .reset_index()
    .rename(columns={'notional_mn': 'total_exposure_mn'})
)

# Summed PnL and liquidity per counterparty.
agg_metrics = by_counterparty[['pnl', 'liquidity']].sum().reset_index()

# One summary row per counterparty: exposure alongside the summed metrics.
summary = exposure.merge(agg_metrics, on='counterparty')

# --- Load ----------------------------------------------------------------
df.to_csv(processed_path, index=False)
summary.to_csv(summary_path, index=False)

print(" Transformation complete! Processed files saved:")
print("- data/processed_trades.csv (clean trades)")
print("- data/summary_metrics.csv (aggregated metrics)")


 Loading raw trades...
 Cleaning and transforming...
 Transformation complete! Processed files saved:
- data/processed_trades.csv (clean trades)
- data/summary_metrics.csv (aggregated metrics)
