In [1]:
import pandas as pd
import numpy as np

def cumulative_returns(prices):
    """Return each column's fractional change relative to its first valid price.

    Args:
        prices: DataFrame of prices, one column per ticker, one row per date.

    Returns:
        DataFrame of the same shape holding ``price / first_valid_price - 1``.
        Rows before a column's first valid price stay NaN.
    """
    if prices.empty:
        return prices.copy()
    # Using row 0 directly would turn every column that starts with NaN
    # (e.g. a ticker with no price on the first date) into an all-NaN column;
    # back-filling first makes row 0 hold each column's first valid price.
    base_prices = prices.bfill().iloc[0]
    return prices / base_prices - 1


def to_long_format(returns):
    """Melt a wide returns table into Date / Ticker / Cumulative_Return rows.

    The index is renamed to ``Date`` first so the melt works whatever the
    index was called (or if it had no name) in the input.
    """
    return (
        returns.rename_axis("Date")
        .reset_index()
        .melt(id_vars="Date", var_name="Ticker", value_name="Cumulative_Return")
    )


def annualized_volatility(daily_returns, periods_per_year=252):
    """Scale the per-column standard deviation of returns to an annual figure.

    Args:
        daily_returns: DataFrame of periodic returns, one column per ticker.
        periods_per_year: Number of return periods in a year (default 252).

    Returns:
        Series indexed by column name.
    """
    return daily_returns.std() * np.sqrt(periods_per_year)


if __name__ == "__main__":
    OUTPUT_PATH = "data/bursa_analysis_final.csv"

    # 1. Load your data
    df_raw = pd.read_csv("data/bursa_full_universe.csv", index_col=0, parse_dates=True)
    df_metadata = pd.read_csv("data/mapping_table.csv")

    # 2. Handle Missing Values (Forward Fill)
    # Carry the last known price forward over gaps so each series is continuous.
    # Leading gaps (before a column's first price) are NOT filled by ffill.
    df_raw = df_raw.ffill()

    # 3. Calculate Performance Metrics
    # % change since each ticker's first available price (Cumulative Return)
    df_returns = cumulative_returns(df_raw)

    # 4. Convert from 'Wide' to 'Long' Format
    # This is the 'Melt' process crucial for BI tools
    df_long = to_long_format(df_returns)

    # 5. Add Volatility (Risk)
    # Standard deviation of daily returns over the FULL history in the file,
    # annualized with 252 periods per year.
    # NOTE(review): returns are taken on the forward-filled prices, so filled
    # rows contribute 0% returns to the estimate.
    # fill_method=None: the data is already forward-filled, and relying on the
    # implicit default fill is deprecated in recent pandas.
    daily_pct_change = df_raw.pct_change(fill_method=None)
    volatility = annualized_volatility(daily_pct_change)
    vol_df = volatility.rename_axis("Ticker").reset_index(name="Volatility_Risk")

    # 6. Merge with Metadata (Sectors & ESG Status)
    df_final = pd.merge(df_long, df_metadata, on="Ticker", how="left")
    df_final = pd.merge(df_final, vol_df, on="Ticker", how="left")

    # 7. Save the Cleaned File
    df_final.to_csv(OUTPUT_PATH, index=False)
    # Report the path that was actually written.
    print(f"Phase 2 Complete: Cleaned data saved to {OUTPUT_PATH}")

Phase 2 Complete: Cleaned data saved to data/processed/
