# Performance Metrics and Visualization

This notebook computes and visualizes portfolio performance metrics for US and India strategies built in the data collection step. It covers: Total Return, Annualized Return (CAGR), Volatility, Sharpe Ratio (excess over risk-free), and Max Drawdown.

## Methodology

- Total return: the product of (1 + daily return) over all days, minus 1.
- Annualized return: (1 + total_return) ** (annualization_factor / periods) - 1.
- Volatility: std(daily_returns) * sqrt(annualization_factor).
- Sharpe ratio: (annualized_return - annual_rf) / volatility.
- Max drawdown: the largest peak-to-trough decline of the cumulative growth curve, reported as a negative fraction.

Assumptions: returns are daily (252 trading days per year); the annual risk-free rate is configurable via `ANNUAL_RF` (default 0).

In [None]:
# Imports
import os
import json
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's 'whitegrid' style and 'talk' context to all subsequent plots.
sns.set(style='whitegrid', context='talk')


In [None]:
# Configuration
# Directory holding the returns CSVs expected from
# strategy_selection_and_data_download.ipynb.
DATA_DIR = Path('notebooks/data_collection')

# Default per-strategy files; rename here if your pipeline saved them differently.
# Any other *_returns.csv found in DATA_DIR is detected automatically at load time.
US_RETURNS_FILE = DATA_DIR.joinpath('us_strategy_returns.csv')
INDIA_RETURNS_FILE = DATA_DIR.joinpath('india_strategy_returns.csv')

# Periods per year used to annualize return and volatility (daily data).
ANNUALIZATION = 252
# Annual risk-free rate as a decimal, e.g. 0.02 for 2%.
ANNUAL_RF = 0.0

print('Looking for returns files in', DATA_DIR.resolve())


In [None]:
# Helper functions
def ensure_datetime_index(df):
    """Return ``df`` indexed by a sorted ``DatetimeIndex`` when a date column exists.

    If the index is already a ``DatetimeIndex`` the frame is returned as-is.
    Otherwise the first column whose name contains ``"date"`` (case-insensitive)
    is parsed with ``pd.to_datetime``, promoted to the index, and the result is
    sorted by that index. If no such column exists the frame is returned as-is.

    The input frame is never modified; a new frame is returned whenever a date
    column is promoted.

    Parameters
    ----------
    df : pd.DataFrame
        Frame as loaded from disk.

    Returns
    -------
    pd.DataFrame
        Either ``df`` itself or a new, date-indexed and sorted frame.
    """
    if isinstance(df.index, pd.DatetimeIndex):
        return df
    for col in df.columns:
        # Column labels are not guaranteed to be strings (e.g. integer headers),
        # so only string labels are considered as date-column candidates.
        if isinstance(col, str) and 'date' in col.lower():
            # assign() builds a new frame, leaving the caller's data untouched.
            parsed = df.assign(**{col: pd.to_datetime(df[col])})
            return parsed.set_index(col).sort_index()
    return df

def compute_metrics(returns: pd.Series, annualization: int = 252, annual_rf: float = 0.0):
    """Compute summary performance metrics from a series of periodic simple returns.

    Parameters
    ----------
    returns : pd.Series
        Periodic simple returns as decimals; NaN entries are dropped.
    annualization : int
        Number of periods per year used to annualize return and volatility.
    annual_rf : float
        Annual risk-free rate (decimal) subtracted from the annualized return
        in the Sharpe ratio.

    Returns
    -------
    dict
        Keys ``'Total Return'``, ``'Annualized Return'``, ``'Volatility'``,
        ``'Sharpe'`` and ``'Max Drawdown'``. Every value is NaN when no
        non-NaN returns are supplied. ``'Sharpe'`` is NaN when volatility is
        zero or undefined. ``'Max Drawdown'`` is zero or negative.
    """
    metric_names = ['Total Return', 'Annualized Return', 'Volatility', 'Sharpe', 'Max Drawdown']
    returns = returns.dropna()
    periods = returns.shape[0]
    if periods == 0:
        return {k: np.nan for k in metric_names}

    total_return = (1 + returns).prod() - 1
    ann_return = (1 + total_return) ** (annualization / periods) - 1
    vol = returns.std() * math.sqrt(annualization)

    # Volatility is NaN for a single observation and 0 for a constant series;
    # the ratio is undefined in both cases.
    sharpe = np.nan
    if not np.isnan(vol) and vol != 0:
        sharpe = (ann_return - annual_rf) / vol

    # Drawdown is measured against the running peak of the growth-of-1 curve.
    # The starting capital of 1 counts as a peak, so losses that begin on the
    # very first period are not hidden by a curve that starts below 1.
    equity_curve = (1 + returns).cumprod()
    rolling_max = equity_curve.cummax().clip(lower=1.0)
    drawdown = equity_curve / rolling_max - 1
    max_dd = drawdown.min()

    return {
        'Total Return': total_return,
        'Annualized Return': ann_return,
        'Volatility': vol,
        'Sharpe': sharpe,
        'Max Drawdown': max_dd,
    }

def load_returns_files():
    """Collect ``(label, path)`` pairs for the returns CSVs to analyse.

    The configured US and India files come first (when they exist on disk),
    followed by every other ``*_returns.csv`` in ``DATA_DIR``. Discovered
    files are labelled from their file name: the ``_returns`` suffix is
    removed, underscores become spaces and the result is title-cased.

    Returns
    -------
    list[tuple[str, pathlib.Path]]
        Possibly empty list of ``(label, path)`` pairs.
    """
    configured = [
        ('US Strategy', US_RETURNS_FILE),
        ('India Strategy', INDIA_RETURNS_FILE),
    ]
    files = [(label, path) for label, path in configured if path.exists()]
    already_listed = {path for _, path in files}
    # Path.glob does not guarantee an ordering; sort so that the strategy
    # order is reproducible across runs and machines.
    for p in sorted(DATA_DIR.glob('*_returns.csv')):
        if p in already_listed:
            continue
        label = p.stem.replace('_returns', '').replace('_', ' ').title()
        files.append((label, p))
    return files


In [None]:
# Load data for all detected strategies
pairs = load_returns_files()
if not pairs:
    # Report the configured directory so the message stays accurate when
    # DATA_DIR is changed.
    raise FileNotFoundError(f'No *_returns.csv files found in {DATA_DIR}. Please run data collection notebook first or adjust paths.')

strategies = {}
for label, path in pairs:
    df = ensure_datetime_index(pd.read_csv(path))
    if len(df.columns) == 0:
        raise ValueError(f'{path} has no data columns to read returns from.')
    # Infer the returns column, most specific rule first:
    #   1. a column named exactly "return" (any letter case),
    #   2. the first column containing "ret", else the first containing "pct",
    #   3. the last column as a final fallback.
    # Matching case-insensitively throughout keeps e.g. "Return" from losing
    # out to an earlier partial match such as "cum_ret".
    lowered = [(c, str(c).lower()) for c in df.columns]
    exact = [c for c, low in lowered if low == 'return']
    partial = [c for c, low in lowered if 'ret' in low] or [c for c, low in lowered if 'pct' in low]
    if exact:
        ret_col = exact[0]
    elif partial:
        ret_col = partial[0]
    else:
        ret_col = df.columns[-1]
    # Non-numeric entries become NaN.
    returns = pd.to_numeric(df[ret_col], errors='coerce')
    strategies[label] = returns

print('Loaded strategies:', list(strategies.keys()))


In [None]:
# Build the metrics table: one row per strategy, highest annualized return first
metrics = {
    label: compute_metrics(ret, annualization=ANNUALIZATION, annual_rf=ANNUAL_RF)
    for label, ret in strategies.items()
}
column_order = ['Total Return', 'Annualized Return', 'Volatility', 'Sharpe', 'Max Drawdown']
metrics_df = (
    pd.DataFrame(metrics)
    .T[column_order]
    .sort_values(by='Annualized Return', ascending=False)
)

# Fractions are shown as percentages; the Sharpe ratio is a plain number.
percent_format = '{:.2%}'
display(metrics_df.style.format({
    'Total Return': percent_format,
    'Annualized Return': percent_format,
    'Volatility': percent_format,
    'Sharpe': '{:.2f}',
    'Max Drawdown': percent_format,
}))


In [None]:
# Plot cumulative returns for each strategy
cum_df = pd.DataFrame({label: (1 + ret.dropna()).cumprod() for label, ret in strategies.items()})
# Strategies may have observations on different dates, so aligning them on a
# shared index introduces NaNs that would break the plotted lines. Carry the
# last known value forward on those dates; rows before a strategy's first
# observation stay NaN.
cum_df = cum_df.ffill()
ax = cum_df.plot(figsize=(12,6), linewidth=2)
ax.set_title('Cumulative Return by Strategy')
ax.set_ylabel('Growth of $1')
ax.legend(title='Strategy')
plt.show()


In [None]:
# Bar charts for key metrics
plot_df = metrics_df.copy()
# One entry per panel: (column, panel title, bar colour, show values as percent).
# Max Drawdown is a fraction like return and volatility, so it gets the same
# percent axis; the Sharpe ratio is a plain number.
panels = [
    ('Annualized Return', 'Annualized Return', 'C0', True),
    ('Volatility', 'Volatility', 'C1', True),
    ('Sharpe', 'Sharpe Ratio', 'C2', False),
    ('Max Drawdown', 'Max Drawdown', 'C3', True),
]
fig, axes = plt.subplots(2, 2, figsize=(14,10))
for ax, (column, title, color, as_percent) in zip(axes.flat, panels):
    plot_df[column].plot(kind='bar', ax=ax, color=color)
    ax.set_title(title)
    if as_percent:
        ax.set_ylabel('%')
        # A fresh formatter per axis: formatter instances are bound to one axis.
        ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y*100:.0f}%'))
    ax.tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.show()


## Integration of Global Trends & Transitional Market Insights

[Placeholder] Discuss how macro trends, policy shifts, and cross-asset transitions could affect the strategies and metrics above.