In [2]:
import sys
from pathlib import Path

sys.path.append(str(Path.cwd().parent))  # adds project root

In [3]:
import pandas as pd
import yfinance as yf
import numpy as np
from src.functions.position_VB import positionVB
import plotly.express as px
from src.functions.plot_position import plot_positions_2mas
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)


In [6]:
import pandas as pd

summary = pd.read_csv("outputs_strategy02/strategy02_summary.csv")

param_cols = ["SMA_win","slow_mult","slope_m","z_entry","z_stop","z_sl","z_mom","cooldown"]

best_params_per_quarter = (
    summary
    .set_index(["quarter","asset"])[param_cols]
    .apply(lambda r: tuple(r.values), axis=1)
    .unstack("asset")
    .to_dict(orient="index")
)

# example:
best_params_per_quarter["data1_2025_Q2"]["NQ"] 


(np.float64(240.0),
 np.float64(4.0),
 np.float64(10.0),
 np.float64(2.0),
 np.float64(4.5),
 np.float64(2.0),
 np.float64(0.05),
 np.float64(5.0))

In [13]:
import pandas as pd

summary = pd.read_csv("outputs_strategy02/strategy02_summary.csv")

param_cols = ["SMA_win","slow_mult","slope_m","z_entry","z_stop","z_sl","z_mom","cooldown"]

# build results_by_dataset with the same access pattern: results_by_dataset[q][asset] -> (net_pnl, params_tuple)
tmp = summary.set_index(["quarter","asset"])
results_by_dataset = {}

for (q, a), row in tmp.iterrows():
    params = tuple(row[param_cols].values)
    net_pnl = float(row["net_pnl"])
    results_by_dataset.setdefault(q, {})[a] = (net_pnl, params)

# example:
results_by_dataset["data1_2025_Q2"]["NQ"][1]


(np.float64(240.0),
 np.float64(4.0),
 np.float64(10.0),
 np.float64(2.0),
 np.float64(4.5),
 np.float64(2.0),
 np.float64(0.05),
 np.float64(5.0))

## Group 1

In [14]:
import pandas as pd
import os

# List all file paths (adjust the folder path as necessary)
file_paths = [
    "../data_oos/data1_2023_Q2.parquet",
    "../data_oos/data1_2024_Q1.parquet",
    "../data_oos/data1_2024_Q3.parquet",
    "../data_oos/data1_2025_Q3.parquet",
    "../data_oos/data1_2025_Q4.parquet",
    # "../data_oos/data2_2023_Q2.parquet",
    # "../data_oos/data2_2024_Q1.parquet",
    # "../data_oos/data2_2024_Q3.parquet",
    # "../data_oos/data2_2025_Q3.parquet",
    # "../data_oos/data2_2025_Q4.parquet"
]

# Dictionary to hold all datasets
data_dict = {}

# Load each dataset
for file_path in file_paths:
    # Extract the dataset name from the filename
    dataset_name = os.path.basename(file_path).replace(".parquet", "")
    # Read the dataset into the dictionary
    data_dict[dataset_name] = pd.read_parquet(file_path, engine="fastparquet")

# Print the dictionary keys (dataset names)
print(data_dict.keys())


dict_keys(['data1_2023_Q2', 'data1_2024_Q1', 'data1_2024_Q3', 'data1_2025_Q3', 'data1_2025_Q4'])


In [15]:
# APPLY MASK
def add_group1_masks(index: pd.DatetimeIndex) -> pd.DataFrame:
    """
    Creates masks required by the assignment for Group 1:
    - NaN windows for calculations: 9:31-9:40 and 15:51-16:00
    - no trading: 9:31-9:55
    - flat from: 15:40 to end (no overnight)
    """
    t = index.time

    nan_morning = (t >= pd.to_datetime("09:31").time()) & (t <= pd.to_datetime("09:40").time())
    nan_close   = (t >= pd.to_datetime("15:51").time()) & (t <= pd.to_datetime("16:00").time())
    mask_nan = nan_morning | nan_close

    no_trade = (t >= pd.to_datetime("09:31").time()) & (t <= pd.to_datetime("09:55").time())
    flat_from = (t >= pd.to_datetime("15:40").time())

    return pd.DataFrame(
        {"mask_nan": mask_nan, "mask_no_trade": no_trade, "mask_flat_from": flat_from},
        index=index
    )



In [16]:
# Fix index -> DatetimeIndex, then add masks
for k in list(data_dict.keys()):
    df = data_dict[k].copy()

    if "datetime" in df.columns:
        df["datetime"] = pd.to_datetime(df["datetime"])
        df = df.set_index("datetime")
    else:
        df.index = pd.to_datetime(df.index)  # if index already holds datetimes as strings

    masks = add_group1_masks(df.index)
    data_dict[k] = df.join(masks, how="left")


In [23]:
# Run OOS with previous-quarter params
from src.functions.runOneAsset import run_one_asset_rerun

prev_q = {# OOS : Best param 
    "data1_2023_Q2": "data1_2023_Q1",
    "data1_2024_Q1": "data1_2023_Q1",
    "data1_2024_Q3": "data1_2023_Q3",
    "data1_2025_Q3": "data1_2025_Q1", 
    "data1_2025_Q4": "data1_2024_Q4",
}

asset_cols = ["NQ", "SP"]

oos_outputs, oos_intr_outputs = {}, {}

for q_oos, df_oos in data_dict.items():
    q_is = prev_q[q_oos]
    oos_outputs[q_oos], oos_intr_outputs[q_oos] = {}, {}

    for a in asset_cols:
        best_params = results_by_dataset[q_is][a][1]

        res = run_one_asset_rerun(df_oos, a, best_params)

        oos_outputs[q_oos][a]      = res
        oos_intr_outputs[q_oos][a] = res["out_intr"]


In [24]:
import numpy as np
import pandas as pd

TRADING_DAYS = 252
rows = []

for q, assets in oos_outputs.items():
    for a, res in assets.items():

        gross = pd.Series(res["gross_pnl_series"],
                          index=res["out_intr"].index)
        net   = pd.Series(res["net_pnl_series"],
                          index=res["out_intr"].index)
        cap   = res["init_capital"]

        # cumulative PnL
        gross_cum = gross.sum()
        net_cum   = net.sum()

        # DAILY strategy PnL
        gross_daily = gross.groupby(gross.index.date).sum()
        net_daily   = net.groupby(net.index.date).sum()

        # DAILY strategy returns
        gross_ret = gross_daily / cap
        net_ret   = net_daily / cap

        # Sharpe (strategy-correct)
        grossSR = np.nan if gross_ret.std(ddof=1) == 0 else (
            gross_ret.mean() / gross_ret.std(ddof=1) * np.sqrt(TRADING_DAYS)
        )
        netSR = np.nan if net_ret.std(ddof=1) == 0 else (
            net_ret.mean() / net_ret.std(ddof=1) * np.sqrt(TRADING_DAYS)
        )

        # equity curves (daily)
        eq_gross = cap + gross_daily.cumsum()
        eq_net   = cap + net_daily.cumsum()

        # drawdowns
        dd_gross = (eq_gross.cummax() - eq_gross).max()
        dd_net   = (eq_net.cummax() - eq_net).max()

        # Calmar
        grossCR = np.nan if dd_gross == 0 else gross_cum / dd_gross
        netCR   = np.nan if dd_net   == 0 else net_cum   / dd_net

        rows.append({
            "quarter": q,
            "asset": a,
            "grossSR": grossSR,
            "netSR": netSR,
            "grossCR": grossCR,
            "netCR": netCR,
            "gross_cumPnL": gross_cum,
            "net_cumPnL": net_cum,
            "av.ntrades": res["n_trades"],
        })

perf_table = pd.DataFrame(rows)


In [25]:
perf_table

Unnamed: 0,quarter,asset,grossSR,netSR,grossCR,netCR,gross_cumPnL,net_cumPnL,av.ntrades
0,data1_2023_Q2,NQ,-0.173529,-0.173529,-0.089546,-0.089546,-43.074,-43.074,72
1,data1_2023_Q2,SP,-1.747914,-1.747914,-0.761499,-0.761499,-103.222,-103.222,100
2,data1_2024_Q1,NQ,-0.741695,-0.741695,-0.504297,-0.504297,-214.961,-214.961,64
3,data1_2024_Q1,SP,2.775729,2.775729,2.988728,2.988728,173.932,173.932,100
4,data1_2024_Q3,NQ,-0.862929,-0.862929,-0.619516,-0.619516,-219.826,-219.826,42
5,data1_2024_Q3,SP,0.859428,0.859428,0.36071,0.36071,57.649,57.649,48
6,data1_2025_Q3,NQ,-3.517972,-3.517972,-0.931754,-0.931754,-757.608,-757.608,48
7,data1_2025_Q3,SP,0.014392,0.014392,0.013809,0.013809,0.532,0.532,26
8,data1_2025_Q4,NQ,-1.459561,-1.459561,-0.42569,-0.42569,-210.097,-210.097,40
9,data1_2025_Q4,SP,-0.390667,-0.390667,-0.14265,-0.14265,-19.413,-19.413,26


In [26]:
perf_table.to_csv("outputs_OOS_grp1/strategy02_perf.csv", index=False)


## Group 2

In [7]:
import pickle

# Define the path where the best parameters were saved
save_path = "/Users/shah/CODE_BOOK_3/code_document/hfdProject/HFD-Final-Project/Testing/outputs_grp2/best_params_per_quarter.sav"

# Load the saved best parameters back into the notebook
with open(save_path, 'rb') as f:
    results_by_dataset = pickle.load(f)

print("Best parameters loaded from saved file.")

Best parameters loaded from saved file.


In [5]:
import pandas as pd
import os

# List all file paths (adjust the folder path as necessary)
file_paths = [
    # "../data_oos/data1_2023_Q2.parquet",
    # "../data_oos/data1_2024_Q1.parquet",
    # "../data_oos/data1_2024_Q3.parquet",
    # "../data_oos/data1_2025_Q3.parquet",
    # "../data_oos/data1_2025_Q4.parquet",
    "../data_oos/data2_2023_Q2.parquet",
    "../data_oos/data2_2024_Q1.parquet",
    "../data_oos/data2_2024_Q3.parquet",
    "../data_oos/data2_2025_Q3.parquet",
    "../data_oos/data2_2025_Q4.parquet"
]

# Dictionary to hold all datasets
data_dict = {}

# Load each dataset
for file_path in file_paths:
    # Extract the dataset name from the filename
    dataset_name = os.path.basename(file_path).replace(".parquet", "")
    # Read the dataset into the dictionary
    data_dict[dataset_name] = pd.read_parquet(file_path, engine="fastparquet")

# Print the dictionary keys (dataset names)
print(data_dict.keys())


dict_keys(['data2_2023_Q2', 'data2_2024_Q1', 'data2_2024_Q3', 'data2_2025_Q3', 'data2_2025_Q4'])


In [6]:
# APPLY MASK
def add_group1_masks(index: pd.DatetimeIndex) -> pd.DataFrame:
    """
    Creates masks required by the assignment for Group 1:
    - NaN windows for calculations: 9:31-9:40 and 15:51-16:00
    - no trading: 9:31-9:55
    - flat from: 15:40 to end (no overnight)
    """
    t = index.time

    nan_morning = (t >= pd.to_datetime("09:31").time()) & (t <= pd.to_datetime("09:40").time())
    nan_close   = (t >= pd.to_datetime("15:51").time()) & (t <= pd.to_datetime("16:00").time())
    mask_nan = nan_morning | nan_close

    no_trade = (t >= pd.to_datetime("09:31").time()) & (t <= pd.to_datetime("09:55").time())
    flat_from = (t >= pd.to_datetime("15:40").time())

    return pd.DataFrame(
        {"mask_nan": mask_nan, "mask_no_trade": no_trade, "mask_flat_from": flat_from},
        index=index
    )



In [7]:
# Fix index -> DatetimeIndex, then add masks
for k in list(data_dict.keys()):
    df = data_dict[k].copy()

    if "datetime" in df.columns:
        df["datetime"] = pd.to_datetime(df["datetime"])
        df = df.set_index("datetime")
    else:
        df.index = pd.to_datetime(df.index)  # if index already holds datetimes as strings

    masks = add_group1_masks(df.index)
    data_dict[k] = df.join(masks, how="left")


In [35]:
# Run OOS with previous-quarter params
from src.functions.runOneAsset import run_one_asset_rerun

prev_q = {# OOS : Best param 
    "data2_2023_Q2": "data2_2023_Q1",
    "data2_2024_Q1": "data2_2023_Q1",
    "data2_2024_Q3": "data2_2023_Q3",
    "data2_2025_Q3": "data2_2025_Q1",
    "data2_2025_Q4": "data2_2024_Q4",
}

asset_cols = ["AUD", "CAD", "XAG", "XAU"]

oos_outputs, oos_intr_outputs = {}, {}

for q_oos, df_oos in data_dict.items():
    q_is = prev_q[q_oos]
    oos_outputs[q_oos], oos_intr_outputs[q_oos] = {}, {}

    for a in asset_cols:
        best_params = results_by_dataset[q_is][a][1]

        res = run_one_asset_rerun(df_oos, a, best_params)

        oos_outputs[q_oos][a]      = res
        oos_intr_outputs[q_oos][a] = res["out_intr"]


In [36]:
import numpy as np
import pandas as pd

TRADING_DAYS = 252
rows = []

for q, assets in oos_outputs.items():
    for a, res in assets.items():

        gross = pd.Series(res["gross_pnl_series"],
                          index=res["out_intr"].index)
        net   = pd.Series(res["net_pnl_series"],
                          index=res["out_intr"].index)
        cap   = res["init_capital"]

        # cumulative PnL
        gross_cum = gross.sum()
        net_cum   = net.sum()

        # DAILY strategy PnL
        gross_daily = gross.groupby(gross.index.date).sum()
        net_daily   = net.groupby(net.index.date).sum()

        # DAILY strategy returns
        gross_ret = gross_daily / cap
        net_ret   = net_daily / cap

        # Sharpe (strategy-correct)
        grossSR = np.nan if gross_ret.std(ddof=1) == 0 else (
            gross_ret.mean() / gross_ret.std(ddof=1) * np.sqrt(TRADING_DAYS)
        )
        netSR = np.nan if net_ret.std(ddof=1) == 0 else (
            net_ret.mean() / net_ret.std(ddof=1) * np.sqrt(TRADING_DAYS)
        )

        # equity curves (daily)
        eq_gross = cap + gross_daily.cumsum()
        eq_net   = cap + net_daily.cumsum()

        # drawdowns
        dd_gross = (eq_gross.cummax() - eq_gross).max()
        dd_net   = (eq_net.cummax() - eq_net).max()

        # Calmar
        grossCR = np.nan if dd_gross == 0 else gross_cum / dd_gross
        netCR   = np.nan if dd_net   == 0 else net_cum   / dd_net

        rows.append({
            "quarter": q,
            "asset": a,
            "grossSR": grossSR,
            "netSR": netSR,
            "grossCR": grossCR,
            "netCR": netCR,
            "gross_cumPnL": gross_cum,
            "net_cumPnL": net_cum,
            "av.ntrades": res["n_trades"],
        })

perf_table = pd.DataFrame(rows)


### GROUP 2 Assets

In [None]:
perf_table.sort_values(by= "netSR")

Unnamed: 0,quarter,asset,grossSR,netSR,grossCR,netCR,gross_cumPnL,net_cumPnL,av.ntrades
17,data2_2025_Q4,CAD,-5.782293,-6.365314,-0.868221,-0.908084,-715.883861,-875.883861,16
9,data2_2024_Q3,CAD,-2.883952,-4.691547,-0.811109,-0.91169,-1384.766077,-2344.766077,96
5,data2_2024_Q1,CAD,-2.773326,-4.564697,-0.775191,-0.863718,-1925.506265,-3285.506265,136
3,data2_2023_Q2,XAU,-2.752694,-3.164955,-0.875832,-0.89902,-10139.6,-11729.6,106
4,data2_2024_Q1,AUD,-2.047304,-2.75001,-0.821727,-0.872845,-1180.0,-1620.0,44
16,data2_2025_Q4,AUD,-1.851024,-2.514026,-0.768435,-0.977191,-594.0,-814.0,22
8,data2_2024_Q3,AUD,-0.7032,-1.907092,-0.464894,-0.858065,-437.0,-1197.0,76
6,data2_2024_Q1,XAG,-1.75902,-1.906488,-0.612288,-0.641497,-4335.0,-4715.0,38
7,data2_2024_Q1,XAU,-1.360164,-1.842277,-0.570904,-0.679731,-5982.5,-8142.5,144
1,data2_2023_Q2,CAD,0.069163,-1.375614,0.042355,-0.612377,57.810658,-1162.189342,122


In [40]:
perf_table.to_csv("outputs_OOS_grp2/strategy02_perf.csv", index=False)
