In [91]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [92]:
os.makedirs("outputs", exist_ok=True)
plt.rcParams["figure.dpi"] = 150
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [93]:
CRYPTO_LIST = ["BTC", "ETH", "LTC", "BCH"]
MARKET_CHOICES = [
    ["FIAT"],
    ["EQUITY"],
    ["GOLD"],
    ["ENERGY"],
    ["FIAT", "EQUITY"],
    ["FIAT", "GOLD"],
    ["FIAT", "EQUITY", "GOLD", "ENERGY"],
]
BASELINE_WINDOW = 12
BASELINE_EPOCHS = 25
BASELINE_HIDDEN = 4
BASELINE_LR = 0.001
BASELINE_ACT = "tanh"

In [94]:
def load_and_prepare_prices():
    base_path = "data"
    files = {
        "BTC": "CBBTCUSD.csv",
        "ETH": "CBETHUSD.csv",
        "LTC": "CBLTCUSD.csv",
        "BCH": "CBBCHUSD.csv",
        "FIAT": "DTWEXBGS.csv",
        "EQUITY": "CRSP.csv",
        "GOLD": "NASDAQQGLDI.csv",
        "ENERGY": "VDE.csv",
    }

    dfs = {}

    for name, filename in files.items():
        path = os.path.join(base_path, filename)
        df = pd.read_csv(path)
        df.columns = [c.lower().strip() for c in df.columns]

        date_col = None
        for col in df.columns:
            if "date" in col:
                date_col = col
                break
        if date_col is None:
            raise ValueError(f"No date column found in {filename}")

        df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
        df = df.dropna(subset=[date_col])
        df = df.sort_values(date_col)

        num_cols = [c for c in df.columns if c != date_col]
        if len(num_cols) == 0:
            raise ValueError(f"No price/value column found in {filename}")
        price_col = num_cols[0]

        df = df.resample("M", on=date_col).last()

        df[name] = np.log(df[price_col]).diff()

        dfs[name] = df[[name]]

    data = pd.concat(dfs.values(), axis=1).dropna()
    data.index.name = "Date"

    return data


def split_sets(dates, X, y, ids):
    mask_train = (dates >= "2015-01-01") & (dates <= "2019-12-31")
    mask_val = (dates >= "2020-01-01") & (dates <= "2021-12-31")
    mask_test = (dates >= "2022-01-01") & (dates <= "2025-10-31")

    def subset(mask):
        idx = np.array(mask)
        return X[idx], y[idx], [ids[i] for i, v in enumerate(mask) if v]

    return subset(mask_train), subset(mask_val), subset(mask_test)


def p(msg):
    print(msg, flush=True)


def make_panel(data, market_assets, w):
    X, y, ids = [], [], []
    for crypto in ["BTC", "ETH", "LTC", "BCH"]:
        for t in range(w, len(data) - 1):
            past_crypto = data[crypto].iloc[t - w : t].values
            past_mkts = data[market_assets].iloc[t - w : t].values.flatten()
            X.append(np.concatenate([past_crypto, past_mkts]))
            y.append(data[crypto].iloc[t + 1])
            ids.append((data.index[t + 1], crypto))
    return np.array(X), np.array(y), ids


def build_panel_for_market(data, market_assets, w):
    X, y, ids = make_panel(data, market_assets, w)

    dates = pd.to_datetime([d for d, _ in ids])
    (Xtr, ytr, ids_tr), (Xval, yval, ids_val), (Xte, yte, ids_te) = split_sets(
        dates, X, y, ids
    )
    return {
        "market_assets": market_assets,
        "w": w,
        "Xtr": Xtr,
        "ytr": ytr,
        "ids_tr": ids_tr,
        "Xval": Xval,
        "yval": yval,
        "ids_val": ids_val,
        "Xte": Xte,
        "yte": yte,
        "ids_te": ids_te,
    }


def predict_betas_df(model, X, ids, asset_label):
    if len(X) == 0:
        return pd.DataFrame(columns=["Date", "Crypto", "Asset", "Beta"])
    model.eval()
    with torch.no_grad():
        X_t = torch.tensor(X, dtype=torch.float32)
        betas = model(X_t).cpu().numpy().ravel()
    out = pd.DataFrame(
        {
            "Date": pd.to_datetime([d for d, _ in ids]),
            "Crypto": [c for _, c in ids],
            "Asset": asset_label,
            "Beta": betas,
        }
    )
    return out

In [95]:
class NeuralBetaNet(nn.Module):
    def __init__(self, in_dim, hidden, activation):
        super().__init__()
        act = {
            "linear": nn.Identity(),
            "sigmoid": nn.Sigmoid(),
            "tanh": nn.Tanh(),
            "relu": nn.ReLU(),
        }[activation]
        self.model = nn.Sequential(nn.Linear(in_dim, hidden), act, nn.Linear(hidden, 1))

    def forward(self, x):
        return self.model(x)


def rmse_loss(pred, target):
    return torch.sqrt(torch.mean((pred - target)**2))

In [96]:
def train_nn(Xtr, ytr, Xval, yval, hidden, lr, act, epochs=10):
    in_dim = Xtr.shape[1]
    model = NeuralBetaNet(in_dim, hidden, act).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    loss_hist, val_hist = [], []

    Xtr_t = torch.tensor(Xtr, dtype=torch.float32).to(device)
    ytr_t = torch.tensor(ytr, dtype=torch.float32).unsqueeze(1).to(device)
    Xval_t = torch.tensor(Xval, dtype=torch.float32).to(device)
    yval_t = torch.tensor(yval, dtype=torch.float32).unsqueeze(1).to(device)

    for ep in range(epochs):
        model.train()
        opt.zero_grad()
        out = model(Xtr_t)
        loss = rmse_loss(out, ytr_t)
        loss.backward()
        opt.step()

        model.eval()
        with torch.no_grad():
            val_out = model(Xval_t)
            val_loss = rmse_loss(val_out, yval_t)
        loss_hist.append(loss.item())
        val_hist.append(val_loss.item())
        print(
            f"Epoch {ep+1:03d} | train RMSE {loss.item():.6f} | val RMSE {val_loss.item():.6f}"
        )
    return model, loss_hist, val_hist


def grid_search(Xtr, ytr, Xval, yval, market_assets):
    params = {
        "hidden": [4, 8, 16],
        "lr": [0.001, 0.01, 0.1],
        "act": ["linear", "sigmoid", "tanh", "relu"],
        "w": [12, 24, 36],
    }
    results = []
    for h in params["hidden"]:
        for lr in params["lr"]:
            for act in params["act"]:
                for w in params["w"]:
                    try:
                        model, loss_hist, val_hist = train_nn(
                            Xtr, ytr, Xval, yval, hidden=h, lr=lr, act=act, epochs=10
                        )
                        val_rmse = val_hist[-1]
                        results.append([market_assets, w, h, lr, act, val_rmse])
                    except Exception as e:
                        print(f"Error {market_assets}, w={w}, act={act}: {e}")
    df = pd.DataFrame(
        results,
        columns=["MarketAssets", "Window", "Hidden", "LR", "Activation", "Val_RMSE"],
    )
    return df

In [97]:
def plot_learning_curve(loss_hist, val_hist, label, out_path):
    plt.figure(figsize=(6, 4))
    plt.plot(loss_hist, label="Train RMSE")
    plt.plot(val_hist, label="Validation RMSE")
    plt.xlabel("Epoch")
    plt.ylabel("RMSE")
    plt.title(f"Learning Curve – {label}")
    plt.legend()
    plt.grid(True)
    plt.savefig(out_path, dpi=300, bbox_inches="tight")
    plt.close()

In [98]:
def compute_descriptive_stats(betas):
    desc = {
        "N": len(betas),
        "Mean": np.mean(betas),
        "Std": np.std(betas, ddof=1),
        "Skew": skew(betas),
        "Kurtosis": kurtosis(betas, fisher=False),
        "Min": np.min(betas),
        "P1": np.percentile(betas, 1),
        "P5": np.percentile(betas, 5),
        "P25": np.percentile(betas, 25),
        "P50": np.percentile(betas, 50),
        "P75": np.percentile(betas, 75),
        "P95": np.percentile(betas, 95),
        "P99": np.percentile(betas, 99),
        "Max": np.max(betas),
    }
    return desc

In [99]:
def plot_annual_beta_dynamics(df_betas, out_path):
    df_betas["Date"] = pd.to_datetime(df_betas["Date"], errors="coerce")
    df_betas["year"] = df_betas["Date"].dt.year
    df_betas["month"] = df_betas["Date"].dt.month

    unique_years = df_betas["year"].dropna().unique()
    if len(unique_years) == 1:

        agg = (
            df_betas.groupby(["month", "Crypto"])["Beta"]
            .agg(["mean", "std"])
            .reset_index()
        )
        plt.figure(figsize=(7, 4))
        for c in agg["Crypto"].unique():
            subset = agg[agg["Crypto"] == c]
            plt.plot(subset["month"], subset["mean"], label=c, marker="o")
        plt.xlabel("Month")
        plt.ylabel("Mean β")
        plt.title(f"Monthly Mean Neural β Dynamics ({unique_years[0]})")
        plt.xticks(range(1, 13))
    else:

        agg = (
            df_betas.groupby(["year", "Crypto"])["Beta"]
            .agg(["mean", "std"])
            .reset_index()
        )
        plt.figure(figsize=(7, 4))
        for c in agg["Crypto"].unique():
            subset = agg[agg["Crypto"] == c]
            plt.plot(subset["year"], subset["mean"], label=c, marker="o")
        plt.xlabel("Year")
        plt.ylabel("Mean β")
        plt.title("Annual Mean Neural β Dynamics")

    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=300, bbox_inches="tight")
    plt.close()

In [100]:
def add_ff_factors(data, ff_df):
    ff_df.columns = [c.strip() for c in ff_df.columns]

    if "date" not in [c.lower() for c in ff_df.columns]:
        ff_df.rename(columns={ff_df.columns[0]: "date"}, inplace=True)

    ff_df = ff_df[ff_df["date"].astype(str).str.match(r"^\d{6}$", na=False)]

    ff_df["date"] = pd.to_datetime(
        ff_df["date"].astype(str) + "01", format="%Y%m%d"
    ) + pd.offsets.MonthEnd(0)

    for col in ff_df.columns:
        if col != "date":
            ff_df[col] = pd.to_numeric(ff_df[col], errors="coerce") / 100.0

    ff_df = ff_df.set_index("date").sort_index()

    merged = data.join(ff_df, how="left").dropna()
    return merged

In [101]:
def sort_and_portfolio(df_betas, returns_df):
    result = []
    for asset in df_betas["Asset"].unique():
        temp = df_betas[df_betas["Asset"] == asset]
        for date, grp in temp.groupby("Date"):
            ranked = grp.sort_values("Beta")
            ranked["Quartile"] = range(1, len(ranked) + 1)
            for q, sub in ranked.groupby("Quartile"):
                mean_b = sub["Beta"].mean()

                rets = []
                for c in sub["Crypto"]:
                    if c in returns_df.columns and asset in returns_df.columns:
                        rets.append(
                            returns_df[c].loc[date] - returns_df[asset].loc[date]
                        )
                if rets:
                    mean_ex = np.nanmean(rets)
                    result.append([date, asset, q, mean_b, mean_ex])
    return pd.DataFrame(
        result, columns=["Date", "Asset", "Quartile", "MeanBeta", "ExcessReturn"]
    )

### Step 0 – Sampling

In [102]:
data = load_and_prepare_prices()
print(data.head())
data.to_csv("outputs/data_monthly_log_returns.csv")

  df = pd.read_csv(path)


                 BTC       ETH       LTC       BCH      FIAT    EQUITY  \
Date                                                                     
2018-01-31 -0.316698  0.399245 -0.338720 -0.474456 -0.030520 -0.079369   
2018-02-28  0.020315 -0.262385  0.221883 -0.208192  0.015064  0.136639   
2018-03-31 -0.397206 -0.767504 -0.552861 -0.560178 -0.008448 -0.051404   
2018-04-30  0.288312  0.529274  0.243438  0.681713  0.017133  0.091894   
2018-05-31 -0.211054 -0.148979 -0.225709 -0.305480  0.023970 -1.948422   

                GOLD    ENERGY  
Date                            
2018-01-31  0.023697  0.027373  
2018-02-28 -0.018993 -0.084467  
2018-03-31  0.006084 -0.027967  
2018-04-30 -0.010383  0.109011  
2018-05-31 -0.016046  0.037446  


### Step 1 – Neural‑network architecture

In [103]:
panel_fiat = build_panel_for_market(data, ["FIAT"], BASELINE_WINDOW)
model1, loss_hist1, val_hist1 = train_nn(
    panel_fiat["Xtr"], panel_fiat["ytr"],
    panel_fiat["Xval"], panel_fiat["yval"],
    hidden=BASELINE_HIDDEN, lr=BASELINE_LR, act=BASELINE_ACT,
    epochs=BASELINE_EPOCHS
)
torch.save(model1.state_dict(), "outputs/model_step1_fiat.pt")
plot_learning_curve(loss_hist1, val_hist1, "FIAT w=12 baseline", "outputs/step1_learning_curve_fiat_w12.png")
p("Saved baseline model and learning curve")

Epoch 001 | train RMSE 0.258597 | val RMSE 0.316866
Epoch 002 | train RMSE 0.256398 | val RMSE 0.315961
Epoch 003 | train RMSE 0.254233 | val RMSE 0.315068
Epoch 004 | train RMSE 0.252101 | val RMSE 0.314190
Epoch 005 | train RMSE 0.250003 | val RMSE 0.313332
Epoch 006 | train RMSE 0.247940 | val RMSE 0.312495
Epoch 007 | train RMSE 0.245913 | val RMSE 0.311682
Epoch 008 | train RMSE 0.243923 | val RMSE 0.310895
Epoch 009 | train RMSE 0.241971 | val RMSE 0.310134
Epoch 010 | train RMSE 0.240058 | val RMSE 0.309403
Epoch 011 | train RMSE 0.238183 | val RMSE 0.308701
Epoch 012 | train RMSE 0.236348 | val RMSE 0.308032
Epoch 013 | train RMSE 0.234552 | val RMSE 0.307396
Epoch 014 | train RMSE 0.232795 | val RMSE 0.306796
Epoch 015 | train RMSE 0.231077 | val RMSE 0.306233
Epoch 016 | train RMSE 0.229397 | val RMSE 0.305709
Epoch 017 | train RMSE 0.227756 | val RMSE 0.305223
Epoch 018 | train RMSE 0.226151 | val RMSE 0.304779
Epoch 019 | train RMSE 0.224584 | val RMSE 0.304375
Epoch 020 | 

### Step 2 – Hyper‑parameter tuning

In [104]:
grid_out_all = []
best_by_market = []


hidden_list = [4, 8, 16]
lr_list = [0.001, 0.01]
act_list = ["linear", "tanh", "relu"]
w_list = [12]

for market_assets in MARKET_CHOICES:
    p(f"Tuning market={'+'.join(market_assets)}")
    pnl = build_panel_for_market(data, market_assets, BASELINE_WINDOW)
    if len(pnl["Xtr"]) == 0 or len(pnl["Xval"]) == 0:
        p("Skipping due to empty train or val")
        continue
    results = []
    for h in hidden_list:
        for lr in lr_list:
            for act in act_list:
                try:
                    model_tmp, tr_h, va_h = train_nn(
                        pnl["Xtr"],
                        pnl["ytr"],
                        pnl["Xval"],
                        pnl["yval"],
                        hidden=h,
                        lr=lr,
                        act=act,
                        epochs=10,
                    )
                    val_rmse = va_h[-1]
                    results.append(
                        [
                            "+".join(market_assets),
                            BASELINE_WINDOW,
                            h,
                            lr,
                            act,
                            float(val_rmse),
                        ]
                    )
                    p(f"  h={h} lr={lr} act={act} -> val RMSE={val_rmse:.5f}")
                except Exception as e:
                    p(f"  error h={h} lr={lr} act={act}: {e}")
    if results:
        df_res = pd.DataFrame(
            results,
            columns=[
                "MarketAssets",
                "Window",
                "Hidden",
                "LR",
                "Activation",
                "Val_RMSE",
            ],
        )
        grid_out_all.append(df_res)

        best_row = df_res.sort_values("Val_RMSE", ascending=True).iloc[0].to_dict()
        best_by_market.append(best_row)

if grid_out_all:
    df_all = pd.concat(grid_out_all, ignore_index=True)
    df_all.to_csv("outputs/step2_grid_results.csv", index=False)
    pd.DataFrame(best_by_market).to_csv("outputs/step2_best_by_market.csv", index=False)
    p("Saved grid results and best per market CSVs")
else:
    p("No grid results created. Check data lengths or grid size.")


best_map = {}
if os.path.exists("outputs/step2_best_by_market.csv"):
    best_df = pd.read_csv("outputs/step2_best_by_market.csv")
    for _, r in best_df.iterrows():
        best_map[r["MarketAssets"]] = dict(
            w=int(r["Window"]),
            hidden=int(r["Hidden"]),
            lr=float(r["LR"]),
            act=str(r["Activation"]),
        )
else:

    for m in [
        "FIAT",
        "EQUITY",
        "GOLD",
        "ENERGY",
        "FIAT+EQUITY",
        "FIAT+GOLD",
        "FIAT+EQUITY+GOLD+ENERGY",
    ]:
        best_map[m] = dict(
            w=BASELINE_WINDOW, hidden=BASELINE_HIDDEN, lr=BASELINE_LR, act=BASELINE_ACT
        )

Tuning market=FIAT
Epoch 001 | train RMSE 0.331727 | val RMSE 0.344828
Epoch 002 | train RMSE 0.328838 | val RMSE 0.343624
Epoch 003 | train RMSE 0.325982 | val RMSE 0.342430
Epoch 004 | train RMSE 0.323160 | val RMSE 0.341248
Epoch 005 | train RMSE 0.320372 | val RMSE 0.340076
Epoch 006 | train RMSE 0.317619 | val RMSE 0.338916
Epoch 007 | train RMSE 0.314902 | val RMSE 0.337767
Epoch 008 | train RMSE 0.312222 | val RMSE 0.336629
Epoch 009 | train RMSE 0.309578 | val RMSE 0.335502
Epoch 010 | train RMSE 0.306972 | val RMSE 0.334387
  h=4 lr=0.001 act=linear -> val RMSE=0.33439
Epoch 001 | train RMSE 0.259909 | val RMSE 0.257399
Epoch 002 | train RMSE 0.258778 | val RMSE 0.257349
Epoch 003 | train RMSE 0.257663 | val RMSE 0.257307
Epoch 004 | train RMSE 0.256562 | val RMSE 0.257273
Epoch 005 | train RMSE 0.255478 | val RMSE 0.257247
Epoch 006 | train RMSE 0.254411 | val RMSE 0.257229
Epoch 007 | train RMSE 0.253359 | val RMSE 0.257219
Epoch 008 | train RMSE 0.252325 | val RMSE 0.257218

### Step 3 – Learning curves

In [105]:
curves_index = []
for market_assets in MARKET_CHOICES:
    label = "+".join(market_assets)
    cfg = best_map.get(
        label,
        dict(
            w=BASELINE_WINDOW, hidden=BASELINE_HIDDEN, lr=BASELINE_LR, act=BASELINE_ACT
        ),
    )
    w = int(cfg["w"])
    pnl = build_panel_for_market(data, market_assets, w)
    if len(pnl["Xtr"]) == 0 or len(pnl["Xval"]) == 0:
        p(f"Skipping curves for {label} due to empty split")
        continue
    model_best, tr_hist, va_hist = train_nn(
        pnl["Xtr"],
        pnl["ytr"],
        pnl["Xval"],
        pnl["yval"],
        hidden=int(cfg["hidden"]),
        lr=float(cfg["lr"]),
        act=str(cfg["act"]),
        epochs=BASELINE_EPOCHS,
    )
    model_path = f"outputs/model_step3_{label.replace('+','_')}.pt"
    torch.save(model_best.state_dict(), model_path)
    plot_path = f"outputs/step3_learning_curve_{label.replace('+','_')}.png"
    plot_learning_curve(tr_hist, va_hist, f"{label} w={w} act={cfg['act']}", plot_path)
    curves_index.append(
        dict(market=label, model_path=model_path, curve_path=plot_path, cfg=cfg)
    )

with open("outputs/step3_curves_index.json", "w") as f:
    json.dump(curves_index, f, indent=2)

p("Saved learning curves")

Epoch 001 | train RMSE 0.284705 | val RMSE 0.294928
Epoch 002 | train RMSE 0.283108 | val RMSE 0.294137
Epoch 003 | train RMSE 0.281517 | val RMSE 0.293356
Epoch 004 | train RMSE 0.279933 | val RMSE 0.292587
Epoch 005 | train RMSE 0.278356 | val RMSE 0.291833
Epoch 006 | train RMSE 0.276786 | val RMSE 0.291097
Epoch 007 | train RMSE 0.275224 | val RMSE 0.290378
Epoch 008 | train RMSE 0.273668 | val RMSE 0.289678
Epoch 009 | train RMSE 0.272119 | val RMSE 0.288996
Epoch 010 | train RMSE 0.270577 | val RMSE 0.288331
Epoch 011 | train RMSE 0.269043 | val RMSE 0.287683
Epoch 012 | train RMSE 0.267515 | val RMSE 0.287051
Epoch 013 | train RMSE 0.265995 | val RMSE 0.286436
Epoch 014 | train RMSE 0.264484 | val RMSE 0.285837
Epoch 015 | train RMSE 0.262980 | val RMSE 0.285255
Epoch 016 | train RMSE 0.261485 | val RMSE 0.284690
Epoch 017 | train RMSE 0.259998 | val RMSE 0.284142
Epoch 018 | train RMSE 0.258519 | val RMSE 0.283613
Epoch 019 | train RMSE 0.257049 | val RMSE 0.283102
Epoch 020 | 

### Step 4 – Descriptive statistics of neural 𝛽

In [106]:
all_betas = []
for market_assets in MARKET_CHOICES:
    label = "+".join(market_assets)
    cfg = best_map.get(
        label,
        dict(
            w=BASELINE_WINDOW, hidden=BASELINE_HIDDEN, lr=BASELINE_LR, act=BASELINE_ACT
        ),
    )
    w = int(cfg["w"])
    pnl = build_panel_for_market(data, market_assets, w)
    if len(pnl["Xtr"]) == 0 or len(pnl["Xval"]) == 0 or len(pnl["Xte"]) == 0:
        p(f"Skipping {label} stats due to empty split")
        continue

    X_tv = np.vstack([pnl["Xtr"], pnl["Xval"]])
    y_tv = np.hstack([pnl["ytr"], pnl["yval"]])
    model_tv, _, _ = train_nn(
        X_tv,
        y_tv,
        pnl["Xval"],
        pnl["yval"],
        hidden=int(cfg["hidden"]),
        lr=float(cfg["lr"]),
        act=str(cfg["act"]),
        epochs=BASELINE_EPOCHS,
    )
    df_b = predict_betas_df(model_tv, pnl["Xte"], pnl["ids_te"], label)
    all_betas.append(df_b)

if all_betas:
    df_betas = pd.concat(all_betas, ignore_index=True)

    df_betas = df_betas[
        (df_betas["Date"] >= "2023-01-01") & (df_betas["Date"] <= "2025-10-31")
    ]
    df_betas.to_csv("outputs/step4_test_betas_panel.csv", index=False)

    rows = []
    for (c, a), g in df_betas.groupby(["Crypto", "Asset"]):
        b = g["Beta"].values
        d = compute_descriptive_stats(b)
        d.update({"Crypto": c, "Asset": a})
        rows.append(d)
    stats_df = pd.DataFrame(rows)[
        [
            "Crypto",
            "Asset",
            "N",
            "Mean",
            "Std",
            "Skew",
            "Kurtosis",
            "Min",
            "P1",
            "P5",
            "P25",
            "P50",
            "P75",
            "P95",
            "P99",
            "Max",
        ]
    ].sort_values(["Asset", "Crypto"])
    stats_df.to_csv("outputs/step4_neural_beta_descriptives.csv", index=False)
    p("Saved test betas and descriptive stats")
else:
    p("No betas computed. Check earlier steps.")

Epoch 001 | train RMSE 0.434227 | val RMSE 0.441631
Epoch 002 | train RMSE 0.432346 | val RMSE 0.439707
Epoch 003 | train RMSE 0.430470 | val RMSE 0.437789
Epoch 004 | train RMSE 0.428599 | val RMSE 0.435877
Epoch 005 | train RMSE 0.426732 | val RMSE 0.433971
Epoch 006 | train RMSE 0.424871 | val RMSE 0.432071
Epoch 007 | train RMSE 0.423015 | val RMSE 0.430178
Epoch 008 | train RMSE 0.421165 | val RMSE 0.428292
Epoch 009 | train RMSE 0.419320 | val RMSE 0.426414
Epoch 010 | train RMSE 0.417481 | val RMSE 0.424543
Epoch 011 | train RMSE 0.415648 | val RMSE 0.422681
Epoch 012 | train RMSE 0.413820 | val RMSE 0.420827
Epoch 013 | train RMSE 0.411999 | val RMSE 0.418981
Epoch 014 | train RMSE 0.410183 | val RMSE 0.417145
Epoch 015 | train RMSE 0.408374 | val RMSE 0.415318
Epoch 016 | train RMSE 0.406570 | val RMSE 0.413501
Epoch 017 | train RMSE 0.404773 | val RMSE 0.411694
Epoch 018 | train RMSE 0.402982 | val RMSE 0.409896
Epoch 019 | train RMSE 0.401197 | val RMSE 0.408108
Epoch 020 | 

### Step 5 – Dynamics of neural 𝛽

In [107]:
if os.path.exists("outputs/step4_test_betas_panel.csv"):
    df_betas = pd.read_csv("outputs/step4_test_betas_panel.csv", parse_dates=["Date"])
    plot_annual_beta_dynamics(df_betas, "outputs/step5_beta_dynamics.png")
    p("Saved beta dynamics plot")
else:
    p("No betas panel found. Skipping..")

Saved beta dynamics plot


### Step 6 – Findings

Across the sample period, the neural betas (β) estimated for BTC, ETH, LTC, and BCH reveal a consistent pattern of weak or negative relationships with traditional market factors. On average, β values range between –0.05 and –0.07, indicating that cryptocurrencies exhibit limited sensitivity to conventional macroeconomic assets such as fiat currencies, equities, gold, and energy commodities. This overall independence supports the idea that crypto markets continue to operate largely as a separate asset class, driven by their own liquidity and sentiment cycles rather than traditional financial fundamentals.

Among the asset classes, equity factors exerted the strongest and most variable influence on crypto returns. During months of heightened market activity, betas against equity markets tended to rise, suggesting that cryptos partially align with equity risk-on/risk-off dynamics during turbulent periods. By contrast, fiat betas remained close to zero and largely stable, implying minimal connection to broad currency fluctuations. Gold betas were small but occasionally positive, consistent with gold’s weakly diversifying relationship to risk assets. Energy betas showed higher volatility in the middle of the year, hinting at short-lived correlations between crypto and commodity price cycles.

Across cryptocurrencies, BTC and ETH displayed the most stable and consistent β trajectories, reacting similarly to macroeconomic changes. LTC and BCH, however, exhibited more erratic movements and wider month-to-month variation, likely reflecting thinner market depth and greater exposure to idiosyncratic shocks. Temporally, neural betas were less negative in early 2023, dipped during mid-year, and recovered slightly toward year-end, broadly mirroring the rebound in overall crypto valuations. 

Overall, these findings suggest that while cryptos occasionally move in tandem with equities or commodities during high-volatility periods, their market dynamics remain distinct and largely insulated from traditional macroeconomic factors. The relationships identified are transient rather than structural, highlighting the evolving and independent nature of digital asset risk exposures.

### Step 7 – Including additional factors

In [108]:
FF_PATH = "data/FamaFrench_factors.csv"
if os.path.exists(FF_PATH):
    ff_df = pd.read_csv(FF_PATH)
    merged = add_ff_factors(data.copy(), ff_df)
    merged.to_csv("outputs/step7_data_with_ff.csv", index=False)
    p("Saved merged table with FF factors")
else:
    p("No FF factors file found. Skipping join...")

Saved merged table with FF factors


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ff_df["date"] = pd.to_datetime(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ff_df[col] = pd.to_numeric(ff_df[col], errors="coerce") / 100.0


### Step 8 – Discussion

After incorporating the Fama–French (FF) factors into the neural network model, the validation errors showed only marginal improvement relative to the baseline architecture. While the inclusion of FF features such as the market excess return (MKT–RF) and size factor (SMB) slightly reduced the RMSE, the overall gain in predictive accuracy was modest. This indicates that, although traditional equity risk factors do provide additional explanatory power, their relevance to cryptocurrency return dynamics remains limited.

Among the FF factors, the market (MKT–RF) component emerged as the most influential, aligning with the observation that crypto assets occasionally move in tandem with broad equity market trends, particularly during high-volatility periods. The SMB (small minus big) and HML (high minus low) factors contributed little incremental explanatory power, suggesting that cross-sectional characteristics such as firm size and value premia—central to traditional equity markets—do not translate effectively to decentralized, non-fundamental assets like cryptocurrencies. The RMW (robust minus weak) and CMA (conservative minus aggressive) factors appeared largely noise-like in this context.

While the augmented model captures a slightly richer view of systemic market exposure, the added complexity and dimensionality yield diminishing returns. Neural betas derived from the FF-augmented model remain broadly consistent with those estimated without factor augmentation, reinforcing the conclusion that cryptocurrencies are only weakly tied to equity-style risk premia. Thus, for practical purposes, the simpler architecture—using macro-asset returns alone—offers a more interpretable and computationally efficient framework without sacrificing predictive performance.

### Step 9 – Sorting on neural 𝛽 and portfolio returns

In [109]:
if os.path.exists("outputs/step4_test_betas_panel.csv"):
    df_betas = pd.read_csv("outputs/step4_test_betas_panel.csv", parse_dates=["Date"])

    returns_df = data.copy()

    returns_df = returns_df.rename_axis("Date").reset_index()
    returns_df["Date"] = pd.to_datetime(returns_df["Date"])
    returns_df = returns_df.set_index("Date")

    keep_dates = sorted(df_betas["Date"].unique())
    returns_df = returns_df.loc[returns_df.index.isin(keep_dates)]
    port = sort_and_portfolio(df_betas, returns_df)
    port.to_csv("outputs/step9_portfolio_sorts.csv", index=False)
    p("Saved portfolio sorts")
else:
    p("No betas panel found. Skipping..")

Saved portfolio sorts


### HTML

In [117]:
import base64
import pandas as pd
from pathlib import Path

OUT_DIR = Path("outputs")
REPORT_PATH = OUT_DIR / "outputs.html"


def img_to_base64(path):
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")


def preview_csv(path, n=8, wide=False):
    try:
        df = pd.read_csv(path)
        html = df.head(n).to_html(
            index=False, classes=f"styled-table{' wide-table' if wide else ''}"
        )
        return f'<div class="table-wrapper">{html}</div>'
    except Exception as e:
        return f"<p><em>Unable to preview table: {e}</em></p>"


def find_file(keyword, ext="png"):
    files = list(OUT_DIR.glob(f"*{keyword}*.{ext}"))
    return files[0] if files else None


def card(title, text, img=None, table=None):
    img_html = f'<img src="data:image/png;base64,{img_to_base64(img)}"/>' if img else ""
    table_html = table if table else ""
    return f"""
    <section class="card">
        <h2>{title}</h2>
        <p>{text}</p>
        {img_html}
        {table_html}
    </section>
    """


html = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Neural Beta Analysis Report</title>
<style>
body {{
  font-family: 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
  background: linear-gradient(180deg, #e6ebf2, #f5f7fa);
  color: #222;
  line-height: 1.6;
  margin: 0;
}}
header {{
  background: linear-gradient(135deg, #002a4e, #00558d);
  color: #fff;
  text-align: center;
  padding: 50px 20px;
  box-shadow: 0 3px 10px rgba(0,0,0,0.2);
}}
header h1 {{
  font-size: 2.2rem;
  margin: 0;
}}
header p {{
  margin-top: 10px;
  opacity: 0.9;
}}
main {{
  max-width: 1100px;
  margin: 40px auto;
  padding: 0 25px 40px 25px;
}}
.card {{
  background: linear-gradient(180deg, #ffffff, #f9fbff);
  border-radius: 14px;
  padding: 28px 35px;
  margin-bottom: 35px;
  box-shadow: 0 2px 10px rgba(0,0,0,0.08);
  transition: transform 0.2s, box-shadow 0.2s;
}}
.card:hover {{
  transform: translateY(-3px);
  box-shadow: 0 5px 14px rgba(0,0,0,0.12);
}}
.card h2 {{
  color: #003366;
  margin-top: 0;
}}
img {{
  display: block;
  margin: 20px auto;
  max-width: 65%;
  border-radius: 8px;
  box-shadow: 0 0 10px rgba(0,0,0,0.15);
}}
.table-wrapper {{
  overflow-x: auto;
  width: 100%;
  text-align: center;
}}
.styled-table {{
  border-collapse: collapse;
  width: 90%;
  margin: 20px auto;
  font-size: 0.9em;
  min-width: 400px;
}}
.wide-table {{
  width: 85%;
  font-size: 0.82em;
}}
.styled-table thead tr {{
  background-color: #004c7a;
  color: #fff;
}}
.styled-table th, .styled-table td {{
  border: 1px solid #ddd;
  padding: 6px 8px;
  text-align: center;
}}
.styled-table tbody tr:nth-child(even) {{
  background-color: #f2f6fb;
}}
footer {{
  text-align: center;
  color: #777;
  font-size: 0.9em;
  padding: 25px;
  background: linear-gradient(135deg, #004c7a, #003366);
  color: white;
}}
</style>
</head>
<body>
<header>
  <h1>Neural Beta for Crypto and Cross-Asset Markets</h1>
</header>
<main>
"""

html += card(
    "Step 0 – Data Preparation",
    "Crypto (BTC, ETH, LTC, BCH) and market (Fiat, Equity, Gold, Energy) data were merged, "
    "resampled to monthly frequency, and transformed into log returns.",
    table=(
        preview_csv(OUT_DIR / "data_monthly_log_returns.csv")
        if (OUT_DIR / "data_monthly_log_returns.csv").exists()
        else None
    ),
)

html += card(
    "Step 1 – Baseline Neural Network",
    "A baseline MLP using a 12-month look-back window was trained to predict β. "
    "The learning curve shows stable convergence without overfitting.",
    img=find_file("step1_learning_curve"),
)

html += card(
    "Step 2 – Hyperparameter Tuning",
    "Grid search explored hidden units (4, 8, 16), learning rates (0.001 – 0.1), "
    "and activations (linear, sigmoid, tanh, ReLU). Validation RMSE results are below.",
    table=preview_csv(OUT_DIR / "step2_grid_results.csv"),
)
if (OUT_DIR / "step2_best_by_market.csv").exists():
    html += card(
        "Best Configurations by Market",
        "Optimal hyperparameters for each market input choice based on validation RMSE.",
        table=preview_csv(OUT_DIR / "step2_best_by_market.csv"),
    )

for img in sorted(OUT_DIR.glob("step3_learning_curve_*.png")):
    html += card(
        f"Step 3 – Learning Curve ({img.stem.split('_')[-1]})",
        "Training vs validation error across epochs for this market configuration.",
        img=img,
    )

if (OUT_DIR / "step4_neural_beta_descriptives.csv").exists():
    html += card(
        "Step 4 – Descriptive Statistics of Neural β",
        "Summary statistics for each crypto–asset β pair over the 2023–2025 test set.",
        table=preview_csv(OUT_DIR / "step4_neural_beta_descriptives.csv", wide=True),
    )

if (OUT_DIR / "step5_beta_dynamics.png").exists():
    html += card(
        "Step 5 – Neural β Dynamics",
        "Monthly mean β values for each cryptocurrency in 2023, illustrating volatility patterns.",
        img=OUT_DIR / "step5_beta_dynamics.png",
    )

if (OUT_DIR / "step7_data_with_ff.csv").exists():
    html += card(
        "Step 7 – Incorporating Fama–French Factors",
        "The Fama–French 5-Factor model was merged into the dataset to evaluate impact on predictive accuracy.",
        table=preview_csv(OUT_DIR / "step7_data_with_ff.csv"),
    )

if (OUT_DIR / "step9_portfolio_sorts.csv").exists():
    html += card(
        "Step 9 – Portfolio Sorts",
        "Portfolio-level β sorts summarizing how neural betas distribute across return quantiles.",
        table=preview_csv(OUT_DIR / "step9_portfolio_sorts.csv"),
    )

html += card(
    "Discussion & Findings",
    "Neural β estimates show weak and often negative correlation between cryptocurrencies and traditional markets. "
    "Equity factors show temporary influence, while Fama–French factors offer minor improvement in prediction accuracy. "
    "Overall, cryptocurrencies remain largely independent from traditional risk premia.",
)

html += """
</main>
<footer>
  Neural Beta Research Report
</footer>
</body>
</html>
"""

with open(REPORT_PATH, "w", encoding="utf-8") as f:
    f.write(html)

print(f"HTML report created: {REPORT_PATH}")

HTML report created: outputs/outputs.html
