In [10]:
import numpy as np
import pandas as pd
import polars as pl
from pathlib import Path

# ------------------------------------------------
# Data location
# ------------------------------------------------
DATA_PATH = Path.home() / "Documents" / "kaggle" / "hull_tactical" / "data"

# ------------------------------------------------
# Read the raw CSV files with Polars
# ------------------------------------------------
train = pl.read_csv(DATA_PATH / "train.csv")
# test.csv is read only so its column names can be intersected with train's.
test = pl.read_csv(DATA_PATH / "test.csv")

# Everything except date_id becomes Float64; strict=False turns values that
# cannot be converted into nulls instead of raising.
train = train.with_columns(
    pl.all().exclude("date_id").cast(pl.Float64, strict=False)
)

# ------------------------------------------------
# Mean imputation
# ------------------------------------------------
# Each null is replaced by the mean of its own column, computed over the
# whole of train (i.e. over every date, not just past dates).
value_columns = [name for name in train.columns if name != "date_id"]
train_filled = train.with_columns(
    [
        pl.col(name).fill_null(pl.col(name).mean()).alias(name)
        for name in value_columns
    ]
)

# ------------------------------------------------
# Features = columns present in both train and test
# ------------------------------------------------
train_cols = set(train_filled.columns)
test_cols = set(test.columns)

common_cols = train_cols.intersection(test_cols)
# Only "is_scored" is dropped here, so any other shared column (date_id
# included, if test.csv has one) remains in the feature list.
BASE_FEATURE_COLS = sorted(col for col in common_cols if col != "is_scored")

# ------------------------------------------------
# Chronological ordering
# ------------------------------------------------
train_sorted = train_filled.sort("date_id")

# Target vector
y = train_sorted.get_column("market_forward_excess_returns").to_numpy()

# Feature matrix as a pandas DataFrame; the second fill handles any NaN
# that is still present after the Polars null fill.
X = train_sorted.select(BASE_FEATURE_COLS).to_pandas()
X = X.fillna(X.mean())

N = len(y)
print("X shape:", X.shape, "| y shape:", y.shape)
print("Baseline feature columns:", len(BASE_FEATURE_COLS))

# Standard deviation of the target over the full sample (all rows of y);
# alloc_kelly reads this as its variance estimate.
GLOBAL_RET_STD = float(np.std(y))

# ------------------------------------------------
# Walk-forward splits
# ------------------------------------------------
# Consecutive boundaries define the folds: train on rows [0, cut), validate
# on rows [cut, stop). The training window therefore expands fold by fold,
# and the last fold validates on the final 20% of rows (0.8 -> 1.0).
fractions = [0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
bounds = [int(frac * N) for frac in fractions]

fold_indices = [
    (np.arange(0, cut), np.arange(cut, stop))
    for cut, stop in zip(bounds[:-1], bounds[1:])
]


# ======================================================================
#   BASE ALLOCATION STRATEGIES  
#   (the ones you've already tested)
# ======================================================================

# 1) ✔ Tanh scaling (default, stable)
def alloc_tanh(pred):
    """Return the allocation 1 + tanh(50 * pred), bounded to [0, 2].

    A zero prediction yields the neutral allocation 1.0; positive
    predictions move smoothly toward 2.0 and negative ones toward 0.0.
    """
    edge_scale = 50.0
    squashed = np.tanh(edge_scale * pred)
    return np.clip(squashed + 1.0, 0.0, 2.0)

# 2) ✔ Linear scaling (simple, can be volatile)
def alloc_linear(pred):
    """Return the allocation 1 + 300 * pred, clipped to [0, 2].

    The response is linear around the neutral allocation 1.0 and hits the
    bounds once |pred| reaches 1/300.
    """
    slope = 300.0
    return np.clip(slope * pred + 1.0, 0.0, 2.0)

# 3) ✔ Piecewise (momentum-style)
def alloc_piecewise(pred):
    """Return 1.25 where pred is strictly positive and 0.75 everywhere else.

    Note that a prediction of exactly zero falls in the "everywhere else"
    branch and therefore gets 0.75, not the neutral 1.0.
    """
    overweight, underweight = 1.25, 0.75
    is_positive = pred > 0
    return np.clip(np.where(is_positive, overweight, underweight), 0.0, 2.0)

# 4) ✔ Asymmetric tanh (more long than short)
def alloc_asymmetric(pred):
    """Return 1 + tanh(k * pred) with a steeper slope for non-negative pred.

    k is 60 when pred >= 0 and 30 when pred < 0, so the allocation moves
    toward 2.0 faster on positive signals than it moves toward 0.0 on
    negative ones. The result is bounded to [0, 2].
    """
    SCALE_UP = 60.0
    SCALE_DOWN = 30.0
    # Choose the slope per element first, then apply a single tanh.
    slope = np.where(pred >= 0, SCALE_UP, SCALE_DOWN)
    return np.clip(1.0 + np.tanh(pred * slope), 0.0, 2.0)

# 5) ✔ Softplus (smooth convex response)
def alloc_softplus(pred):
    """Return the allocation 1 + (softplus(70 * pred) - 0.5) / 20, clipped to [0, 2].

    softplus(x) = log(1 + exp(x)) is evaluated as ``np.logaddexp(0, x)``,
    which is numerically stable: the naive ``log1p(exp(x))`` overflows in
    ``exp`` (and emits a RuntimeWarning) once x exceeds roughly 709.

    Because softplus is never negative, the unclipped value is never below
    1 - 0.5 / 20 = 0.975, and a zero prediction maps to
    1 + (ln 2 - 0.5) / 20 (about 1.0097) rather than exactly 1.0.
    """
    SCALE = 70.0
    x = pred * SCALE
    softplus = np.logaddexp(0.0, x)
    a = 1.0 + (softplus - 0.5) / 20.0
    return np.clip(a, 0.0, 2.0)

# 6) ✔ Clipped sign (very simple and robust)
def alloc_sign(pred):
    """Return 1.5, 1.0 or 0.5 for a positive, zero or negative prediction.

    Only the sign of pred is used; its magnitude has no effect.
    """
    tilt = 0.5 * np.sign(pred)
    return np.clip(tilt + 1.0, 0.0, 2.0)


# ======================================================================
#   ADVANCED ALLOCATION STRATEGIES
#   (volatility scaling, thresholds, Kelly-ish, regime, rolling Sharpe)
# ======================================================================

# 7) Volatility-scaled softplus
def alloc_softplus_vol(pred):
    """Softplus allocation applied to the signal divided by its own rolling std.

    Steps:
      1. rolling standard deviation of ``pred`` over the last 20 values;
      2. ``pred / vol``, with ``vol`` replaced by 1e-6 wherever it is
         non-finite or not above 1e-12 (this includes the first element,
         where a one-observation std is undefined);
      3. multiply by 70, take softplus, and map to
         ``1 + (softplus - 0.5) / 20``;
      4. clip to [0, 2].

    Non-finite intermediate values are replaced by neutral defaults (0.0
    for the normalised signal, 1.0 for the allocation). The volatility is
    that of the signal passed in, not of realised returns.
    """
    WINDOW = 20
    SCALE = 70.0

    def _softplus_capped(values):
        # Above 30 softplus(v) is returned as v itself; those entries are
        # swapped for 0 before exp() so the exponential cannot overflow.
        values = np.asarray(values)
        large = values > 30
        exp_input = np.where(large, 0.0, values)
        return np.where(large, values, np.log1p(np.exp(exp_input)))

    rolling_vol = pd.Series(pred).rolling(WINDOW, min_periods=1).std().to_numpy()

    # Floor the denominator where the rolling std is unusable.
    unusable = (rolling_vol <= 1e-12) | ~np.isfinite(rolling_vol)
    denom = np.where(unusable, 1e-6, rolling_vol)

    # Signal in units of its own rolling volatility.
    zscore = pred / denom
    zscore = np.where(np.isfinite(zscore), zscore, 0.0)

    scaled = zscore * SCALE
    scaled = np.where(np.isfinite(scaled), scaled, 0.0)

    softplus = _softplus_capped(scaled)

    alloc = 1.0 + (softplus - 0.5) / 20.0
    alloc = np.where(np.isfinite(alloc), alloc, 1.0)

    return np.clip(alloc, 0.0, 2.0)

# 8) Thresholded tanh (ignore small/noisy signals)
def alloc_tanh_threshold(pred):
    """Tanh allocation that stays neutral for predictions below a threshold.

    Where |pred| < 0.0002 the allocation is exactly 1.0; elsewhere it is
    1 + tanh(50 * pred). The result is bounded to [0, 2].
    """
    THRESH = 0.0002  # tune this
    EDGE_SCALE = 50.0
    too_small = np.abs(pred) < THRESH
    # tanh(0) == 0, so a zeroed signal is the same as a neutral allocation.
    a = np.where(too_small, 1.0, 1.0 + np.tanh(pred * EDGE_SCALE))
    return np.clip(a, 0.0, 2.0)

# 9) Kelly-inspired allocation
#    f* ~ edge / variance. Here we approx edge ~ pred, variance ~ GLOBAL_RET_STD^2.
def alloc_kelly(pred, ret_std=None):
    """Kelly-style allocation: 1 + clip(pred / variance, -1, 1), bounded to [0, 2].

    Parameters
    ----------
    pred
        Predicted return(s), used directly as the expected edge.
    ret_std
        Standard deviation of returns used for the variance estimate.
        When omitted, the module-level ``GLOBAL_RET_STD`` is used, which
        keeps the previous behaviour. That global is computed over the
        whole sample, so pass a value estimated on training rows only
        when evaluating a validation fold without look-ahead.

    Returns
    -------
    Allocation(s) in [0, 2]; a zero prediction gives 1.0.
    """
    if ret_std is None:
        ret_std = GLOBAL_RET_STD
    # The tiny constant keeps the division defined when ret_std is zero.
    sigma2 = ret_std**2 + 1e-12
    EDGE_SCALE = 1.0  # treat pred itself as expected return
    f_raw = (pred * EDGE_SCALE) / sigma2
    f_clipped = np.clip(f_raw, -1.0, 1.0)  # don't exceed 1x bet either way
    a = 1.0 + f_clipped
    return np.clip(a, 0.0, 2.0)

# 10) Regime-switching: low-vol uses aggressive softplus, high-vol shrinks bets
def alloc_regime_softplus(pred):
    """Switch between two allocation rules based on the signal's rolling std.

    The 20-value rolling standard deviation of ``pred`` is compared with
    its own median:

    * above the median ("high vol"): ``1 + 0.5 * tanh(20 * pred)``, which
      stays within [0.5, 1.5];
    * otherwise (including positions where the rolling std is NaN):
      whatever ``alloc_softplus(pred)`` returns.

    The median is taken over the entire ``pred`` array passed in, so the
    regime assigned to an element depends on values that come after it.
    """
    WINDOW = 20
    SCALE_HV = 20.0

    rolling_vol = pd.Series(pred).rolling(WINDOW, min_periods=1).std().to_numpy()

    # Fall back to 0.0 when there is no finite volatility to take a median of.
    if np.isfinite(rolling_vol).any():
        vol_cutoff = np.nanmedian(rolling_vol)
    else:
        vol_cutoff = 0.0

    calm_alloc = alloc_softplus(pred)
    stressed_alloc = 1.0 + 0.5 * np.tanh(pred * SCALE_HV)  # half range

    chosen = np.where(rolling_vol > vol_cutoff, stressed_alloc, calm_alloc)
    return np.clip(chosen, 0.0, 2.0)

# 11) Softplus scaled by rolling Sharpe of the signal
def alloc_softplus_sharpe(pred):
    """Softplus allocation whose deviation from 1.0 is scaled by a rolling Sharpe factor.

    * Base allocation: ``1 + (softplus(70 * pred) - 0.5) / 20``.
    * Rolling "Sharpe": mean / std of ``pred`` itself over the last 60
      values (at least 5 required), with the std replaced by 1e-6 where it
      is non-finite or not above 1e-12.
    * Factor: ``0.5 + 0.5 * tanh(0.5 * sharpe)``, i.e. a value in [0, 1];
      positions without a usable Sharpe get 0.5.
    * Result: ``1 + (base - 1) * factor``, clipped to [0, 2].

    softplus is evaluated with ``np.logaddexp(0, x)``. The previous
    ``np.where(z > 30, z, np.log1p(np.exp(z)))`` still evaluated
    ``np.exp`` on every element, so large inputs overflowed and raised a
    RuntimeWarning despite the guard.
    """
    WINDOW = 60
    ALPHA = 0.5

    rolling = pd.Series(pred).rolling(WINDOW, min_periods=5)
    roll_mean = rolling.mean().to_numpy()
    roll_std = rolling.std().to_numpy()

    # Safe std
    safe_std = np.where((roll_std <= 1e-12) | (~np.isfinite(roll_std)), 1e-6, roll_std)

    # Rolling Sharpe of the signal (not of realised strategy returns)
    roll_sharpe = roll_mean / safe_std
    roll_sharpe = np.where(~np.isfinite(roll_sharpe), 0.0, roll_sharpe)

    # Factor in [0,1]
    factor = 0.5 + 0.5 * np.tanh(ALPHA * roll_sharpe)
    factor = np.where(~np.isfinite(factor), 0.5, factor)

    # Base allocation (softplus), computed without overflow
    x = pred * 70.0
    x = np.where(~np.isfinite(x), 0.0, x)
    sp = np.logaddexp(0.0, x)
    base = 1.0 + (sp - 0.5) / 20.0

    # Scale the distance from neutral by the rolling-Sharpe factor
    a = 1.0 + (base - 1.0) * factor
    a = np.where(~np.isfinite(a), 1.0, a)

    return np.clip(a, 0.0, 2.0)

# ======================================================================
#   Sharpe-like competition evaluation
# ======================================================================
def sharpe_like_strategy(y_true, allocation):
    """Return mean / std of the per-period strategy returns as a float.

    The strategy return for each period is ``allocation * y_true``. The
    standard deviation is NumPy's default (population) std, and 1e-9 is
    added to it so a constant return series does not divide by zero. The
    ratio is not annualised.
    """
    strategy_returns = allocation * y_true
    avg_return = np.mean(strategy_returns)
    volatility = np.std(strategy_returns)
    return float(avg_return / (volatility + 1e-9))


def pretty_print_scores(name, scores):
    """Print one line per fold score, then the average, then a blank line.

    Folds are numbered from 1 and every number is shown with six decimals.
    """
    print(f"{name} per fold:")
    for fold_no, score in enumerate(scores, start=1):
        print(f"  Fold {fold_no}: {score:.6f}")
    average = np.mean(scores)
    print(f"{name} avg: {average:.6f}")
    print()


# ======================================================================
#   SELECT STRATEGY HERE (ONE ONLY)
#   (keeping your known last-return scores in comments)
# ======================================================================
# allocation_fn = alloc_softplus      # last return 0.009228
# allocation_fn = alloc_piecewise     # last return 0.002353
# allocation_fn = alloc_tanh          # last return -0.008881
# allocation_fn = alloc_linear        # last return -0.017213
# allocation_fn = alloc_asymmetric    # last return -0.005558
# allocation_fn = alloc_sign          # last return -0.005170

# Advanced ones (numbers are the recorded last-return baseline averages)
allocation_fn = alloc_softplus_vol # last return -0.000803
# allocation_fn = alloc_tanh_threshold # last return -0.008884
# allocation_fn = alloc_kelly # last return -0.016253
# allocation_fn = alloc_regime_softplus # last return 0.005810
# allocation_fn = alloc_softplus_sharpe # last return 0.009759


# ------------------------------------------------
# Baseline: last-return signal → allocation → strategy Sharpe
# ------------------------------------------------
# For every fold only the validation rows are used: the signal for row t is
# the target value at row t - 1, and no model is fitted on the training rows.
scores_last = []

for fold in fold_indices:
    val_idx = fold[1]

    # Previous row's target value serves as the prediction for each row.
    previous_return = y[val_idx - 1]
    allocation = allocation_fn(previous_return)

    fold_score = sharpe_like_strategy(y[val_idx], allocation)
    scores_last.append(fold_score)

pretty_print_scores("Baseline (last return) Allocation Sharpe-like", scores_last)


X shape: (9021, 95) | y shape: (9021,)
Baseline feature columns: 95
Baseline (last return) Allocation Sharpe-like per fold:
  Fold 1: -0.034048
  Fold 2: -0.022892
  Fold 3: 0.038958
  Fold 4: 0.005148
  Fold 5: 0.008821
Baseline (last return) Allocation Sharpe-like avg: -0.000803

