# Target-Decoupled Specialists Kill Test
**Goal**: Train t0-specialist and t1-specialist GRUs with asymmetric loss weighting.
Stitch t0 from the t0-specialist with t1 from the t1-specialist, then compare against the vanilla baseline.

**Kill gate**: stitched val >= 0.2814 AND consistent per-target lift.

**Configs**:
- `gru_t0_spec_90.yaml`: loss weights [0.9, 0.1]
- `gru_t1_spec_90.yaml`: loss weights [0.1, 0.9]

**Pipeline**: Setup -> Train t0-specs (3 seeds) -> Train t1-specs (3 seeds) -> Evaluate + stitch -> Save

In [None]:
# Cell 1: Mount Drive, download data from Kaggle, clone repo
#
# Colab setup cell. The steps below are order-dependent: Drive is mounted,
# Kaggle credentials are written, the dataset is downloaded and unzipped, the
# repository is cloned fresh, and the parquet files are symlinked into it.
import os, json, subprocess

# Mount Google Drive and make sure the wunderfund output folder exists on it.
from google.colab import drive
drive.mount('/content/drive')
os.makedirs('/content/drive/MyDrive/wunderfund', exist_ok=True)

# Install a pinned Kaggle CLI and write its credentials file.
# NOTE: "FILL_IN" is a placeholder; replace it with a real API key before
# running, and avoid committing the real key with the notebook.
!pip install -q kaggle==1.6.14 --force-reinstall
os.makedirs('/root/.kaggle', exist_ok=True)
with open('/root/.kaggle/kaggle.json', 'w') as f:
    json.dump({"username": "vincentvdo6", "key": "FILL_IN"}, f)
# Restrict the credentials file to owner read/write.
os.chmod('/root/.kaggle/kaggle.json', 0o600)

# Download and unpack the competition dataset into /content/data, then list
# the extracted parquet files as a quick visual check.
os.makedirs('/content/data', exist_ok=True)
!kaggle datasets download -d vincentvdo6/wunderfund-predictorium -p /content/data/ --force
!unzip -o -q /content/data/wunderfund-predictorium.zip -d /content/data/
!ls /content/data/*.parquet

# Clone repo
# Any previous checkout is removed first (check=False: a failing rm is
# ignored), so every run starts from a fresh clone.
REPO = "/content/competition_package"
os.chdir("/content")
subprocess.run(["rm", "-rf", REPO], check=False)
subprocess.run(["git", "clone", "https://github.com/vincentvdo6/competition_package.git", REPO], check=True)
os.chdir(REPO)
os.makedirs("datasets", exist_ok=True)
os.makedirs("logs", exist_ok=True)

# Symlink the downloaded parquet files into the repo's datasets/ directory
# (-f replaces an existing link of the same name).
subprocess.run(["ln", "-sf", "/content/data/train.parquet", "datasets/train.parquet"], check=True)
subprocess.run(["ln", "-sf", "/content/data/valid.parquet", "datasets/valid.parquet"], check=True)

# os.path.exists follows symlinks, so these also fail when a link is dangling
# (i.e. the download/unzip step did not produce the file).
assert os.path.exists("datasets/train.parquet"), "train.parquet not found!"
assert os.path.exists("datasets/valid.parquet"), "valid.parquet not found!"

# Print the cloned commit and the GPU name; check_output raises if git or
# nvidia-smi exits with a non-zero status.
commit = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"], text=True).strip()
print(f"Commit: {commit}")
print(f"GPU: {subprocess.check_output(['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'], text=True).strip()}")
print("Ready!")

In [None]:
# Cell 2: Train t0-specialists (seeds 42, 43, 44)
#
# Runs scripts/train.py once per seed with the t0-specialist config, streaming
# the trainer's combined stdout/stderr into the cell output. Seeds whose
# checkpoint file already exists are skipped. A failing seed does not stop the
# remaining seeds; failures are collected and reported at the end.
import os
import subprocess
import sys

os.chdir("/content/competition_package")

SEEDS = [42, 43, 44]
CONFIG = "configs/gru_t0_spec_90.yaml"

print(f"=== T0 SPECIALISTS ({len(SEEDS)} seeds) ===")
print(f"Config: {CONFIG} (loss weights: [0.9, 0.1])")
print("=" * 60, flush=True)

t0_failed_seeds = []
for seed in SEEDS:
    # NOTE(review): assumes scripts/train.py writes its checkpoint to this
    # path -- confirm against the trainer's naming scheme.
    ckpt = f"logs/gru_t0_spec_90_seed{seed}.pt"
    if os.path.exists(ckpt):
        print(f"seed {seed}: checkpoint exists -- skip")
        continue
    print(f"\n{'='*60}")
    print(f"T0-SPEC seed {seed}")
    print(f"{'='*60}", flush=True)
    # The context manager closes the stdout pipe and waits for the child on
    # exit, including when the streaming loop is interrupted by an exception.
    with subprocess.Popen(
        [sys.executable, "-u", "scripts/train.py",
         "--config", CONFIG,
         "--seed", str(seed), "--device", "cuda"],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
    ) as proc:
        for line in proc.stdout:
            print(line, end="", flush=True)
    rc = proc.returncode
    if rc != 0:
        print(f"ERROR: seed {seed} failed with rc={rc}")
        t0_failed_seeds.append(seed)

if t0_failed_seeds:
    print(f"\nT0 specialists finished with FAILED seeds: {t0_failed_seeds}")
else:
    print("\nT0 specialists done!")

In [None]:
# Cell 3: Train t1-specialists (seeds 42, 43, 44)
#
# Runs scripts/train.py once per seed with the t1-specialist config, streaming
# the trainer's combined stdout/stderr into the cell output. Seeds whose
# checkpoint file already exists are skipped. A failing seed does not stop the
# remaining seeds; failures are collected and reported at the end.
import os
import subprocess
import sys

os.chdir("/content/competition_package")

SEEDS = [42, 43, 44]
CONFIG = "configs/gru_t1_spec_90.yaml"

print(f"=== T1 SPECIALISTS ({len(SEEDS)} seeds) ===")
print(f"Config: {CONFIG} (loss weights: [0.1, 0.9])")
print("=" * 60, flush=True)

t1_failed_seeds = []
for seed in SEEDS:
    # NOTE(review): assumes scripts/train.py writes its checkpoint to this
    # path -- confirm against the trainer's naming scheme.
    ckpt = f"logs/gru_t1_spec_90_seed{seed}.pt"
    if os.path.exists(ckpt):
        print(f"seed {seed}: checkpoint exists -- skip")
        continue
    print(f"\n{'='*60}")
    print(f"T1-SPEC seed {seed}")
    print(f"{'='*60}", flush=True)
    # The context manager closes the stdout pipe and waits for the child on
    # exit, including when the streaming loop is interrupted by an exception.
    with subprocess.Popen(
        [sys.executable, "-u", "scripts/train.py",
         "--config", CONFIG,
         "--seed", str(seed), "--device", "cuda"],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
    ) as proc:
        for line in proc.stdout:
            print(line, end="", flush=True)
    rc = proc.returncode
    if rc != 0:
        print(f"ERROR: seed {seed} failed with rc={rc}")
        t1_failed_seeds.append(seed)

if t1_failed_seeds:
    print(f"\nT1 specialists finished with FAILED seeds: {t1_failed_seeds}")
else:
    print("\nT1 specialists done!")

In [None]:
# Cell 4: Evaluate -- per-target scores + stitched comparison vs vanilla baseline
import os
import sys

import numpy as np
import torch

# The project imports below resolve against the cloned repo, so switch into it
# and put it at the front of the module search path first.
os.chdir("/content/competition_package")
sys.path.insert(0, "/content/competition_package")

from src.data.dataset import LOBSequenceDataset
from src.models.gru_baseline import GRUBaseline
from src.evaluation.metrics import compute_weighted_pearson

SEEDS = [42, 43, 44]

# Vanilla baseline per-target val scores (from existing 23-seed pool).
# The per-target tables start out as None placeholders ("will compute");
# only the per-seed averages are filled in here.
VANILLA_T0 = dict.fromkeys(SEEDS)
VANILLA_T1 = dict.fromkeys(SEEDS)
VANILLA_AVG = {
    42: 0.2649,
    43: 0.2737,
    44: 0.2690,
}

# Validation set and a fixed-order loader over it (no shuffling).
print("Loading validation data...")
val_ds = LOBSequenceDataset(
    "datasets/valid.parquet",
    normalize=False,
    derived_features=False,
)
val_loader = torch.utils.data.DataLoader(
    val_ds,
    batch_size=64,
    shuffle=False,
    num_workers=0,
)

def evaluate_model(ckpt_path, device="cuda"):
    """Load a checkpoint and compute per-target validation scores.

    Iterates the module-level ``val_loader``, keeps only the positions
    selected by each batch's mask, clips predictions to [-6, 6] and scores
    them with ``compute_weighted_pearson``.

    Args:
        ckpt_path: Path to a checkpoint holding ``"config"`` and
            ``"model_state_dict"`` entries.
        device: Torch device the model and features are moved to.

    Returns:
        Tuple ``(t0, t1, avg, preds_arr)``: the scores for ``target_idx`` 0
        and 1, their arithmetic mean, and the clipped masked predictions as
        a NumPy array.
    """
    # NOTE(review): weights_only=False unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    ckpt = torch.load(ckpt_path, map_location="cpu", weights_only=False)
    config = ckpt["config"]
    model = GRUBaseline(config)

    # strict=False tolerates key mismatches, which would otherwise leave some
    # weights at their initial values without any sign. Keep the tolerant
    # behaviour but report what did not line up.
    load_result = model.load_state_dict(ckpt["model_state_dict"], strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print(
            f"WARNING: {ckpt_path}: state_dict mismatch "
            f"(missing={list(load_result.missing_keys)}, "
            f"unexpected={list(load_result.unexpected_keys)})"
        )
    model = model.to(device).eval()

    all_preds, all_targets = [], []
    with torch.no_grad():
        for features, targets, masks in val_loader:
            preds, _ = model(features.to(device))
            # One boolean index per batch selects the same rows, in the same
            # order, as indexing each sample with its own mask.
            keep = masks.bool()
            all_preds.append(preds[keep].cpu())
            all_targets.append(targets[keep].cpu())

    preds_arr = torch.cat(all_preds, dim=0).numpy().clip(-6, 6)
    tgts_arr = torch.cat(all_targets, dim=0).numpy()

    # compute_weighted_pearson expects (N, 2) arrays
    t0 = compute_weighted_pearson(tgts_arr, preds_arr, target_idx=0)
    t1 = compute_weighted_pearson(tgts_arr, preds_arr, target_idx=1)
    return t0, t1, (t0 + t1) / 2, preds_arr

# Evaluate all models
# results[variant][seed] -> {"t0", "t1", "avg", "preds"} for every checkpoint
# that exists; missing checkpoints are reported and left out.
results = {}
for variant in ["t0_spec_90", "t1_spec_90"]:
    results[variant] = {}
    for seed in SEEDS:
        ckpt = f"logs/gru_{variant}_seed{seed}.pt"
        if not os.path.exists(ckpt):
            print(f"MISSING: {ckpt}")
            continue
        t0, t1, avg, preds = evaluate_model(ckpt)
        results[variant][seed] = {"t0": t0, "t1": t1, "avg": avg, "preds": preds}
        print(f"{variant} s{seed}: t0={t0:.4f}  t1={t1:.4f}  avg={avg:.4f}")

# Reference values for the comparison printed below.
ANCHOR_AVG = 0.2810  # vanilla anchor average
KILL_GATE = 0.2814   # minimum stitched score required to pass

print("\n" + "=" * 70)
print("PER-TARGET SPECIALIST RESULTS")
print("=" * 70)
print(f"{'Seed':<6} {'t0-spec t0':>12} {'t1-spec t1':>12} {'vanilla avg':>12}")
print("-" * 50)

# Only seeds that were actually evaluated contribute to the means; a missing
# checkpoint is shown as "n/a" instead of being averaged in as 0.
t0_spec_scores = []
t1_spec_scores = []
for seed in SEEDS:
    t0_val = results.get("t0_spec_90", {}).get(seed, {}).get("t0")
    t1_val = results.get("t1_spec_90", {}).get(seed, {}).get("t1")
    van = VANILLA_AVG[seed]
    if t0_val is not None:
        t0_spec_scores.append(t0_val)
    if t1_val is not None:
        t1_spec_scores.append(t1_val)
    t0_txt = f"{t0_val:>12.4f}" if t0_val is not None else f"{'n/a':>12}"
    t1_txt = f"{t1_val:>12.4f}" if t1_val is not None else f"{'n/a':>12}"
    print(f"s{seed:<5} {t0_txt} {t1_txt} {van:>12.4f}")

# Stitched score: average of (mean t0-spec t0, mean t1-spec t1)
# NOTE(review): this averages per-seed scores; the stored "preds" arrays are
# not combined here, and only the stitched threshold is checked (no
# per-target comparison against the vanilla baseline).
if t0_spec_scores and t1_spec_scores:
    mean_t0 = np.mean(t0_spec_scores)
    mean_t1 = np.mean(t1_spec_scores)
    stitched = (mean_t0 + mean_t1) / 2

    print(f"\nStitched score (mean t0-spec t0 + mean t1-spec t1) / 2:")
    print(f"  t0 specialist mean t0: {mean_t0:.4f}")
    print(f"  t1 specialist mean t1: {mean_t1:.4f}")
    print(f"  Stitched avg:          {stitched:.4f}")
    print(f"  Vanilla anchor avg:    {ANCHOR_AVG:.4f}")
    print(f"  Delta vs anchor:       {stitched - ANCHOR_AVG:+.4f}")
    if len(t0_spec_scores) < len(SEEDS) or len(t1_spec_scores) < len(SEEDS):
        print(
            f"WARNING: means use {len(t0_spec_scores)}/{len(SEEDS)} t0-spec and "
            f"{len(t1_spec_scores)}/{len(SEEDS)} t1-spec seeds"
        )
    print(f"\nKill gate ({KILL_GATE:.4f}): {'PASS' if stitched >= KILL_GATE else 'FAIL'}")
else:
    # Nothing to average for at least one variant: report that explicitly
    # rather than printing a score derived from an empty list.
    mean_t0 = mean_t1 = stitched = float("nan")
    print(
        f"\nKill gate ({KILL_GATE:.4f}): NOT EVALUATED "
        "(no results for at least one specialist variant)"
    )

In [None]:
# Cell 5: Save checkpoints to Drive
import os
import shutil

import torch

os.chdir("/content/competition_package")

SEEDS = [42, 43, 44]
DRIVE_DIR = "/content/drive/MyDrive/wunderfund"

# Visit every (variant, seed) pair; pairs without a local checkpoint are
# skipped without a message.
for variant in ["t0_spec_90", "t1_spec_90"]:
    for seed in SEEDS:
        src = f"logs/gru_{variant}_seed{seed}.pt"
        if not os.path.exists(src):
            continue

        full = torch.load(src, map_location="cpu", weights_only=False)

        # Copy over only the weights plus three metadata entries; the weights
        # are required, the metadata entries fall back to defaults.
        slim = {"model_state_dict": full["model_state_dict"]}
        slim["config"] = full.get("config", {})
        slim["best_score"] = full.get("best_score", None)
        slim["best_epoch"] = full.get("best_epoch", None)

        dst = f"{DRIVE_DIR}/gru_{variant}_seed{seed}.pt"
        torch.save(slim, dst)

        size_mb = os.path.getsize(dst) / 1e6
        print(f"Saved: gru_{variant}_seed{seed}.pt ({size_mb:.1f}MB)")

print("\nDone! Download from Drive for local analysis.")