# Regularized GRU Kill Test (WD + Dropout + OneCycleLR)
**Changes vs tightwd_v2**: weight decay (WD) 5e-5 → 2e-4, dropout 0.22 → 0.30, scheduler OneCycleLR(max_lr=2e-3, pct_start=0.1)

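**Kill test**: 3 seeds per config. Pass if the mean validation score beats the tightwd_v2 baseline mean by at least +0.0012 (i.e. >= 0.2596 against a baseline of ~0.2584) AND at least 2 of the 3 seeds score higher than the baseline.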

In [None]:
# Cell 0: Mount Drive, download data from Kaggle
#
# Colab-only setup cell: mounts Google Drive, installs the Kaggle CLI,
# writes the Kaggle credentials file, then downloads and unpacks the dataset
# into /content/data. Lines starting with `!` are notebook shell escapes, so
# this cell only runs inside an IPython/Colab kernel, not as plain Python.
import os, json

from google.colab import drive
drive.mount('/content/drive')
# Drive folder that Cell 5 copies the final zip archive into.
os.makedirs('/content/drive/MyDrive/wunderfund', exist_ok=True)

# Pin the Kaggle CLI version; --force-reinstall reinstalls it even if some
# version is already present in the runtime.
!pip install -q kaggle==1.6.14 --force-reinstall
os.makedirs('/root/.kaggle', exist_ok=True)
# NOTE: "FILL_IN" is a placeholder. Replace it with a real Kaggle API key
# before running this cell, and do not commit the real key.
with open('/root/.kaggle/kaggle.json', 'w') as f:
    json.dump({"username": "vincentvdo6", "key": "FILL_IN"}, f)
# Restrict the credentials file to owner read/write only.
os.chmod('/root/.kaggle/kaggle.json', 0o600)

os.makedirs('/content/data', exist_ok=True)
# Download the dataset zip into /content/data (--force presumably re-downloads
# even when the file already exists -- confirm against the Kaggle CLI docs).
!kaggle datasets download -d vincentvdo6/wunderfund-predictorium -p /content/data/ --force
# -o overwrites existing files without prompting, -q keeps the output quiet.
!unzip -o -q /content/data/wunderfund-predictorium.zip -d /content/data/
# Visual sanity check: list the parquet files that were extracted.
!ls /content/data/*.parquet

In [None]:
# Cell 1: Setup — clone repo, link data
#
# Starts from a clean checkout of the competition repo, creates the
# datasets/ and logs/ directories inside it, and symlinks the parquet files
# from /content/data into datasets/.
#
# Every external command goes through _run(), which raises
# subprocess.CalledProcessError on a non-zero exit status. A failed
# `git clone` or `ln` therefore stops the cell at the failing step instead
# of being ignored and surfacing later as a less obvious error.
import os, subprocess
REPO = "/content/competition_package"


def _run(*cmd):
    """Run an external command without a shell; raise if it exits non-zero."""
    subprocess.run(list(cmd), check=True)


os.chdir("/content")
# Remove any previous checkout so the clone always starts from scratch.
_run("rm", "-rf", REPO)
_run("git", "clone", "https://github.com/vincentvdo6/competition_package.git", REPO)
os.chdir(REPO)
os.makedirs("datasets", exist_ok=True)
os.makedirs("logs", exist_ok=True)

# -f replaces an existing link/file of the same name, so the cell can be re-run.
_run("ln", "-sf", "/content/data/train.parquet", "datasets/train.parquet")
_run("ln", "-sf", "/content/data/valid.parquet", "datasets/valid.parquet")

# os.path.exists() follows symlinks, so these checks fail when the link was
# created but the parquet file it points to is missing.
assert os.path.exists("datasets/train.parquet"), "train.parquet not found!"
assert os.path.exists("datasets/valid.parquet"), "valid.parquet not found!"
print("Commit:", subprocess.check_output(["git", "rev-parse", "--short", "HEAD"], text=True).strip())
print(f"GPU: {os.popen('nvidia-smi --query-gpu=name --format=csv,noheader').read().strip()}")
print("Ready!")

In [None]:
# Cell 2: Train 3 seeds — regularized_v1
#
# Runs scripts/train.py once per seed, one run after another, and relays the
# child's combined stdout/stderr into the notebook output line by line.
# A failing seed is reported and the loop moves on to the next seed.
import os, subprocess, sys
os.chdir("/content/competition_package")

SEEDS = [42, 43, 44]
CONFIG = "configs/gru_regularized_v1.yaml"
banner = "=" * 60
print(f"Training {CONFIG} with seeds {SEEDS}")
print(banner, flush=True)

for seed in SEEDS:
    print("\n" + banner)
    print(f"Training gru_regularized_v1 seed {seed}")
    print(banner, flush=True)

    # -u keeps the child's output unbuffered so lines arrive as they are written.
    cmd = [
        sys.executable, "-u", "scripts/train.py",
        "--config", CONFIG,
        "--seed", str(seed),
        "--device", "cuda",
    ]
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr into the same stream
        text=True,
    )
    for line in proc.stdout:
        print(line, end="", flush=True)

    exit_code = proc.wait()
    if exit_code != 0:
        print(f"ERROR: seed {seed} failed with return code {exit_code}")

print(f"\nDone: regularized_v1 seeds {SEEDS}")

In [None]:
# Cell 3: Train 3 seeds — tightwd_v2 baseline (same seeds for fair comparison)
#
# Same procedure as the regularized run, but with the tightwd_v2 config:
# scripts/train.py is run once per seed and its combined stdout/stderr is
# relayed into the notebook output line by line. A failing seed is reported
# and the loop moves on to the next seed.
import os, subprocess, sys
os.chdir("/content/competition_package")

SEEDS = [42, 43, 44]
CONFIG = "configs/gru_derived_tightwd_v2.yaml"
banner = "=" * 60
print(f"Training {CONFIG} baseline with seeds {SEEDS}")
print(banner, flush=True)

for seed in SEEDS:
    print("\n" + banner)
    print(f"Training tightwd_v2 baseline seed {seed}")
    print(banner, flush=True)

    # -u keeps the child's output unbuffered so lines arrive as they are written.
    cmd = [
        sys.executable, "-u", "scripts/train.py",
        "--config", CONFIG,
        "--seed", str(seed),
        "--device", "cuda",
    ]
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr into the same stream
        text=True,
    )
    for line in proc.stdout:
        print(line, end="", flush=True)

    exit_code = proc.wait()
    if exit_code != 0:
        print(f"ERROR: seed {seed} failed with return code {exit_code}")

print(f"\nDone: tightwd_v2 baseline seeds {SEEDS}")

In [None]:
# Cell 4: Compare results — kill test
#
# Loads every final checkpoint in logs/, splits them by filename into the
# regularized_v1 candidate and the tightwd_v2 baseline, and applies the kill
# test: the candidate's mean best_score must exceed the baseline mean by at
# least MIN_DELTA, and the candidate must win at least MIN_POSITIVE of the
# pairwise comparisons.
#
# The verdict is only PASS/FAIL when both groups are non-empty and the same
# size. Otherwise (e.g. a seed failed during training) the pairwise zip would
# silently drop or misalign runs, so the cell reports INCONCLUSIVE instead.
import os, glob, torch
os.chdir("/content/competition_package")

MIN_DELTA = 0.0012    # required improvement of the mean score over the baseline
MIN_POSITIVE = 2      # required number of pairs in which the candidate wins

reg_scores = []
base_scores = []

for pt in sorted(glob.glob("logs/*.pt")):
    basename = os.path.basename(pt)
    if '_epoch' in basename:
        # Presumably per-epoch snapshots rather than final checkpoints -- confirm.
        continue
    try:
        # weights_only=False does full unpickling; the files in logs/ are
        # expected to be produced by this notebook's own training runs.
        ckpt = torch.load(pt, map_location="cpu", weights_only=False)
    except TypeError:
        # torch versions whose torch.load() has no weights_only keyword.
        ckpt = torch.load(pt, map_location="cpu")
    score = float(ckpt.get("best_score", 0))
    epoch = ckpt.get("best_epoch", "?")

    if 'regularized' in basename:
        reg_scores.append((basename, score, epoch))
    elif 'tightwd' in basename:
        base_scores.append((basename, score, epoch))

reg_sorted = sorted(reg_scores)
base_sorted = sorted(base_scores)

print("REGULARIZED v1:")
for name, score, epoch in reg_sorted:
    print(f"  {name}: val={score:.4f}, epoch={epoch}")

print("\nTIGHTWD_V2 BASELINE:")
for name, score, epoch in base_sorted:
    print(f"  {name}: val={score:.4f}, epoch={epoch}")

# Reasons the comparison cannot be trusted; an empty list means it is valid.
problems = []
if not reg_sorted:
    problems.append("no 'regularized' checkpoints found in logs/")
if not base_sorted:
    problems.append("no 'tightwd' checkpoints found in logs/")
if len(reg_sorted) != len(base_sorted):
    problems.append(
        f"checkpoint counts differ ({len(reg_sorted)} regularized vs "
        f"{len(base_sorted)} tightwd), pairwise comparison would be misaligned"
    )

reg_mean = sum(s for _, s, _ in reg_sorted) / len(reg_sorted) if reg_sorted else 0
base_mean = sum(s for _, s, _ in base_sorted) / len(base_sorted) if base_sorted else 0
delta = reg_mean - base_mean

# Runs are paired by sorted filename order; this assumes both configs'
# filenames sort in the same seed order -- TODO confirm against train.py.
pairs = list(zip(reg_sorted, base_sorted))
positive = sum(1 for (_, rs, _), (_, bs, _) in pairs if rs > bs)

print(f"\n{'='*60}")
print(f"Reg mean:  {reg_mean:.4f}")
print(f"Base mean: {base_mean:.4f}")
print(f"Delta:     {delta:+.4f}")
print(f"Positive seeds: {positive}/{len(pairs)}")
print("\nKILL TEST: mean >= +0.0012 AND >=2/3 positive")

# `passed` stays a plain bool; it is never True for an untrustworthy comparison.
passed = (not problems) and delta >= MIN_DELTA and positive >= MIN_POSITIVE
if problems:
    for problem in problems:
        print(f"WARNING: {problem}")
    print(f"RESULT: INCONCLUSIVE (delta={delta:+.4f}, {positive}/{len(pairs)} positive)")
else:
    print(f"RESULT: {'PASS' if passed else 'FAIL'} (delta={delta:+.4f}, {positive}/{len(pairs)} positive)")

In [None]:
# Cell 5: Strip checkpoints + zip + save to Drive
#
# For every final checkpoint in logs/, writes a reduced copy to logs/slim/
# that keeps only the model weights, config, best score and best epoch.
# The normalizer .npz files are copied alongside, the slim folder is zipped,
# and the archive is copied to Google Drive.
import os, torch, glob, shutil
os.chdir("/content/competition_package")
os.makedirs("logs/slim", exist_ok=True)

# Files with '_epoch' in the name are left out of the archive.
final_ckpts = [
    path for path in sorted(glob.glob("logs/*.pt"))
    if '_epoch' not in os.path.basename(path)
]

for pt in final_ckpts:
    basename = os.path.basename(pt)
    try:
        ckpt = torch.load(pt, map_location="cpu", weights_only=False)
    except TypeError:
        # torch versions whose torch.load() has no weights_only keyword.
        ckpt = torch.load(pt, map_location="cpu")

    # Only the weights are mandatory; the other entries fall back to defaults.
    slim = {"model_state_dict": ckpt["model_state_dict"]}
    slim["config"] = ckpt.get("config", {})
    for key in ("best_score", "best_epoch"):
        slim[key] = ckpt.get(key, None)

    out = f"logs/slim/{basename}"
    torch.save(slim, out)

    size_before_mb = os.path.getsize(pt) / 1e6
    size_after_mb = os.path.getsize(out) / 1e6
    print(f"{basename}: {size_before_mb:.1f}MB -> {size_after_mb:.1f}MB")

for npz in sorted(glob.glob("logs/normalizer_*.npz")):
    npz_name = os.path.basename(npz)
    shutil.copy(npz, f"logs/slim/{npz_name}")
    print(f"Copied {npz_name}")

# make_archive appends ".zip" to the base name it is given.
shutil.make_archive(
    "/content/regularized_gate",
    "zip",
    "/content/competition_package/logs/slim",
)
archive_mb = os.path.getsize("/content/regularized_gate.zip") / 1e6
print(f"\nregularized_gate.zip: {archive_mb:.1f}MB")

shutil.copy(
    "/content/regularized_gate.zip",
    "/content/drive/MyDrive/wunderfund/regularized_gate.zip",
)
print("Saved to Drive: MyDrive/wunderfund/regularized_gate.zip")