# Vanilla GRU Scaling
**Breakthrough**: Vanilla GRU (h=64, 3L, raw32, no norm, MSE, linear output) scored **0.2814 LB** — new PB!
The LB − val gap flipped **POSITIVE** (+0.0077: LB 0.2814 vs. val 0.2737 for the submitted seed 43), compared with −0.0004 for tightwd_v2. Only 0.0020 from the top 100.

**Scaling test**: Train h=128, h=144, h=192 (3L), h=192 (2L) with 3 seeds each.
Pick best config → seed expansion → single-model LB test → ensemble.

In [None]:
# Cell 0: Mount Drive, download data from Kaggle
# Colab-only cell: relies on `google.colab` and on IPython `!` shell magics,
# so it cannot be run as a plain Python script.
import os, json

# Mount Google Drive and make sure the project folder exists on it
# (Cell 4 copies its zip archive into this folder).
from google.colab import drive
drive.mount('/content/drive')
os.makedirs('/content/drive/MyDrive/wunderfund', exist_ok=True)

# Install a pinned version of the Kaggle CLI, then write the credentials
# file for it.
# NOTE: "FILL_IN" is a placeholder — replace it with a real API key before
# running, and never commit the real key.
!pip install -q kaggle==1.6.14 --force-reinstall
os.makedirs('/root/.kaggle', exist_ok=True)
with open('/root/.kaggle/kaggle.json', 'w') as f:
    json.dump({"username": "vincentvdo6", "key": "FILL_IN"}, f)
# Restrict the credentials file to owner read/write only.
os.chmod('/root/.kaggle/kaggle.json', 0o600)

# Download the dataset archive into /content/data, unpack it in place
# (overwriting existing files), and list the parquet files as a sanity check.
os.makedirs('/content/data', exist_ok=True)
!kaggle datasets download -d vincentvdo6/wunderfund-predictorium -p /content/data/ --force
!unzip -o -q /content/data/wunderfund-predictorium.zip -d /content/data/
!ls /content/data/*.parquet

In [None]:
# Cell 1: Setup — clone repo, link data
import os
import subprocess

REPO = "/content/competition_package"

# Always start from a fresh checkout: drop any earlier clone (failure to
# remove is tolerated), then clone again (failure aborts the cell).
os.chdir("/content")
subprocess.run(["rm", "-rf", REPO], check=False)
clone_cmd = [
    "git", "clone",
    "https://github.com/vincentvdo6/competition_package.git",
    REPO,
]
subprocess.run(clone_cmd, check=True)

# From here on, all relative paths are resolved against the repo root.
os.chdir(REPO)
for folder in ("datasets", "logs"):
    os.makedirs(folder, exist_ok=True)

# Symlink the downloaded parquet files into datasets/ ...
for split in ("train", "valid"):
    subprocess.run(
        ["ln", "-sf", f"/content/data/{split}.parquet", f"datasets/{split}.parquet"],
        check=True,
    )

# ... and only then verify that every link resolves to an existing file
# (os.path.exists follows symlinks, so a dangling link fails here).
for split in ("train", "valid"):
    assert os.path.exists(f"datasets/{split}.parquet"), f"{split}.parquet not found!"

# Report which commit and which GPU this session is running on.
commit = subprocess.check_output(
    ["git", "rev-parse", "--short", "HEAD"], text=True
).strip()
print(f"Commit: {commit}")
gpu_name = subprocess.check_output(
    ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], text=True
).strip()
print(f"GPU: {gpu_name}")
print("Ready!")

In [None]:
# Cell 2: Train ALL vanilla scaling configs (h=128, h=144, h=192 3L, h=192 2L) x 3 seeds
# Runs scripts/train.py once per (config, seed) pair as a child process and
# streams its output live. A failing run does not stop the sweep; failures
# are collected and summarised at the end so they are not lost in the logs.
import os, subprocess, sys
os.chdir("/content/competition_package")

# (display name, config file) pairs, trained in this order.
CONFIGS = [
    ("vanilla_h128", "configs/vanilla_h128.yaml"),
    ("vanilla_h192", "configs/vanilla_h192.yaml"),
    ("vanilla_h144", "configs/vanilla_h144.yaml"),
    ("vanilla_h192_2L", "configs/vanilla_h192_2L.yaml"),
]
SEEDS = [42, 43, 44]

total_runs = len(CONFIGS) * len(SEEDS)
print("=== VANILLA GRU SCALING ===")
print(f"Configs: {len(CONFIGS)} x {len(SEEDS)} seeds = {total_runs} runs")
print("=" * 60, flush=True)

# (config_name, seed, return code) for every run that exited non-zero.
failed_runs = []

for config_name, config_path in CONFIGS:
    for seed in SEEDS:
        print(f"\n{'='*60}")
        print(f"Training {config_name} seed {seed}")
        print(f"{'='*60}", flush=True)
        cmd = [
            sys.executable, "-u", "scripts/train.py",
            "--config", config_path,
            "--seed", str(seed), "--device", "cuda",
        ]
        # stderr is merged into stdout so the output is echoed in order.
        # The context manager closes the pipe and waits for the child on
        # exit, so proc.returncode is set once the `with` block ends.
        with subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
        ) as proc:
            for line in proc.stdout:
                print(line, end="", flush=True)
        if proc.returncode != 0:
            print(f"ERROR: {config_name} seed {seed} failed with return code {proc.returncode}")
            failed_runs.append((config_name, seed, proc.returncode))

if failed_runs:
    # Do not claim success when some runs died; list them explicitly.
    succeeded = total_runs - len(failed_runs)
    print(f"\nTraining finished: {succeeded}/{total_runs} runs succeeded, {len(failed_runs)} FAILED:")
    for failed_name, failed_seed, failed_code in failed_runs:
        print(f"  {failed_name} seed {failed_seed} (return code {failed_code})")
else:
    print("\nAll training done!")

In [None]:
# Cell 3: Evaluation — compare all configs
# Reads the final checkpoint of every (config, seed) run from logs/, prints
# the per-seed validation scores, and ranks the configs by mean validation
# score against the h=64 parity_v1 reference.
import os, glob, torch
os.chdir("/content/competition_package")

# Reference points
PARITY_V1_VAL = 0.2692       # mean val (3 seeds)
PARITY_V1_BEST_VAL = 0.2737  # val of the best seed (s43)
PARITY_V1_LB = 0.2814        # best seed (s43, val 0.2737)
TIGHTWD_VAL = 0.2660         # our previous best single-model recipe
OFFICIAL_LB = 0.2761         # official baseline h=64

# config name -> {'mean', 'best', 'best_name', 'scores', 'results'}
all_results = {}

for config_name in ["vanilla_h128", "vanilla_h144", "vanilla_h192", "vanilla_h192_2L"]:
    print(f"\n{'='*70}")
    print(f"  {config_name}")
    print(f"{'='*70}")

    # (checkpoint file name, best val score, best epoch) per seed.
    results = []
    # The "_seed" suffix in the pattern keeps "vanilla_h192" from also
    # matching the "vanilla_h192_2L" checkpoints.
    for pt in sorted(glob.glob(f"logs/{config_name}_seed*.pt")):
        basename = os.path.basename(pt)
        if '_epoch' in basename:
            # Skip files with "_epoch" in the name; only the remaining
            # per-seed checkpoints are scored.
            continue
        # NOTE: weights_only=False unpickles arbitrary objects; only load
        # checkpoints produced by our own training runs.
        ckpt = torch.load(pt, map_location="cpu", weights_only=False)
        raw_score = ckpt.get("best_score")
        if raw_score is None:
            # A missing score must not be counted as 0.0: that would
            # silently drag the config's mean down.
            print(f"  WARNING: {basename} has no best_score, skipping")
            continue
        results.append((basename, float(raw_score), ckpt.get("best_epoch", "N/A")))

    if not results:
        print("  No checkpoints found!")
        continue

    print(f"{'Model':<50} {'Val Score':>10} {'Epoch':>6}")
    print("-" * 70)
    for name, score, epoch in results:
        print(f"{name:<50} {score:>10.4f} {str(epoch):>6}")

    scores = [s for _, s, _ in results]
    mean_val = sum(scores) / len(scores)
    # max() returns the first entry with the highest score.
    best_name, best_val, _ = max(results, key=lambda r: r[1])

    all_results[config_name] = {
        'mean': mean_val, 'best': best_val, 'best_name': best_name,
        'scores': scores, 'results': results
    }

    print(f"\nMean val: {mean_val:.4f} (vs parity_v1 h=64: {mean_val - PARITY_V1_VAL:+.4f})")
    print(f"Best val: {best_val:.4f} ({best_name})")

print(f"\n{'='*70}")
print("SUMMARY")
print(f"{'='*70}")
print(f"{'Config':<25} {'Mean Val':>10} {'Best Val':>10} {'vs h=64':>10} {'Best Seed':>30}")
print("-" * 90)
print(f"{'parity_v1 (h=64, 3L)':<25} {PARITY_V1_VAL:>10.4f} {PARITY_V1_BEST_VAL:>10.4f} {'baseline':>10} {'s43':>30}")
# Rank configs by mean validation score, best first.
for name, data in sorted(all_results.items(), key=lambda x: -x[1]['mean']):
    delta = data['mean'] - PARITY_V1_VAL
    print(f"{name:<25} {data['mean']:>10.4f} {data['best']:>10.4f} {delta:>+10.4f} {data['best_name']:>30}")

print("\nINTERPRETATION:")
# The gap printed here is LB minus the MEAN val over seeds; measured against
# the submitted seed's own val (PARITY_V1_BEST_VAL) it is smaller.
print(f"parity_v1 h=64: val={PARITY_V1_VAL:.4f}, LB={PARITY_V1_LB:.4f} (gap {PARITY_V1_LB - PARITY_V1_VAL:+.4f})")
# NOTE(review): no verdict is defined for a mean val in [0.265, 0.270) —
# confirm how that band should be treated.
print("If mean val >= 0.275: STRONG PASS — submit best seed immediately")
print("If mean val 0.270-0.275: PASS — submit for LB gap test")
print("If mean val < 0.265: FAIL — larger vanilla GRU may overfit")

In [None]:
# Cell 4: Strip checkpoints + zip + save to Drive
# Rewrites each final checkpoint keeping only the model weights, config and
# best score/epoch, zips the slimmed files, and copies the zip to Drive.
import os, torch, glob, shutil
os.chdir("/content/competition_package")
os.makedirs("logs/slim", exist_ok=True)

# The "vanilla_h192*" glob also matches the "vanilla_h192_2L" files, so
# collect paths into a set to process every checkpoint exactly once.
checkpoint_paths = sorted({
    pt
    for pattern in ["vanilla_h128", "vanilla_h144", "vanilla_h192", "vanilla_h192_2L"]
    for pt in glob.glob(f"logs/{pattern}*.pt")
    if '_epoch' not in os.path.basename(pt)  # skip files with "_epoch" in the name
})

if not checkpoint_paths:
    # Stop before archiving: otherwise an empty zip would overwrite any
    # earlier archive already saved on Drive.
    raise FileNotFoundError("No vanilla_* checkpoints found in logs/ — nothing to archive")

for pt in checkpoint_paths:
    basename = os.path.basename(pt)
    # NOTE: weights_only=False unpickles arbitrary objects; only load
    # checkpoints produced by our own training runs.
    ckpt = torch.load(pt, map_location="cpu", weights_only=False)
    # Keep only these four entries; everything else stored in the
    # checkpoint is dropped.
    slim = {
        "model_state_dict": ckpt["model_state_dict"],
        "config": ckpt.get("config", {}),
        "best_score": ckpt.get("best_score", None),
        "best_epoch": ckpt.get("best_epoch", None),
    }
    out = f"logs/slim/{basename}"
    torch.save(slim, out)
    orig = os.path.getsize(pt) / 1e6
    new = os.path.getsize(out) / 1e6
    print(f"{basename}: {orig:.1f}MB -> {new:.1f}MB")

# make_archive appends ".zip" to the base name itself.
shutil.make_archive("/content/vanilla_scaling", "zip",
                     "/content/competition_package/logs/slim")
sz = os.path.getsize("/content/vanilla_scaling.zip") / 1e6
print(f"\nvanilla_scaling.zip: {sz:.1f}MB")

shutil.copy("/content/vanilla_scaling.zip",
            "/content/drive/MyDrive/wunderfund/vanilla_scaling.zip")
print("Saved to Drive: MyDrive/wunderfund/vanilla_scaling.zip")