# Mixup Kill Test
**Goal**: Train vanilla GRU h=64 with sequence-level Mixup augmentation.

Mixup interpolates pairs of training sequences and their targets:
- `x' = lam*x_i + (1-lam)*x_j`, `y' = lam*y_i + (1-lam)*y_j`
- `lam ~ Beta(0.2, 0.2)`, applied with prob=0.25, annealed off in the last 25% of epochs
- Architecture and inference are IDENTICAL to base parity_v1

**Kill test**: 3 seeds (s42, s43, s44). Compare val avg vs base parity_v1 mean (0.2689).
- **Kill**: mean val avg < 0.2680
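- **Scale up**: mean val avg > 0.2720
- **Neutral**: mean val avg in [0.2680, 0.2720] (neither kill nor scale up; needs a manual decision)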
- **Base scores**: s42=0.2649, s43=0.2737, s44=0.2690

In [None]:
# Cell 0: Mount Drive, download data from Kaggle
#
# Colab-only cell: it imports google.colab and uses IPython `!` shell magics,
# so it cannot run as a plain Python script. Steps, in order:
#   1. mount Google Drive and create the MyDrive/wunderfund output folder,
#   2. install a pinned kaggle CLI and write its credentials file,
#   3. download and unzip the competition dataset into /content/data.
import os, json

from google.colab import drive
drive.mount('/content/drive')
# Folder on Drive that the last cell of this notebook copies its zip into.
os.makedirs('/content/drive/MyDrive/wunderfund', exist_ok=True)

# Pin the kaggle CLI to 1.6.14 and force a reinstall over any existing version.
!pip install -q kaggle==1.6.14 --force-reinstall
os.makedirs('/root/.kaggle', exist_ok=True)
with open('/root/.kaggle/kaggle.json', 'w') as f:
    # "FILL_IN" is a placeholder: replace it with a real API key before running.
    # NOTE(review): presumably the download below fails with the placeholder;
    # do not commit a real key into the notebook.
    json.dump({"username": "vincentvdo6", "key": "FILL_IN"}, f)
# Restrict the credentials file to owner read/write only.
os.chmod('/root/.kaggle/kaggle.json', 0o600)

os.makedirs('/content/data', exist_ok=True)
# `!` commands do not raise on failure, so check the output of the final `ls`:
# it lists the extracted parquet files and is the only visible success signal.
!kaggle datasets download -d vincentvdo6/wunderfund-predictorium -p /content/data/ --force
# -o overwrites existing files without prompting, -q keeps unzip quiet.
!unzip -o -q /content/data/wunderfund-predictorium.zip -d /content/data/
!ls /content/data/*.parquet

In [None]:
# Cell 1: Setup -- clone repo, link data
#
# Produces a fresh checkout of the competition repo under /content, creates the
# datasets/ and logs/ folders inside it, symlinks the downloaded parquet files
# into datasets/, and prints the checked-out commit and the GPU name.
import os, subprocess
REPO = "/content/competition_package"

# Step out of the repo before deleting it so the cwd never points at a removed dir.
os.chdir("/content")
subprocess.run(["rm", "-rf", REPO], check=False)
subprocess.run(
    ["git", "clone", "https://github.com/vincentvdo6/competition_package.git", REPO],
    check=True,
)
os.chdir(REPO)

for folder in ("datasets", "logs"):
    os.makedirs(folder, exist_ok=True)

# Link both splits first, then verify both, so a missing file is reported only
# after every link has been created.
parquet_files = ("train.parquet", "valid.parquet")
for fname in parquet_files:
    subprocess.run(
        ["ln", "-sf", f"/content/data/{fname}", f"datasets/{fname}"],
        check=True,
    )
for fname in parquet_files:
    # os.path.exists follows the symlink, so this also checks the link target.
    assert os.path.exists(f"datasets/{fname}"), f"{fname} not found!"

commit = subprocess.check_output(
    ["git", "rev-parse", "--short", "HEAD"], text=True
).strip()
print(f"Commit: {commit}")

# check_output raises if nvidia-smi is missing or exits non-zero.
gpu_name = subprocess.check_output(
    ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], text=True
).strip()
print(f"GPU: {gpu_name}")
print("Ready!")

In [None]:
# Cell 2: Train Mixup kill test (3 seeds)
#
# Runs scripts/train.py once per seed as a child process, streaming its combined
# stdout/stderr into the notebook output line by line. A failing seed does not
# stop the remaining seeds (best effort), but every failure is recorded and
# reported in the final summary so it cannot be missed in the training logs.
import os, subprocess, sys
os.chdir("/content/competition_package")

CONFIG = "configs/gru_parity_v1_mixup.yaml"
SEEDS = [42, 43, 44]

print("=== MIXUP KILL TEST ===")
print(f"Config: {CONFIG}")
print(f"Seeds: {SEEDS}")
print("=" * 60, flush=True)

failed_seeds = []  # seeds whose training process exited with a non-zero code
for seed in SEEDS:
    print(f"\n{'='*60}")
    print(f"Training mixup seed {seed}")
    print(f"{'='*60}", flush=True)
    # The context manager closes the pipe and waits for the child on exit,
    # including when streaming is interrupted (e.g. the cell is stopped).
    with subprocess.Popen(
        [sys.executable, "-u", "scripts/train.py",
         "--config", CONFIG,
         "--seed", str(seed), "--device", "cuda"],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
    ) as proc:
        for line in proc.stdout:
            print(line, end="", flush=True)
    if proc.returncode != 0:
        print(f"ERROR: seed {seed} failed with rc={proc.returncode}")
        failed_seeds.append(seed)

if failed_seeds:
    n_ok = len(SEEDS) - len(failed_seeds)
    print(f"\nTraining finished WITH FAILURES: {n_ok}/{len(SEEDS)} seeds succeeded, "
          f"failed seeds: {failed_seeds}")
else:
    print("\nAll training done!")

In [None]:
# Cell 3: Evaluate Mixup vs base
#
# Reads best_score / best_epoch from every final (non "_epoch") mixup checkpoint
# in logs/, compares each seed with the hard-coded base parity_v1 score, and
# prints the kill / scale-up / neutral verdict based on the mean validation score.
#
# Checkpoints that cannot be scored are skipped with a warning instead of being
# counted as 0.0, and a warning is printed when the verdict is based on fewer
# seeds than BASE_SCORES lists.
import os, glob, re, torch
os.chdir("/content/competition_package")

BASE_SCORES = {42: 0.2649, 43: 0.2737, 44: 0.2690}
# NOTE(review): the three per-seed values above average to 0.2692, not 0.2689;
# confirm which figure is authoritative. Kept as given; it is only printed.
BASE_MEAN = 0.2689
KILL_THRESHOLD = 0.2680      # mean below this -> kill
SCALE_UP_THRESHOLD = 0.2720  # mean above this -> scale up

# Final checkpoints are expected to be named "...seed<digits>.pt".
seed_pattern = re.compile(r"seed(\d+)\.pt$")

# Collect mixup results as (seed, score, epoch, delta); delta is None when the
# seed has no base score to compare against.
mixup_scores = []
for pt in sorted(glob.glob("logs/gru_parity_v1_mixup_seed*.pt")):
    basename = os.path.basename(pt)
    if '_epoch' in basename:
        continue
    seed_match = seed_pattern.search(basename)
    if seed_match is None:
        print(f"WARNING: skipping {basename}: cannot parse a seed from the filename")
        continue
    seed = int(seed_match.group(1))
    # weights_only=False unpickles arbitrary objects; only load checkpoints
    # produced by this notebook's own training run.
    ckpt = torch.load(pt, map_location="cpu", weights_only=False)
    raw_score = ckpt.get("best_score")
    if raw_score is None:
        # Counting a missing score as 0.0 would drag the mean down and could
        # trigger a false KILL verdict.
        print(f"WARNING: skipping {basename}: checkpoint has no best_score")
        continue
    score = float(raw_score)
    epoch = ckpt.get("best_epoch", "N/A")
    base = BASE_SCORES.get(seed)
    if base is None:
        delta = None
        print(f"seed {seed}: val={score:.4f}, epoch={epoch}, base=n/a, delta=n/a")
    else:
        delta = score - base
        print(f"seed {seed}: val={score:.4f}, epoch={epoch}, base={base:.4f}, delta={delta:+.4f}")
    mixup_scores.append((seed, score, epoch, delta))

if mixup_scores:
    scores = [s[1] for s in mixup_scores]
    deltas = [s[3] for s in mixup_scores if s[3] is not None]
    mean_score = sum(scores) / len(scores)

    print("\n=== KILL TEST RESULT ===")
    print(f"Mixup mean val:  {mean_score:.4f}")
    print(f"Base mean val:   {BASE_MEAN:.4f}")
    if deltas:
        mean_delta = sum(deltas) / len(deltas)
        print(f"Mean delta:      {mean_delta:+.4f}")
    else:
        print("Mean delta:      n/a")
    print(f"Best:            {max(scores):.4f}")
    print(f"Worst:           {min(scores):.4f}")
    print(f"Positive seeds:  {sum(1 for d in deltas if d > 0)}/{len(deltas)}")

    # The thresholds are defined for the mean over all base seeds; flag a
    # verdict computed from an incomplete set.
    missing_seeds = sorted(set(BASE_SCORES) - {s[0] for s in mixup_scores})
    if missing_seeds:
        print(f"\nWARNING: no usable checkpoint for seeds {missing_seeds}; "
              f"verdict below is based on {len(scores)} seed(s) only.")

    if mean_score < KILL_THRESHOLD:
        print(f"\n>>> KILL: mean {mean_score:.4f} < {KILL_THRESHOLD:.4f} threshold")
    elif mean_score > SCALE_UP_THRESHOLD:
        print(f"\n>>> SCALE UP: mean {mean_score:.4f} > {SCALE_UP_THRESHOLD:.4f} threshold!")
    else:
        print(f"\n>>> NEUTRAL: mean {mean_score:.4f} in [{KILL_THRESHOLD:.4f}, {SCALE_UP_THRESHOLD:.4f}]. Consult Codex.")
else:
    print("No mixup checkpoints found!")

In [None]:
# Cell 4: Strip checkpoints + zip + save to Drive
#
# For every final (non "_epoch") mixup checkpoint, writes a reduced copy holding
# only the model weights, config, best score and best epoch into logs/slim/,
# then zips that folder and copies the archive to Google Drive.
import os, torch, glob, shutil
os.chdir("/content/competition_package")
os.makedirs("logs/slim", exist_ok=True)

ZIP_PATH = "/content/mixup_kill_test.zip"
DRIVE_PATH = "/content/drive/MyDrive/wunderfund/mixup_kill_test.zip"

# Per-epoch snapshots are excluded; only the final per-seed files are kept.
final_checkpoints = [
    path
    for path in sorted(glob.glob("logs/gru_parity_v1_mixup_seed*.pt"))
    if '_epoch' not in os.path.basename(path)
]

for pt in final_checkpoints:
    basename = os.path.basename(pt)
    full_ckpt = torch.load(pt, map_location="cpu", weights_only=False)

    # model_state_dict is required (KeyError if absent); the rest is optional.
    slim = {
        "model_state_dict": full_ckpt["model_state_dict"],
        "config": full_ckpt.get("config", {}),
    }
    for optional_key in ("best_score", "best_epoch"):
        slim[optional_key] = full_ckpt.get(optional_key, None)

    out = f"logs/slim/{basename}"
    torch.save(slim, out)

    size_before_mb = os.path.getsize(pt) / 1e6
    size_after_mb = os.path.getsize(out) / 1e6
    print(f"{basename}: {size_before_mb:.1f}MB -> {size_after_mb:.1f}MB")

# make_archive appends ".zip" to the base name it is given.
shutil.make_archive(
    "/content/mixup_kill_test",
    "zip",
    "/content/competition_package/logs/slim",
)
zip_mb = os.path.getsize(ZIP_PATH) / 1e6
print(f"\nmixup_kill_test.zip: {zip_mb:.1f}MB")

shutil.copy(ZIP_PATH, DRIVE_PATH)
print("Saved to Drive: MyDrive/wunderfund/mixup_kill_test.zip")