In [None]:
# Cell 0: Setup - clone repo, link data
#
# Creates a fresh checkout of the competition repo under /kaggle/working and
# symlinks the competition parquet files into its datasets/ directory.
# Every external command runs with check=True so a failed step (clone error,
# missing binary, ...) stops the cell right away instead of being ignored.
import os, subprocess

REPO = "/kaggle/working/competition_package"
REPO_URL = "https://github.com/vincentvdo6/competition_package.git"
INPUT_DIR = "/kaggle/input/wunderfund-predictorium"

os.chdir("/kaggle/working")
# Remove any previous checkout so that re-running this cell starts clean.
subprocess.run(["rm", "-rf", REPO], check=True)
subprocess.run(["git", "clone", REPO_URL, REPO], check=True)
os.chdir(REPO)
os.makedirs("datasets", exist_ok=True)
os.makedirs("logs", exist_ok=True)

for split in ("train", "valid"):
    source = f"{INPUT_DIR}/{split}.parquet"
    # `ln -sf` happily creates a dangling link when the target is missing,
    # so verify the input file exists before linking it.
    if not os.path.exists(source):
        raise FileNotFoundError(
            f"Expected input file not found: {source} "
            "(is the competition dataset attached to this notebook?)"
        )
    subprocess.run(["ln", "-sf", source, f"datasets/{split}.parquet"], check=True)

print("Commit:", subprocess.check_output(["git", "rev-parse", "--short", "HEAD"], text=True).strip())
print("Ready!")

In [None]:
# Cell 1: Train 3x GRU Pearson (seeds 42-44)
# ~35 epochs x 5s/epoch x 3 seeds = ~9 min
#
# Runs scripts/train.py once per seed. A failing run does not stop the
# remaining seeds, but its non-zero exit status is reported so that a crashed
# training job is not mistaken for a successful one.
import os
os.chdir("/kaggle/working/competition_package")
failed_seeds = []
for seed in [42, 43, 44]:
    print(f"\n{'='*60}")
    print(f"Training gru_pearson_v1 seed {seed}")
    print(f"{'='*60}")
    # os.system returns the command's wait status; 0 means success.
    status = os.system(
        f"python -u scripts/train.py "
        f"--config configs/gru_pearson_v1.yaml "
        f"--seed {seed} --device cuda"
    )
    if status != 0:
        failed_seeds.append(seed)
        print(f"WARNING: gru_pearson_v1 seed {seed} failed (os.system status {status})")

if failed_seeds:
    print(f"\nWARNING: gru_pearson_v1 training failed for seeds: {failed_seeds}")

In [None]:
# Cell 2: Train 2x GRU+Attention clean (seeds 45-46)
# ~40 epochs x 33s/epoch x 2 seeds = ~44 min
#
# Runs scripts/train.py once per seed. A failing run does not stop the
# remaining seeds, but its non-zero exit status is reported so that a crashed
# training job is not mistaken for a successful one.
import os
os.chdir("/kaggle/working/competition_package")
failed_seeds = []
for seed in [45, 46]:
    print(f"\n{'='*60}")
    print(f"Training gru_attention_clean_v1 seed {seed}")
    print(f"{'='*60}")
    # os.system returns the command's wait status; 0 means success.
    status = os.system(
        f"python -u scripts/train.py "
        f"--config configs/gru_attention_clean_v1.yaml "
        f"--seed {seed} --device cuda"
    )
    if status != 0:
        failed_seeds.append(seed)
        print(f"WARNING: gru_attention_clean_v1 seed {seed} failed (os.system status {status})")

if failed_seeds:
    print(f"\nWARNING: gru_attention_clean_v1 training failed for seeds: {failed_seeds}")

In [None]:
# Cell 3: Train 2x GRU+Attention Pearson (seeds 42-43)
# ~40 epochs x 33s/epoch x 2 seeds = ~44 min
#
# Runs scripts/train.py once per seed. A failing run does not stop the
# remaining seeds, but its non-zero exit status is reported so that a crashed
# training job is not mistaken for a successful one.
import os
os.chdir("/kaggle/working/competition_package")
failed_seeds = []
for seed in [42, 43]:
    print(f"\n{'='*60}")
    print(f"Training gru_attention_pearson_v1 seed {seed}")
    print(f"{'='*60}")
    # os.system returns the command's wait status; 0 means success.
    status = os.system(
        f"python -u scripts/train.py "
        f"--config configs/gru_attention_pearson_v1.yaml "
        f"--seed {seed} --device cuda"
    )
    if status != 0:
        failed_seeds.append(seed)
        print(f"WARNING: gru_attention_pearson_v1 seed {seed} failed (os.system status {status})")

if failed_seeds:
    print(f"\nWARNING: gru_attention_pearson_v1 training failed for seeds: {failed_seeds}")

In [None]:
# Cell 4: Strip checkpoints and copy normalizers
import os, torch, glob, shutil

os.chdir("/kaggle/working/competition_package")
slim_dir = "logs/slim"
os.makedirs(slim_dir, exist_ok=True)


def _load_checkpoint(path):
    """Load a checkpoint onto the CPU.

    Tries `weights_only=False` first and retries without that keyword if
    torch.load rejects it with a TypeError (presumably an older torch version
    that does not know the argument).
    """
    try:
        return torch.load(path, map_location="cpu", weights_only=False)
    except TypeError:
        return torch.load(path, map_location="cpu")


# Write a reduced copy of every checkpoint in logs/: only the
# "model_state_dict" entry and the "config" entry (empty dict when absent)
# are kept; everything else stored in the checkpoint is dropped.
for ckpt_path in sorted(glob.glob("logs/*.pt")):
    ckpt_name = os.path.basename(ckpt_path)
    full_ckpt = _load_checkpoint(ckpt_path)
    slim_ckpt = {
        "model_state_dict": full_ckpt["model_state_dict"],
        "config": full_ckpt.get("config", {}),
    }
    slim_path = os.path.join(slim_dir, ckpt_name)
    torch.save(slim_ckpt, slim_path)
    size_before_mb = os.path.getsize(ckpt_path) / 1e6
    size_after_mb = os.path.getsize(slim_path) / 1e6
    print(f"{ckpt_name}: {size_before_mb:.1f}MB -> {size_after_mb:.1f}MB")

# Place the normalizer archives next to the slim checkpoints.
for normalizer_path in sorted(glob.glob("logs/normalizer_*.npz")):
    normalizer_name = os.path.basename(normalizer_path)
    shutil.copy(normalizer_path, os.path.join(slim_dir, normalizer_name))
    print(f"Copied {normalizer_name}")

# Final listing of everything that ended up in logs/slim, with sizes.
print(f"\n--- logs/slim/ contents ---")
for entry in sorted(os.listdir(slim_dir)):
    entry_mb = os.path.getsize(os.path.join(slim_dir, entry)) / 1e6
    print(f"  {entry}: {entry_mb:.1f}MB")

In [None]:
# Cell 5: Zip slim checkpoints for download
import os
import shutil

# make_archive appends the ".zip" suffix to the base name itself.
archive_base = "/kaggle/working/slim_checkpoints_pearson"
slim_root = "/kaggle/working/competition_package/logs/slim"
shutil.make_archive(archive_base, "zip", slim_root)

# Report the size of the resulting archive in MB (1e6 bytes).
archive_mb = os.path.getsize(archive_base + ".zip") / 1e6
print(f"slim_checkpoints_pearson.zip: {archive_mb:.1f}MB")
print("Download from: /kaggle/working/slim_checkpoints_pearson.zip")

In [None]:
# Cell 6: Print validation scores summary
#
# Prints one table row per checkpoint in logs/ with the score stored under
# "best_score" (or, failing that, "val_score"). Rows without a usable score
# show "N/A".
import os, glob, torch
os.chdir("/kaggle/working/competition_package")
print(f"{'Model':<45} {'Best Val Score':>15}")
print("-" * 62)
checkpoint_paths = sorted(glob.glob("logs/*.pt"))
if not checkpoint_paths:
    # Make an empty table explicit instead of printing only the header.
    print("(no checkpoints found in logs/)")
for pt in checkpoint_paths:
    try:
        ckpt = torch.load(pt, map_location="cpu", weights_only=False)
    except TypeError:
        # Retry without the keyword when torch.load does not accept it
        # (presumably an older torch version).
        ckpt = torch.load(pt, map_location="cpu")
    # Take the first key that holds an actual value; a key that is present
    # but None must not hide the other one.
    score = next(
        (ckpt[key] for key in ("best_score", "val_score") if ckpt.get(key) is not None),
        "N/A",
    )
    name = os.path.basename(pt)
    try:
        # float() also accepts ints, NumPy scalars and single-element tensors,
        # which a plain isinstance(score, float) check would print unformatted.
        score_text = f"{float(score):>15.4f}"
    except (TypeError, ValueError, RuntimeError):
        score_text = f"{str(score):>15}"
    print(f"{name:<45} {score_text}")