# TCN Seed Expansion (Colab)
Trains 8 additional seeds of the base TCN config (s47-54) plus 5 seeds of the alternative kernel_size=5 config (s42-46).
Existing results: 5 base TCN seeds (s42-46; mean val score 0.2650, best s45 = 0.2688).

**Runtime**: Use GPU (T4). High-RAM not needed (TCN is tiny, 9K params).

In [None]:
# Cell 0: Mount Drive, download data from Kaggle
import os, json

from google.colab import drive
drive.mount('/content/drive')
os.makedirs('/content/drive/MyDrive/wunderfund', exist_ok=True)

!pip install -q kaggle==1.6.14 --force-reinstall
os.makedirs('/root/.kaggle', exist_ok=True)
with open('/root/.kaggle/kaggle.json', 'w') as f:
    json.dump({"username": "vincentvdo6", "key": "KGAT_17c43012d9e77edf2c183a25acb1489b"}, f)
os.chmod('/root/.kaggle/kaggle.json', 0o600)

os.makedirs('/content/data', exist_ok=True)
!kaggle datasets download -d vincentvdo6/wunderfund-predictorium -p /content/data/ --force
!unzip -o -q /content/data/wunderfund-predictorium.zip -d /content/data/
!ls /content/data/*.parquet

In [None]:
# Cell 1: Clone repo, link data, create k5 config if missing
#
# Every external command runs with check=True so a failed step (clone, link)
# stops the cell right there instead of surfacing later as an unrelated error.
import os, subprocess
REPO = "/content/competition_package"
DATA_DIR = "/content/data"

# Contents written to configs/tcn_k5_v1.yaml when the repo does not ship it.
# NOTE(review): depending on the YAML loader used by scripts/train.py, values
# spelled like `1e-4` (no decimal point) may load as strings — confirm that
# the training script handles them.
K5_CONFIG_YAML = """# TCN variant \u2014 wider kernel for longer receptive field\n# kernel_size=5, receptive field 253 steps (vs 127 for k=3). ~9K params.\n\nmodel:\n  type: tcn\n  input_size: 42\n  hidden_channels: 32\n  kernel_size: 5\n  dilations: [1, 2, 4, 8, 16, 32]\n  dropout: 0.15\n  output_size: 2\n\ntraining:\n  lr: 0.001\n  weight_decay: 1e-4\n  epochs: 50\n  batch_size: 192\n  gradient_clip: 1.0\n  early_stopping_patience: 10\n  loss: combined\n  weighted_ratio: 0.62\n  use_amp: true\n  scheduler:\n    type: reduce_on_plateau\n    factor: 0.5\n    patience: 5\n    min_lr: 1e-6\n\ndata:\n  train_path: datasets/train.parquet\n  valid_path: datasets/valid.parquet\n  normalize: true\n  derived_features: true\n\nevaluation:\n  clip_predictions: true\n  clip_range: [-6, 6]\n\nlogging:\n  log_dir: logs\n  save_every: 5\n"""

os.chdir("/content")
# Always start from a fresh checkout; a leftover directory would make the
# clone fail.
subprocess.run(["rm", "-rf", REPO], check=True)
subprocess.run(
    ["git", "clone", "https://github.com/vincentvdo6/competition_package.git", REPO],
    check=True,
)
os.chdir(REPO)
os.makedirs("datasets", exist_ok=True)
os.makedirs("logs", exist_ok=True)

for split in ("train", "valid"):
    link = f"datasets/{split}.parquet"
    subprocess.run(["ln", "-sf", f"{DATA_DIR}/{split}.parquet", link], check=True)
    # os.path.exists follows symlinks, so a dangling link (data was never
    # downloaded) is reported here rather than inside the training script.
    if not os.path.exists(link):
        raise FileNotFoundError(f"{split}.parquet not found!")

print("Commit:", subprocess.check_output(["git", "rev-parse", "--short", "HEAD"], text=True).strip())
print(f"GPU: {os.popen('nvidia-smi --query-gpu=name --format=csv,noheader').read().strip()}")

# Write tcn_k5_v1.yaml if not in repo yet
k5_config = os.path.join(REPO, "configs", "tcn_k5_v1.yaml")
if not os.path.exists(k5_config):
    print("tcn_k5_v1.yaml not in repo, creating...")
    os.makedirs(os.path.dirname(k5_config), exist_ok=True)
    # The config header contains a non-ASCII em dash, so pin the encoding
    # instead of relying on the locale default.
    with open(k5_config, "w", encoding="utf-8") as f:
        f.write(K5_CONFIG_YAML)
    print("Created tcn_k5_v1.yaml")
else:
    print("tcn_k5_v1.yaml already in repo")

print("Ready!")

In [None]:
# Cell 2: tcn_base_v1 seeds 47-54 (8 new same-config seeds)
# Existing: s42-46 (5 seeds, mean 0.2650, best s45=0.2688)
# TCN trains fast (~2 min/seed on T4), total ~16 min
#
# Each seed runs scripts/train.py in a child process whose combined
# stdout/stderr is streamed line by line into the cell output. A failing seed
# does not abort the sweep; failures are collected and reported at the end.
import os, subprocess, sys, time, torch
os.chdir("/content/competition_package")

# Verify GPU first
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    device = "cuda"
else:
    print("WARNING: CUDA not available, falling back to CPU (slower but OK for 9K params)")
    device = "cpu"

failed_seeds = []
for seed in range(47, 55):
    print(f"\n{'='*60}")
    print(f"Training tcn_base_v1 seed {seed}")
    print(f"{'='*60}", flush=True)
    t0 = time.time()
    # The context manager closes the stdout pipe and waits for the child on
    # exit, including when the streaming loop is interrupted.
    with subprocess.Popen(
        [sys.executable, "-u", "scripts/train.py",
         "--config", "configs/tcn_base_v1.yaml",
         "--seed", str(seed), "--device", device],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        text=True, bufsize=1
    ) as proc:
        for line in proc.stdout:
            print(line, end="", flush=True)
    elapsed = time.time() - t0
    if proc.returncode != 0:
        print(f"ERROR: seed {seed} failed with return code {proc.returncode}")
        failed_seeds.append(seed)
    else:
        print(f"Seed {seed} done in {elapsed:.0f}s")

# Report failures explicitly so they are not buried in the training logs.
if failed_seeds:
    print(f"\ntcn_base_v1 expansion finished with FAILED seeds: {failed_seeds}")
else:
    print("\ntcn_base_v1 expansion done: seeds 47-54")

In [None]:
# Cell 3: tcn_k5_v1 seeds 42-46 (5 seeds of alt config, wider kernel)
# New config: kernel_size=5, receptive field 253 steps (vs 127 for k=3)
# Same params (~9K), same speed. Different temporal reach = diversity.
# Kill rule: mean val < 0.2400 = kill. > 0.2500 = viable.
#
# Each seed runs scripts/train.py in a child process whose combined
# stdout/stderr is streamed line by line into the cell output. A failing seed
# does not abort the pilot; failures are collected and reported at the end.
import os, subprocess, sys, time, torch
os.chdir("/content/competition_package")

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

failed_seeds = []
for seed in range(42, 47):
    print(f"\n{'='*60}")
    print(f"Training tcn_k5_v1 seed {seed}")
    print(f"{'='*60}", flush=True)
    t0 = time.time()
    # The context manager closes the stdout pipe and waits for the child on
    # exit, including when the streaming loop is interrupted.
    with subprocess.Popen(
        [sys.executable, "-u", "scripts/train.py",
         "--config", "configs/tcn_k5_v1.yaml",
         "--seed", str(seed), "--device", device],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        text=True, bufsize=1
    ) as proc:
        for line in proc.stdout:
            print(line, end="", flush=True)
    elapsed = time.time() - t0
    if proc.returncode != 0:
        print(f"ERROR: seed {seed} failed with return code {proc.returncode}")
        failed_seeds.append(seed)
    else:
        print(f"Seed {seed} done in {elapsed:.0f}s")

# Report failures explicitly so they are not buried in the training logs.
if failed_seeds:
    print(f"\ntcn_k5_v1 pilot finished with FAILED seeds: {failed_seeds}")
else:
    print("\ntcn_k5_v1 pilot done: seeds 42-46")

In [None]:
# Cell 4: Compare all TCN val scores
#
# Reads every logs/*.pt checkpoint, ranks them by stored validation score and
# prints a per-config summary with a VIABLE / MARGINAL / KILL verdict.
# Checkpoints without a usable score are listed as "n/a" and left out of the
# statistics, so they cannot drag a config's mean towards zero.
import os, glob, torch
os.chdir("/content/competition_package")


def _load_checkpoint(path):
    """Load a checkpoint onto the CPU, tolerating torch versions without `weights_only`."""
    try:
        return torch.load(path, map_location="cpu", weights_only=False)
    except TypeError:
        return torch.load(path, map_location="cpu")


def _as_score(value):
    """Return `value` as a float, or None when it is missing, non-numeric or NaN."""
    if value is None:
        return None
    try:
        # float() also accepts numpy scalars and single-element tensors,
        # which a plain isinstance(value, (int, float)) check would reject.
        score = float(value)
    except (TypeError, ValueError, RuntimeError):
        return None
    return None if score != score else score  # NaN is the only value != itself


results = []
for pt in sorted(glob.glob("logs/*.pt")):
    ckpt = _load_checkpoint(pt)
    raw_score = ckpt.get("best_score", ckpt.get("val_score", None))
    results.append((os.path.basename(pt), _as_score(raw_score)))

# Scored checkpoints first (best to worst), unscored ones at the bottom.
results.sort(key=lambda item: (item[1] is None, -(item[1] or 0.0)))

print(f"{'Rank':<5} {'Model':<55} {'Val Score':>10}")
print("-" * 72)
for i, (name, score) in enumerate(results, 1):
    shown = f"{score:>10.4f}" if score is not None else f"{'n/a':>10}"
    print(f"{i:<5} {name:<55} {shown}")

# Summary per config
base = [(n, s) for n, s in results if "tcn_base" in n]
k5 = [(n, s) for n, s in results if "tcn_k5" in n]

for label, group in [("tcn_base_v1 (k=3)", base), ("tcn_k5_v1 (k=5)", k5)]:
    if not group:
        continue
    scores = [s for _, s in group if s is not None]
    missing = len(group) - len(scores)
    print(f"\n--- {label}: {len(group)} seeds ---")
    if missing:
        print(f"  WARNING: {missing} checkpoint(s) without a usable score were excluded")
    if not scores:
        print("  No usable scores, no verdict")
        continue
    mean = sum(scores) / len(scores)
    # Population standard deviation (divides by N, not N - 1).
    std = (sum((s - mean) ** 2 for s in scores) / len(scores)) ** 0.5
    print(f"  Mean: {mean:.4f}, Std: {std:.4f}")
    print(f"  Best: {max(scores):.4f}, Worst: {min(scores):.4f}")
    if mean > 0.2500:
        print(f"  VERDICT: VIABLE for ensemble")
    elif mean > 0.2400:
        print(f"  VERDICT: MARGINAL")
    else:
        print(f"  VERDICT: KILL")

In [None]:
# Cell 5: Strip checkpoints + zip + save to Drive
#
# Rewrites every logs/*.pt checkpoint into logs/slim/ keeping only the model
# weights, config and best score, copies the normalizer files next to them,
# zips the folder, then stores the archive on Drive and offers it for download.
import os, torch, glob, shutil


def _size_mb(path):
    """Size of `path` in megabytes (1 MB = 1e6 bytes)."""
    return os.path.getsize(path) / 1e6


def _load_checkpoint(path):
    """Load a checkpoint onto the CPU, retrying without `weights_only` on TypeError."""
    try:
        return torch.load(path, map_location="cpu", weights_only=False)
    except TypeError:
        return torch.load(path, map_location="cpu")


os.chdir("/content/competition_package")
os.makedirs("logs/slim", exist_ok=True)

# 1) Slim checkpoints: keep only the entries listed below.
for src_path in sorted(glob.glob("logs/*.pt")):
    full_ckpt = _load_checkpoint(src_path)
    file_name = os.path.basename(src_path)
    dst_path = f"logs/slim/{file_name}"
    torch.save(
        {
            "model_state_dict": full_ckpt["model_state_dict"],
            "config": full_ckpt.get("config", {}),
            "best_score": full_ckpt.get("best_score", None),
        },
        dst_path,
    )
    print(f"{file_name}: {_size_mb(src_path):.1f}MB -> {_size_mb(dst_path):.1f}MB")

# 2) Normalizer files are copied over unchanged.
for norm_path in sorted(glob.glob("logs/normalizer_*.npz")):
    norm_name = os.path.basename(norm_path)
    shutil.copy(norm_path, f"logs/slim/{norm_name}")
    print(f"Copied {norm_name}")

# 3) Show what ended up in the slim folder.
slim_entries = sorted(os.listdir("logs/slim"))
print(f"\n--- logs/slim/ contents ({len(slim_entries)} files) ---")
for entry in slim_entries:
    entry_mb = _size_mb(f"logs/slim/{entry}")
    print(f"  {entry}: {entry_mb:.1f}MB")

# 4) Zip the slim folder.
shutil.make_archive("/content/tcn_expansion", "zip",
                    "/content/competition_package/logs/slim")
archive_mb = _size_mb("/content/tcn_expansion.zip")
print(f"\ntcn_expansion.zip: {archive_mb:.1f}MB")

# 5) Persist to Drive (the target folder is expected to exist already).
shutil.copy("/content/tcn_expansion.zip", "/content/drive/MyDrive/wunderfund/tcn_expansion.zip")
print("Saved to Drive: MyDrive/wunderfund/tcn_expansion.zip")

# 6) Also trigger a direct browser download.
from google.colab import files
files.download("/content/tcn_expansion.zip")