In [None]:
# Cell 0: Mount Drive, download data from Kaggle
import os, json

# Mount Drive for saving outputs
from google.colab import drive
drive.mount('/content/drive')
os.makedirs('/content/drive/MyDrive/wunderfund', exist_ok=True)

# Install pinned kaggle + set credentials
!pip install -q kaggle==1.6.14 --force-reinstall
os.makedirs('/root/.kaggle', exist_ok=True)
with open('/root/.kaggle/kaggle.json', 'w') as f:
    json.dump({"username": "vincentvdo6", "key": "KGAT_17c43012d9e77edf2c183a25acb1489b"}, f)
os.chmod('/root/.kaggle/kaggle.json', 0o600)

# Download + unzip dataset
os.makedirs('/content/data', exist_ok=True)
!kaggle datasets download -d vincentvdo6/wunderfund-predictorium -p /content/data/ --force
!unzip -o -q /content/data/wunderfund-predictorium.zip -d /content/data/
!ls /content/data/*.parquet

In [None]:
# Cell 1: Setup — clone repo, link data
# Every step below raises on failure so that a broken clone or link is reported
# at the step that failed instead of surfacing later as an unrelated error.
import os, shutil, subprocess
REPO = "/content/competition_package"
REPO_URL = "https://github.com/vincentvdo6/competition_package.git"
DATA_DIR = "/content/data"

os.chdir("/content")
# Always start from a fresh checkout; a missing directory is not an error.
shutil.rmtree(REPO, ignore_errors=True)
# check=True raises CalledProcessError when git exits non-zero.
subprocess.run(["git", "clone", REPO_URL, REPO], check=True)
os.chdir(REPO)
os.makedirs("datasets", exist_ok=True)
os.makedirs("logs", exist_ok=True)

# Link data from Kaggle download
for split in ("train", "valid"):
    link_path = f"datasets/{split}.parquet"
    # lexists also detects a dangling symlink, which must be replaced as well.
    if os.path.lexists(link_path):
        os.remove(link_path)
    os.symlink(f"{DATA_DIR}/{split}.parquet", link_path)

# Verify (os.path.exists follows the symlink, so a missing download fails here)
assert os.path.exists("datasets/train.parquet"), "train.parquet not found!"
assert os.path.exists("datasets/valid.parquet"), "valid.parquet not found!"
print("Commit:", subprocess.check_output(["git", "rev-parse", "--short", "HEAD"], text=True).strip())

# The GPU name is informational only: keep it best-effort so a runtime without
# nvidia-smi prints an empty name instead of aborting the setup.
try:
    gpu_name = subprocess.run(
        ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
        stdout=subprocess.PIPE, text=True,
    ).stdout.strip()
except FileNotFoundError:
    gpu_name = ""
print(f"GPU: {gpu_name}")
print("Ready!")

In [None]:
# Cell 2: Train baseline_match × 3 seeds (FOUNDATION — must run first)
# Architecture: h=64, 3 layers, 32 raw features, linear output
# Matches official baseline that scored 0.2761 LB
# Expected: ~5 min per seed
import os, subprocess
os.chdir("/content/competition_package")

configs = [
    ('gru_baseline_match_v1', 'configs/gru_baseline_match_v1.yaml'),
]

rule = '=' * 60

for config_name, config_path in configs:
    for seed in (42, 43, 44):
        # Banner announcing the run; flushed so it appears before child output.
        print(f"\n{rule}")
        print(f'Training {config_name} seed {seed}')
        print(rule, flush=True)

        train_cmd = [
            'python', '-u', 'scripts/train.py',
            '--config', config_path,
            '--seed', str(seed),
            '--device', 'cuda',
        ]
        # stderr is merged into stdout and the pipe is line-buffered so the
        # training log can be relayed to the notebook as it is produced.
        proc = subprocess.Popen(
            train_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )
        for log_line in proc.stdout:
            print(log_line, end='')

        # A failed seed is reported but does not stop the remaining seeds.
        exit_code = proc.wait()
        if exit_code != 0:
            print(f'ERROR: seed {seed} failed with exit code {exit_code}')

print('\nBaseline match training done!')

In [None]:
# Cell 3: Train chrono init × 3 seeds
# Change vs baseline_match: chrono initialization for GRU update gate
import os, subprocess
os.chdir("/content/competition_package")

run_name = 'gru_v2_chrono'
# Everything in the command line except the seed, which is appended per run.
base_cmd = ['python', '-u', 'scripts/train.py', '--config', f'configs/{run_name}.yaml']
separator = '=' * 60

for seed in (42, 43, 44):
    print(f"\n{separator}")
    print(f'Training {run_name} seed {seed}')
    print(separator, flush=True)

    # Child stderr is folded into stdout; the line-buffered text pipe lets the
    # log be echoed to the notebook while training is still running.
    trainer = subprocess.Popen(
        base_cmd + ['--seed', str(seed), '--device', 'cuda'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    )
    for emitted in trainer.stdout:
        print(emitted, end='')

    # Report a non-zero exit status and carry on with the next seed.
    status = trainer.wait()
    if status != 0:
        print(f'ERROR: seed {seed} failed with exit code {status}')

print('\nChrono training done!')

In [None]:
# Cell 4: Train augmentation × 3 seeds
# Change vs baseline_match: variance stretch/compress augmentation
import os, subprocess
os.chdir("/content/competition_package")

bar = '=' * 60

for seed in (42, 43, 44):
    header_lines = (f"\n{bar}", f'Training gru_v2_aug seed {seed}')
    for header in header_lines:
        print(header)
    print(bar, flush=True)

    argv = ['python', '-u', 'scripts/train.py']
    argv += ['--config', 'configs/gru_v2_aug.yaml']
    argv += ['--seed', str(seed), '--device', 'cuda']

    # One merged, line-buffered text stream carries both stdout and stderr.
    job = subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    )
    # Echo the training log line by line until the child closes its output.
    while True:
        text_line = job.stdout.readline()
        if not text_line:
            break
        print(text_line, end='')

    # Failures are surfaced in the output; later seeds still run.
    return_code = job.wait()
    if return_code != 0:
        print(f'ERROR: seed {seed} failed with exit code {return_code}')

print('\nAugmentation training done!')

In [None]:
# Cell 5: Train SWA × 3 seeds
# Change vs baseline_match: SWA after epoch 30 (constant LR + weight averaging)
import os, subprocess
os.chdir("/content/competition_package")

# Options shared by every Popen call: merge stderr into stdout and read the
# result as line-buffered text so it can be streamed to the notebook.
popen_options = dict(
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    bufsize=1,
)
divider = '=' * 60

for seed in (42, 43, 44):
    print(f"\n{divider}")
    print(f'Training gru_v2_swa seed {seed}')
    print(divider, flush=True)

    swa_cmd = [
        'python', '-u', 'scripts/train.py',
        '--config', 'configs/gru_v2_swa.yaml',
        '--seed', str(seed),
        '--device', 'cuda',
    ]
    child = subprocess.Popen(swa_cmd, **popen_options)
    for streamed in child.stdout:
        print(streamed, end='')

    # Log a failing seed without interrupting the ones that follow.
    code = child.wait()
    if code != 0:
        print(f'ERROR: seed {seed} failed with exit code {code}')

print('\nSWA training done!')

In [None]:
# Cell 6: Evaluate ALL configs — kill test comparison
# Baseline match is the CONTROL. Chrono/aug/SWA are treatments.
# Kill test: pass if treatment mean val >= baseline_match mean val + 0.0010
#            AND the treatment beats the baseline on at least 2 matching seeds.
# Also: baseline_match must beat our old p1 mean (0.2627) to validate the arch change.
import os, re, torch, glob
os.chdir("/content/competition_package")

configs_to_eval = [
    ('baseline_match', 'gru_baseline_match_v1'),
    ('chrono', 'gru_v2_chrono'),
    ('augmentation', 'gru_v2_aug'),
    ('swa', 'gru_v2_swa'),
]


def _seed_key(ckpt_path):
    """Return the seed digits found after '_seed' in the checkpoint filename.

    Falls back to the bare filename when no seed can be parsed, in which case
    the run simply has no counterpart to be paired with.
    """
    base = os.path.basename(ckpt_path)
    found = re.search(r'_seed(\d+)', base)
    return found.group(1) if found else base


all_results = {}
for label, prefix in configs_to_eval:
    print(f'\n--- {label} ({prefix}) ---')
    scores = []
    details = []
    by_seed = {}
    for pt in sorted(glob.glob(f'logs/{prefix}_seed*.pt')):
        # Skip per-epoch snapshots; only the final/best checkpoints are scored.
        if '_epoch' in pt:
            continue
        # weights_only=False unpickles arbitrary objects: only acceptable for
        # checkpoints written by this notebook's own training runs.
        ckpt = torch.load(pt, map_location='cpu', weights_only=False)
        score = float(ckpt.get('best_score', 0.0))
        epoch = ckpt.get('best_epoch', 0)
        name = os.path.basename(pt)
        scores.append(score)
        details.append((name, score, epoch))
        # Keep the first checkpoint seen for a seed (sorted order) so each seed
        # contributes exactly one value to the seed-wise comparison.
        by_seed.setdefault(_seed_key(pt), score)
        print(f'  {name}: val={score:.4f} (best epoch {epoch})')
    if scores:
        mean_score = sum(scores) / len(scores)
        print(f'  Mean: {mean_score:.4f}, Min: {min(scores):.4f}, Max: {max(scores):.4f}')
        all_results[label] = {'scores': scores, 'mean': mean_score,
                              'details': details, 'by_seed': by_seed}
    else:
        print('  No checkpoints found!')
        all_results[label] = {'scores': [], 'mean': 0, 'details': [], 'by_seed': {}}

print(f"\n{'='*60}")
print('KILL TEST SUMMARY')
print(f"{'='*60}")
print(f'Old p1 control mean: 0.2627')
print()

bm = all_results.get('baseline_match', {})
bm_mean = bm.get('mean', 0)
bm_scores = bm.get('scores', [])
bm_by_seed = bm.get('by_seed', {})
if not bm_scores:
    # Without a control there is nothing to compare against; say so explicitly
    # instead of grading a placeholder mean of 0.
    print('baseline_match: NO DATA — no checkpoints found, run Cell 2 first')
else:
    print(f'baseline_match mean: {bm_mean:.4f}')
    if bm_mean >= 0.2650:
        print(f'  ARCH CHANGE PASS! {bm_mean:.4f} >= 0.2650 (above p1 + 0.0023)')
    elif bm_mean >= 0.2627:
        print(f'  MARGINAL: {bm_mean:.4f} >= 0.2627 but < 0.2650')
    else:
        print(f'  ARCH CHANGE FAIL: {bm_mean:.4f} < 0.2627 (worse than old p1)')
print()

for label in ['chrono', 'augmentation', 'swa']:
    r = all_results.get(label, {})
    r_scores = r.get('scores', [])
    if not r_scores:
        print(f'{label}: NO DATA — no checkpoints found, cannot evaluate')
        continue
    if not bm_scores:
        print(f'{label}: mean={r["mean"]:.4f}, cannot evaluate without baseline_match results')
        continue

    r_mean = r['mean']
    delta = r_mean - bm_mean
    # Compare seed against the SAME seed of the baseline; positional pairing
    # would mismatch runs whenever one side is missing a seed.
    r_by_seed = r.get('by_seed', {})
    shared_seeds = sorted(set(r_by_seed) & set(bm_by_seed))
    n_positive = sum(1 for s in shared_seeds if r_by_seed[s] > bm_by_seed[s])
    print(f'{label}: mean={r_mean:.4f}, delta={delta:+.4f}, positive={n_positive}/{len(shared_seeds)}')
    if len(shared_seeds) < len(r_scores) or len(shared_seeds) < len(bm_scores):
        print(f'  WARNING: only {len(shared_seeds)} seed(s) present in both '
              f'{label} and baseline_match; means are not over identical seeds')
    if delta >= 0.0010 and n_positive >= 2:
        print(f'  PASS! Meaningful improvement over baseline_match')
    elif delta > 0:
        print(f'  MARGINAL: Positive but not significant')
    else:
        print(f'  FAIL: No improvement')

In [None]:
# Cell 7: Training curves for all configs
# Summarises each saved training history: number of epochs, best average
# validation score and the per-target (t0/t1) scores at that same epoch.
import json, glob, os
os.chdir("/content/competition_package")

for prefix_label in ['gru_baseline_match_v1', 'gru_v2_chrono', 'gru_v2_aug', 'gru_v2_swa']:
    print(f'\n--- {prefix_label} ---')
    for hist_file in sorted(glob.glob(f'logs/training_history_{prefix_label}*.json')):
        with open(hist_file) as f:
            hist = json.load(f)
        name = os.path.basename(hist_file).replace('training_history_', '').replace('.json', '')
        val_scores = hist.get('val_scores') or []
        # A history with no validation entries would make max() raise and abort
        # the report for every remaining run; skip it with a clear message.
        if not val_scores:
            print(f'  {name}: no validation scores recorded, skipping')
            continue
        scores = [s['avg'] for s in val_scores]
        t0_scores = [s['t0'] for s in val_scores]
        t1_scores = [s['t1'] for s in val_scores]
        # Index of the first epoch that reaches the best average score.
        best_idx = scores.index(max(scores))
        print(f'  {name}:')
        print(f'    Epochs: {len(scores)}, Best avg: {max(scores):.4f} at epoch {best_idx+1}')
        print(f'    Best t0: {t0_scores[best_idx]:.4f}, Best t1: {t1_scores[best_idx]:.4f}')
        # The denominator is floored at 1e-8 to avoid dividing by zero.
        print(f'    t0/t1 ratio: {t0_scores[best_idx]/max(t1_scores[best_idx], 1e-8):.2f}')
        print(f'    Last 5 avg: {["{:.4f}".format(s) for s in scores[-5:]]}')

In [None]:
# Cell 8: Strip checkpoints + zip + save to Drive
# Run AFTER reviewing kill test results
import os, torch, glob, shutil
os.chdir("/content/competition_package")
os.makedirs('logs/slim', exist_ok=True)

prefixes = ['gru_baseline_match_v1', 'gru_v2_chrono', 'gru_v2_aug', 'gru_v2_swa']
for prefix in prefixes:
    checkpoint_paths = sorted(glob.glob(f'logs/{prefix}_*.pt'))
    for ckpt_path in checkpoint_paths:
        # Per-epoch snapshots are not exported.
        if '_epoch' in ckpt_path:
            continue
        ckpt_name = os.path.basename(ckpt_path)
        full_ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
        # Keep only the model weights, the config and the best score.
        slim_path = f'logs/slim/{ckpt_name}'
        torch.save(
            {
                'model_state_dict': full_ckpt['model_state_dict'],
                'config': full_ckpt.get('config', {}),
                'best_score': full_ckpt.get('best_score', None),
            },
            slim_path,
        )
        size_before = os.path.getsize(ckpt_path) / 1e6
        size_after = os.path.getsize(slim_path) / 1e6
        print(f'{ckpt_name}: {size_before:.1f}MB -> {size_after:.1f}MB')
    # Copy normalizers
    for normalizer_path in sorted(glob.glob(f'logs/normalizer_{prefix}*.npz')):
        normalizer_name = os.path.basename(normalizer_path)
        shutil.copy(normalizer_path, f'logs/slim/{normalizer_name}')
        print(f'Copied {normalizer_name}')

# Inventory of everything that will go into the archive, with sizes in MB.
slim_entries = sorted(os.listdir('logs/slim'))
print(f'\n--- logs/slim/ contents ({len(slim_entries)} files) ---')
entry_sizes = [os.path.getsize(f'logs/slim/{entry}') / 1e6 for entry in slim_entries]
for entry, entry_mb in zip(slim_entries, entry_sizes):
    print(f'  {entry}: {entry_mb:.1f}MB')
print(f'  Total: {sum(entry_sizes):.1f}MB')

# Zip for download
archive_path = '/content/baseline_match_kill_test.zip'
shutil.make_archive('/content/baseline_match_kill_test', 'zip',
                    '/content/competition_package/logs/slim')
archive_mb = os.path.getsize(archive_path) / 1e6
print(f'\nbaseline_match_kill_test.zip: {archive_mb:.1f}MB')

# Save to Drive
shutil.copy(archive_path,
            '/content/drive/MyDrive/wunderfund/baseline_match_kill_test.zip')
print('Saved to Drive: MyDrive/wunderfund/baseline_match_kill_test.zip')