# 07: Attention Model Seed Expansion

Train combined-loss attention models (gru_attention_clean_v1) with seeds 45-52.
We currently have 3 seeds (42-44); adding these 8 brings the total to 11,
giving more diversity and more candidates to select from when building the ensemble.

- **Config**: `configs/gru_attention_clean_v1.yaml`
- **Estimated time**: ~15-20 min per seed on a T4 GPU
- **Run 2-3 seeds per Kaggle session**

In [None]:
# Upload competition_package.zip to Kaggle, then run:
import os
import zipfile

# Where the uploaded archive lives and where its contents end up.
# Edit ZIP_PATH if the archive was attached under a different input path.
ZIP_PATH = '/kaggle/input/competition-package/competition_package.zip'
WORK_DIR = '/kaggle/working/competition_package'

# Unpack only when the working directory is not there yet, so re-running
# this cell does not extract the archive a second time.
if not os.path.exists(WORK_DIR):
    extract_root = '/kaggle/working/'
    with zipfile.ZipFile(ZIP_PATH, 'r') as archive:
        archive.extractall(extract_root)

# All later cells use paths relative to the package root.
os.chdir(WORK_DIR)
current_dir = os.getcwd()
print(f'Working directory: {current_dir}')
first_entries = os.listdir(".")[:10]
print(f'Files: {first_entries}')

In [None]:
# Configuration for this session.
#
# SEEDS lists the seeds the training loop in the next cell iterates over.
# The full expansion (seeds 45-52) is split across sessions, 2-3 seeds each:
#   Session 1: [45, 46, 47]
#   Session 2: [48, 49, 50]
#   Session 3: [51, 52]

SEEDS = [45, 46, 47]  # <-- CHANGE PER SESSION
# Training config file; passed to scripts/train.py via --config.
CONFIG = 'configs/gru_attention_clean_v1.yaml'
# Directory that receives the slim checkpoints and the normalizer copies.
OUTPUT_DIR = '/kaggle/working'

In [None]:
import torch
import shutil

import subprocess
import sys

# Derive the run name from CONFIG so the checkpoint and normalizer lookups
# below follow the config that is actually trained, rather than relying on a
# second hard-coded copy of the name that could drift out of sync.
config_name = os.path.splitext(os.path.basename(CONFIG))[0]

# Seeds for which training failed or no checkpoint was found; reported once
# after the loop so failures are not lost in the training output.
failed_seeds = []

for seed in SEEDS:
    print(f'\n{"="*60}')
    print(f'Training attention model seed {seed}')
    print(f'{"="*60}')

    # Train in a child process. Passing an argument list (no shell) keeps
    # CONFIG intact even if it contains spaces or shell metacharacters, and
    # sys.executable runs the script with the same interpreter as this kernel.
    result = subprocess.run(
        [
            sys.executable, 'scripts/train.py',
            '--config', CONFIG,
            '--seed', str(seed),
            '--device', 'auto',
        ],
        check=False,  # a failed seed is reported and skipped, not raised
    )

    if result.returncode != 0:
        print(f'ERROR: Training failed for seed {seed}')
        failed_seeds.append(seed)
        continue

    # Locate the checkpoint; train.py is expected to save it as
    # logs/<config_name>_seed<N>.pt.
    ckpt_name = f'{config_name}_seed{seed}.pt'
    ckpt_path = f'logs/{ckpt_name}'

    if not os.path.exists(ckpt_path):
        print(f'ERROR: Checkpoint not found at {ckpt_path}')
        failed_seeds.append(seed)
        continue

    # Strip the checkpoint: keep only model_state_dict so the file that has
    # to be downloaded carries nothing but the model weights.
    ckpt = torch.load(ckpt_path, map_location='cpu')
    slim = {'model_state_dict': ckpt['model_state_dict']}
    slim_path = f'{OUTPUT_DIR}/attn_clean_seed{seed}.pt'
    torch.save(slim, slim_path)
    slim_size = os.path.getsize(slim_path) / 1024 / 1024
    print(f'Saved slim checkpoint: {slim_path} ({slim_size:.1f} MB)')

    # Copy the normalizer next to the slim checkpoint. A missing normalizer
    # only produces a warning; the checkpoint itself has already been saved.
    norm_src = f'logs/normalizer_{config_name}_seed{seed}.npz'
    norm_dst = f'{OUTPUT_DIR}/normalizer_attn_clean_seed{seed}.npz'
    if os.path.exists(norm_src):
        shutil.copy(norm_src, norm_dst)
        print(f'Saved normalizer: {norm_dst}')
    else:
        print(f'WARNING: Normalizer not found at {norm_src}')

    # Report the validation score and epoch stored in the full checkpoint,
    # falling back to 'N/A' when the keys are absent.
    val_score = ckpt.get('best_score', 'N/A')
    best_epoch = ckpt.get('best_epoch', 'N/A')
    print(f'Val score: {val_score}, Best epoch: {best_epoch}')
    print()

if failed_seeds:
    print(f'Seeds that did not produce a checkpoint: {failed_seeds}')

In [None]:
# Show every checkpoint (.pt) and normalizer (.npz) in OUTPUT_DIR with its
# size in MB, so it is clear which files to download.
print('\nOutput files to download:')
print('=' * 60)
artifact_names = [
    name for name in sorted(os.listdir(OUTPUT_DIR))
    if name.endswith(('.pt', '.npz'))
]
for name in artifact_names:
    size_mb = os.path.getsize(f'{OUTPUT_DIR}/{name}') / 1024 / 1024
    print(f'  {name} ({size_mb:.1f} MB)')

## Post-Training Checklist

1. Download all `attn_clean_seed{N}.pt` and `normalizer_attn_clean_seed{N}.npz` files
2. Place in `C:\Users\Vincent\Downloads\` for use with `build_mixed_ensemble.py`
3. Run local validation with `validate_ensemble_local.py` to score new models
4. Update CLAUDE.md with val scores for new seeds