In [1]:
import os
from pathlib import Path
from datetime import datetime

# Every path used further down ("configs/...", "data/...", "outputs/...") is
# relative, so when the kernel starts inside an `experiments` directory we
# step up one level before anything is loaded.
cwd = Path.cwd()
started_in_experiments = cwd.name == "experiments"

if started_in_experiments:
    os.chdir(cwd.parent)

# Echo the effective working directory so a wrong launch location is obvious.
print("Working dir:", Path.cwd())

Working dir: c:\Users\vikto\Desktop\mat-stk2011


# Import libraries and project modules

In [4]:
import numpy as np
import yaml
from sklearn.preprocessing import StandardScaler

from src.models.teacher import TeacherConfig, TeacherTrainer
from src.utils.seeds import seed_everything
from src.utils.metrics import quadratic_weighted_kappa
from src.utils.splits import get_stratified_folds


# Load config from YAML

In [5]:
# Read the experiment configuration. The context manager closes the file
# handle as soon as parsing is done instead of leaving it to the garbage
# collector (the bare `open(...)` call never closed it explicitly).
with open("configs/teacher.yaml", encoding="utf-8") as cfg_file:
    cfg = yaml.safe_load(cfg_file)

# Run-level settings. The explicit casts coerce values that arrive from YAML
# as strings and fail loudly on anything that is not numeric.
seed = int(cfg["seed"])
n_splits = int(cfg["n_splits"])
patience = int(cfg["trainer"]["patience"])
max_epochs = int(cfg["trainer"]["max_epochs"])

# Hyper-parameters for the teacher model, taken from the "teacher" section.
t = cfg["teacher"]
teacher_cfg = TeacherConfig(
    hidden_layers=tuple(t["hidden_layers"]),  # YAML list -> tuple
    dropout=float(t["dropout"]),
    lr=float(t["lr"]),
    weight_decay=float(t["weight_decay"]),
    batch_size=int(t["batch_size"]),
    n_classes=int(t["n_classes"]),
)

print("Config loaded")
print(cfg)

Config loaded
{'seed': 312, 'n_splits': 5, 'trainer': {'patience': 15, 'max_epochs': 200}, 'teacher': {'hidden_layers': [256, 128, 64], 'dropout': 0.2, 'lr': 0.0005, 'weight_decay': 0.001, 'batch_size': 128, 'n_classes': 6}}


# Load data

In [6]:
seed_everything(seed)

# np.load on an .npz archive returns a lazy NpzFile that keeps the underlying
# file open. Indexing it reads the array into memory, so the arrays stay valid
# after the `with` block has closed the archive.
with np.load("data/cached_embeddings_jina.npz") as emb:
    X_emb = emb["X"]
    y = emb["y"].astype(int)

with np.load("data/cached_features_spacy.npz") as spacy:
    X_spacy = spacy["X"]

# np.hstack only fails when the two feature matrices disagree on row count;
# a label vector of the wrong length would slip through, so check all three.
# NOTE(review): this cannot verify that both caches list the rows in the same
# order — confirm against the code that produced the caches.
assert X_emb.shape[0] == X_spacy.shape[0] == y.shape[0], (
    f"row count mismatch: embeddings={X_emb.shape[0]}, "
    f"spacy={X_spacy.shape[0]}, labels={y.shape[0]}"
)

# Teacher input = embedding columns followed by the spaCy feature columns.
X = np.hstack([X_emb, X_spacy])

print("Embeddings:", X_emb.shape)
print("SpaCy:", X_spacy.shape)
print("Teacher X:", X.shape)

Embeddings: (17307, 768)
SpaCy: (17307, 84)
Teacher X: (17307, 852)


# Cross-validation training

In [7]:
# NOTE(review): a single trainer instance is reused for every fold; this
# presumes fit() re-initialises the model on each call — confirm in
# src.models.teacher.
trainer = TeacherTrainer(
    teacher_cfg,
    patience=patience,
    max_epochs=max_epochs,
    verbose=True,
    print_every=10,
)

folds = get_stratified_folds(y, n_splits=n_splits, seed=seed)
n = len(y)

# Out-of-fold (OOF) buffers, filled one validation fold at a time. Every row
# is written exactly once provided the folds partition the row indices
# (assumed of get_stratified_folds — TODO confirm).
oof_probs = np.zeros((n, teacher_cfg.n_classes))
oof_logits = np.zeros((n, teacher_cfg.n_classes))
# The hidden-feature width is only known after the first extract_features()
# call, so this buffer is allocated lazily. A None sentinel is used instead of
# a placeholder list so the name never holds two different container types.
oof_hidden = None
fold_kappas = []
fold_histories = []

for i, (tr_idx, va_idx) in enumerate(folds):
    print(f"\n--- Fold {i+1}/{len(folds)} ---")

    # Distinct seed per fold.
    seed_everything(seed + i)

    # Fit the scaler on the training rows only, then apply it to the
    # validation rows, so no validation statistics leak into training.
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X[tr_idx])
    X_va = scaler.transform(X[va_idx])

    # NOTE(review): the fold passed as X_val/y_val here is the same fold that
    # is scored below. If fit() early-stops or selects a checkpoint on it, the
    # reported fold κ is optimistically biased — confirm this is acceptable.
    trainer.fit(
        X_train=X_tr,
        y_train=y[tr_idx],
        X_val=X_va,
        y_val=y[va_idx],
        metric_fn=quadratic_weighted_kappa,
    )

    probs = trainer.predict_proba(X_va)
    logits = trainer.predict_logits(X_va)
    hidden = trainer.extract_features(X_va)

    oof_probs[va_idx] = probs
    oof_logits[va_idx] = logits

    if oof_hidden is None:
        hidden_dim = hidden.shape[1]
        oof_hidden = np.zeros((n, hidden_dim))
    oof_hidden[va_idx] = hidden

    # argmax yields a 0-based class index; + 1 maps it onto the label scale.
    # Assumes labels run 1..n_classes — TODO confirm against the dataset.
    preds = probs.argmax(1) + 1
    kappa = quadratic_weighted_kappa(y[va_idx], preds)

    fold_kappas.append(kappa)
    fold_histories.append({
        "fold": i + 1,
        **trainer.history,
    })

    print(f"Fold κ: {kappa:.4f}")

# Pooled score over all out-of-fold predictions (same 1-based label mapping).
oof_preds = oof_probs.argmax(1) + 1
oof_kappa = quadratic_weighted_kappa(y, oof_preds)

print("\n" + "=" * 50)
print(f"Fold kappas : {[f'{k:.4f}' for k in fold_kappas]}")
print(f"Mean fold κ : {np.mean(fold_kappas):.4f} ± {np.std(fold_kappas):.4f}")
print(f"OOF QWK     : {oof_kappa:.4f}")



--- Fold 1/5 ---
  epoch  10: loss=0.7282, val_κ=0.7928
  epoch  20: loss=0.5678, val_κ=0.7956
  epoch  30: loss=0.4264, val_κ=0.7845
  early stop epoch 32, best κ=0.8026
Fold κ: 0.8026

--- Fold 2/5 ---
  epoch  10: loss=0.7192, val_κ=0.7900
  epoch  20: loss=0.5571, val_κ=0.7816
  epoch  30: loss=0.4215, val_κ=0.7680
  early stop epoch 34, best κ=0.7913
Fold κ: 0.7913

--- Fold 3/5 ---
  epoch  10: loss=0.7218, val_κ=0.7906
  epoch  20: loss=0.5667, val_κ=0.7826
  epoch  30: loss=0.4258, val_κ=0.7826
  early stop epoch 34, best κ=0.7911
Fold κ: 0.7911

--- Fold 4/5 ---
  epoch  10: loss=0.7250, val_κ=0.7957
  epoch  20: loss=0.5572, val_κ=0.7903
  early stop epoch 27, best κ=0.8076
Fold κ: 0.8076

--- Fold 5/5 ---
  epoch  10: loss=0.7280, val_κ=0.7903
  epoch  20: loss=0.5632, val_κ=0.7920
  early stop epoch 24, best κ=0.8020
Fold κ: 0.8020

Fold kappas : ['0.8026', '0.7913', '0.7911', '0.8076', '0.8020']
Mean fold κ : 0.7989 ± 0.0066
OOF QWK     : 0.7991


In [8]:
import json

# Persist the run artifacts under outputs/<timestamp>_teacher_cv/.
# NOTE(review): the stamp has minute resolution and the directory is created
# with exist_ok=True, so two runs started within the same minute write to the
# same directory and the later one overwrites the earlier one.
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
run_dir = Path(f"outputs/{timestamp}_teacher_cv")
run_dir.mkdir(parents=True, exist_ok=True)

# Out-of-fold arrays plus the labels they are aligned with.
np.savez(
    run_dir / "oof_predictions.npz",
    probs=oof_probs,
    logits=oof_logits,
    hidden=oof_hidden,
    y=y,
)

# Metrics are cast to built-in floats: the json module rejects NumPy scalar
# types that do not subclass float (e.g. float32), so the cast keeps the dump
# independent of what the metric function returns.
results = {
    "experiment": "teacher_cv",
    "timestamp": timestamp,
    "oof_qwk": float(oof_kappa),
    "fold_kappas": [float(k) for k in fold_kappas],
    "config": cfg,
    "folds": fold_histories,
}

with open(run_dir / "results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)

# Store the config used for this run next to its results.
with open(run_dir / "config.yaml", "w", encoding="utf-8") as f:
    yaml.dump(cfg, f)

print(f"\nSaved to {run_dir}/")
print(f"  oof_predictions.npz  ({oof_probs.shape}, {oof_logits.shape}, {oof_hidden.shape})")
print("  results.json")
print("  config.yaml")



Saved to outputs\2026-02-20_16-53_teacher_cv/
  oof_predictions.npz  ((17307, 6), (17307, 6), (17307, 64))
  results.json
  config.yaml
