# Thyroid Ultrasound Classification — Training & Evaluation

This notebook lets you **train** any of the four supported models and **evaluate** it on the held-out test set — all from one place.

| Model key | Architecture |
|---|---|
| `res18` | ResNet-18 |
| `res50` | ResNet-50 |
| `densenet` | DenseNet-121 |
| `efficientnet` | EfficientNet-B0 |

## 0 · Setup — project root & imports

In [None]:
import os, sys
from pathlib import Path

# Locate the project root: the current working directory, or its parent when
# the notebook was launched from inside notebooks/.
_launch_dir = Path.cwd().resolve()
PROJECT_ROOT = _launch_dir.parent if _launch_dir.name == 'notebooks' else _launch_dir

# Work from the root so relative paths resolve against it, and put the root at
# the front of the import path (only once, even if this cell is re-run).
os.chdir(PROJECT_ROOT)
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

print(f'Project root: {PROJECT_ROOT}')

## 1 · Choose a model & edit hyperparameters

Everything is driven by `config/config.yaml`. You can edit the file directly,
or override individual keys in the cell below; the cell then saves the result back to `config/config.yaml`.

In [None]:
import yaml

CONFIG_PATH = 'config/config.yaml'

# Load the config. The encoding is pinned to UTF-8 so the file is decoded the
# same way on every platform instead of depending on the locale default.
with open(CONFIG_PATH, encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

#  Available model names:  res18 | res50 | densenet | efficientnet
cfg['model']['name']              = 'res18'

cfg['training']['epochs']         = 30
cfg['training']['batch_size']     = 8
cfg['training']['lr']             = 1e-4
cfg['training']['optimizer']      = 'adam'   # adam | adamw | sgd
cfg['training']['scheduler']      = 'cosine' # cosine | step | none
cfg['training']['loss']           = 'focal'  # cross_entropy | label_smoothing | focal | focal_ce
cfg['training']['loss_gamma']     = 2.0
cfg['training']['early_stopping_patience'] = 5

# Save the modified config back so train.py picks it up.
# NOTE: the file is regenerated from the loaded data, so comments and custom
# formatting that were in config.yaml are not preserved.
# safe_dump only emits standard YAML tags, which keeps the file loadable by the
# safe_load call above; sort_keys=False keeps the existing key order.
with open(CONFIG_PATH, 'w', encoding='utf-8') as f:
    yaml.safe_dump(cfg, f, default_flow_style=False, sort_keys=False)

print('Config saved:')
print(f"  model      : {cfg['model']['name']}")
print(f"  epochs     : {cfg['training']['epochs']}")
print(f"  batch_size : {cfg['training']['batch_size']}")
print(f"  optimizer  : {cfg['training']['optimizer']}")
print(f"  loss       : {cfg['training']['loss']}")

## 2 · Train

In [None]:
from src.train import train

def _print_training_summary(summary):
    """Print the best-epoch statistics and log location from a training result.

    ``summary`` is indexed with the keys 'best_val_loss', 'best_val_acc',
    'best_epoch' and 'log_path'.
    """
    print('\nTraining complete.')
    print(f"  Best val_loss : {summary['best_val_loss']:.4f}")
    print(f"  Best val_acc  : {summary['best_val_acc']:.1f}%")
    print(f"  Best epoch    : {summary['best_epoch']}")
    print(f"  Log saved to  : {summary['log_path']}")


# Run training with the config saved above; `result` is reused by later cells.
result = train(config_path=CONFIG_PATH)
_print_training_summary(result)

## 3 · Plot training curves

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def _draw_panel(ax, curves, y_label, title):
    """Plot each (column, legend label) pair of ``log_df`` against the epoch column on ``ax``."""
    for column, legend_label in curves:
        ax.plot(log_df['epoch'], log_df[column], label=legend_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)


# Per-epoch log written by the training run.
log_df = pd.read_csv(result['log_path'])

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: loss; right panel: accuracy.
_draw_panel(
    axes[0],
    [('train_loss', 'Train loss'), ('val_loss', 'Val loss')],
    'Loss',
    'Loss',
)
_draw_panel(
    axes[1],
    [('train_acc', 'Train acc'), ('val_acc', 'Val acc')],
    'Accuracy (%)',
    'Accuracy',
)

fig.suptitle(f"Training curves — {cfg['model']['name']}", fontsize=13)
fig.tight_layout()
plt.show()

## 4 · Evaluate on test set

In [None]:
from src.test import evaluate

# Run the project's evaluation routine with the same config file used for
# training. The returned mapping is kept in `metrics`; the next cell reads
# metrics['cm_path'] to display the confusion-matrix image.
# NOTE(review): which checkpoint evaluate() loads is decided inside src.test —
# confirm it matches the model selected in the config.
metrics = evaluate(config_path=CONFIG_PATH)

## 5 · Display confusion matrix

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Show the confusion-matrix image stored at metrics['cm_path'].
# Image.open is lazy and keeps the file open, so it is used as a context
# manager: the handle is released explicitly once the image has been handed to
# matplotlib instead of being left to garbage collection.
with Image.open(metrics['cm_path']) as img:
    plt.figure(figsize=(5, 4))
    plt.imshow(img)  # pixel data is read here, while the file is still open
plt.axis('off')
plt.title(f"Confusion Matrix - {cfg['model']['name']}")
plt.tight_layout()
plt.show()