### Goal
This code example shows you how to load the stored training log, model hyperparams and model checkpoints:
- Training log: this keeps track of the training loss over the epochs (or any other data you may store during the training process).
- Model hyperparams: these record the hyperparams and weight init schemes of your model, stored together with the trainer and dataset settings used for the run.
- Model checkpoints: these are the stored weights of your model.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import devtorch
%load_ext autoreload
%autoreload 2

In [2]:
class ANNClassifier(devtorch.DevModel):
    """Two-layer, bias-free fully connected classifier.

    Both linear layers are initialised through ``init_weight`` with the
    "glorot_uniform" scheme, and a leaky ReLU is applied after each layer
    (including the output layer).
    """

    def __init__(self, n_in, n_hidden, n_out):
        """Build the two linear layers and initialise their weights.

        Args:
            n_in: Number of input features of the first layer.
            n_hidden: Number of hidden units.
            n_out: Number of output units.
        """
        super().__init__()

        # Keep the constructor arguments so `hyperparams` can report them.
        self._n_in, self._n_hidden, self._n_out = n_in, n_hidden, n_out

        self.layer1 = nn.Linear(n_in, n_hidden, bias=False)
        self.layer2 = nn.Linear(n_hidden, n_out, bias=False)

        # Initialise in layer order (layer1 first, then layer2).
        for layer in (self.layer1, self.layer2):
            self.init_weight(layer.weight, "glorot_uniform")

    @property
    def hyperparams(self):
        """Parent hyperparams extended with a "params" entry holding the constructor arguments."""
        merged = dict(super().hyperparams)
        merged["params"] = {
            "n_in": self._n_in,
            "n_hidden": self._n_hidden,
            "n_out": self._n_out,
        }
        return merged

    def forward(self, x):
        """Run the network on ``x``.

        Dimensions 1 through 3 of ``x`` are flattened into a single feature
        dimension before the first layer, so ``x`` needs at least four dimensions.
        """
        flat = x.flatten(start_dim=1, end_dim=3)
        hidden = F.leaky_relu(self.layer1(flat))
        out = self.layer2(hidden)
        return F.leaky_relu(out)

In [3]:
class CustomTrainer(devtorch.Trainer):
    """Trainer that also logs the model's mean absolute weight magnitude each epoch.

    The extra value is stored under the "model_weight_mag" key of ``self.log``,
    next to the entries the base trainer already maintains.
    """

    def __init__(self, root, model_id, model, train_dataset, n_epochs=100, batch_size=128, lr=0.001, device="cuda"):
        """Forward all settings to ``devtorch.Trainer`` and register the extra log column.

        Args:
            root: Passed to the base trainer as ``root``.
            model_id: Passed to the base trainer as ``id``.
            model: Model to train; ``loss`` expects it to expose ``layer1`` and ``layer2``.
            train_dataset: Dataset passed to the base trainer.
            n_epochs: Number of training epochs.
            batch_size: Batch size.
            lr: Learning rate.
            device: Device string passed to the base trainer.
        """
        super().__init__(root=root, id=model_id, model=model, train_dataset=train_dataset, n_epochs=n_epochs, batch_size=batch_size, lr=lr, device=device)
        self.log = {**self.log, "model_weight_mag": []}  # We extend the log of the devtorch trainer
        # Most recent weight magnitude seen by `loss`; appended to the log once per epoch.
        self._model_weight_mag = 0

    @staticmethod
    def load_model(root, model_id):
        """Load a stored model by rebuilding an ``ANNClassifier`` from its saved "params".

        Args:
            root: Forwarded to ``devtorch.load_model``.
            model_id: Forwarded to ``devtorch.load_model``.

        Returns:
            Whatever ``devtorch.load_model`` returns for the given loader.
        """

        def model_loader(hyperparams):
            # The constructor arguments were stored under model -> params by ANNClassifier.hyperparams.
            return ANNClassifier(**hyperparams["model"]["params"])

        return devtorch.load_model(root, model_id, model_loader)

    def loss(self, output, target, model):
        """Return the cross-entropy loss and record the current weight magnitude.

        Args:
            output: Model outputs passed to ``F.cross_entropy``.
            target: Targets; cast to ``long`` before computing the loss.
            model: Model whose ``layer1`` / ``layer2`` weights are measured.
        """
        # Bookkeeping only: computed without gradient tracking and kept as a plain Python number.
        with torch.no_grad():
            weight_mag = model.layer1.weight.abs().mean() + model.layer2.weight.abs().mean()
            self._model_weight_mag = weight_mag.item()
        return F.cross_entropy(output, target.long())

    def on_epoch_complete(self, save):
        """Append the latest weight magnitude to the log, save the log and print a summary.

        NOTE(review): ``save`` is not consulted here; the log is written on every
        call regardless of its value - confirm this is intended.
        """
        train_loss = self.log["train_loss"][-1]
        self.log["model_weight_mag"].append(self._model_weight_mag)
        self.save_log()
        # The printed value is the weight magnitude, so it is labelled with the log key it is stored under.
        print(f"Epoch train_loss={train_loss:.4f} (model_weight_mag={self._model_weight_mag:.4f})")

In [19]:
root = "../../data"  # where to save the checkpoint (also used as the MNIST download directory)
model_id = "loading"  # the name of the checkpoint - if none is provided devtorch auto generates this.

# Fall back to the CPU so the example also runs on machines without a CUDA device.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = ANNClassifier(784, 2000, 10)

# Convert images to tensors and normalise them
# (constants are presumably the MNIST mean / std - TODO confirm).
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST(root, train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root, train=False, download=True, transform=transform)

trainer = CustomTrainer(root, model_id, model, train_dataset, n_epochs=10, batch_size=128, lr=0.001, device=device)
trainer.train(save=True)

INFO:trainer:Completed epoch 0 with loss 123.7608951269649 in 7.7229s
Epoch train_loss=123.7609 (pred_std=0.0551)
INFO:trainer:Completed epoch 1 with loss 39.99248542985879 in 7.7199s
Epoch train_loss=39.9925 (pred_std=0.0595)
INFO:trainer:Completed epoch 2 with loss 23.93866807280574 in 7.7223s
Epoch train_loss=23.9387 (pred_std=0.0639)
INFO:trainer:Completed epoch 3 with loss 16.409666340623517 in 7.7304s
Epoch train_loss=16.4097 (pred_std=0.0681)
INFO:trainer:Completed epoch 4 with loss 14.512209695458296 in 7.7263s
Epoch train_loss=14.5122 (pred_std=0.0724)
INFO:trainer:Completed epoch 5 with loss 13.020273453548725 in 7.5502s
Epoch train_loss=13.0203 (pred_std=0.0762)
INFO:trainer:Completed epoch 6 with loss 9.42313642512454 in 7.5423s
Epoch train_loss=9.4231 (pred_std=0.0794)
INFO:trainer:Completed epoch 7 with loss 10.521217444758804 in 7.5297s
Epoch train_loss=10.5212 (pred_std=0.0831)
INFO:trainer:Completed epoch 8 with loss 8.517808600507124 in 7.5542s
Epoch train_loss=8.5178

### Load training log

In [5]:
# Read back the training log stored for this (root, model_id) pair.
# The table shown below has one row per epoch with train_loss, duration and
# the custom model_weight_mag column that CustomTrainer added to the log.
log = devtorch.load_log(root, model_id)
log

Unnamed: 0,train_loss,duration,model_weight_mag
0,123.760895,7.722918,0.055137
1,39.992485,7.719874,0.059459
2,23.938668,7.72235,0.063938
3,16.409666,7.730401,0.068137
4,14.51221,7.726257,0.072363
5,13.020273,7.550247,0.076227
6,9.423136,7.542302,0.079395
7,10.521217,7.529686,0.083117
8,8.517809,7.554157,0.08641
9,8.399855,7.525112,0.089552


### Load model hyperparams

In [6]:
# Read back the hyperparams stored for this (root, model_id) pair.
# The output below shows three top-level sections: 'trainer', 'dataset' and 'model';
# the 'model' -> 'params' entry is the one ANNClassifier.hyperparams contributes.
hyperparams = devtorch.load_hyperparams(root, model_id)
hyperparams

{'trainer': {'date': '2024-04-22-15:47:47',
  'n_epochs': 10,
  'batch_size': 128,
  'lr': 0.001,
  'dtype': 'torch.float32',
  'grad_clip_type': None,
  'grad_clip_value': None},
 'dataset': {'name': 'MNIST'},
 'model': {'name': 'ANNClassifier',
  'weights': {'layer1.weight': {'init_type': 'glorot_uniform',
    'dtype': 'torch.float32',
    'params': {}},
   'layer2.weight': {'init_type': 'glorot_uniform',
    'dtype': 'torch.float32',
    'params': {}}},
  'params': {'n_in': 784, 'n_hidden': 2000, 'n_out': 10}}}

### Load trained model and check accuracy

In [7]:
# Reload the model through CustomTrainer.load_model, which rebuilds an ANNClassifier
# from the stored 'params' and hands it to devtorch.load_model.
# Note: this rebinds `model`, replacing the instance created in the training cell.
model = CustomTrainer.load_model(root, model_id)

In [10]:
def eval_metric(output, target):
    """Count the rows of ``output`` whose largest entry sits at the index given in ``target``.

    Args:
        output: Tensor whose dimension 1 is searched for the maximum.
        target: Tensor of expected indices, compared element-wise with the predictions.

    Returns:
        The number of matching predictions as a plain Python number.
    """
    _, predicted = torch.max(output, dim=1)
    n_correct = (predicted == target).sum()
    return n_correct.cpu().item()

# Collect the per-call eval_metric results over the test set, then turn the
# total number of correct predictions into a fraction of the dataset size.
scores = devtorch.compute_metric(model, test_dataset, eval_metric, batch_size=256)
n_correct = torch.Tensor(scores).sum()
accuracy = n_correct / len(test_dataset)
print(f"Accuracy = {accuracy}")

Accuracy = 0.9767000079154968
