In [1]:
%load_ext autoreload
import os
from pathlib import Path

import torch
import torch.nn as nn

from src.pytorch_trainer import PyTorchTrainer
from src.data_loader import dataloader
import src.model_constructor as constructor

In [None]:
# Define paths
processed_dir = Path("data/processed/")
model_dir = Path("models/")
plots_path = Path("reports/figures/")
predictions_path = Path("data/predictions/")

# Dataset paths
path_r = processed_dir / "r.pt"
path_tiny = processed_dir / "tiny.pt"

# Load datasets
train_loader, val_loader, test_loader_tiny, test_loader_r, test_tiny= dataloader(path_tiny, path_r)

  data_tiny = torch.load(path_tiny)
  data_r = torch.load(path_r)


In [3]:
import itertools
def models_iterator(depths, filters_sizes, optimizers, drops, lrs):
    models_to_train = []
    for depth in depths:
        fs = filters_sizes[str(depth)]
        d = drops[str(depth)]
        configurations = list(itertools.product(fs, optimizers, d, lrs))

        for config in configurations:
            filters_size, optimizer, drop, lr = config

            nr_filters = filters_size[:depth]
            conv_layers = constructor.Conv(nr_conv=depth, nr_filters=nr_filters, maxpool_batchnorm=True)
            fc_size = filters_size[depth:]
            act_fun = ["ReLU"] * depth
            dropouts = drop
            fc_layers = constructor.FC(
                nr_fc=depth,
                fc_size=fc_size,
                act_funs=act_fun,
                dropouts=dropouts,
                in_features=conv_layers.finaldim,
                num_classes=62,
                batchnorm=True,
            )

            # Create the model using the CNN constructor
            model = constructor.CNN(
                conv_layers=conv_layers, fc_layers=fc_layers, num_classes=62, lr=lr, optim=optimizer
            )  # Assuming 62 classes as an example

            # Store the model and its parameters in the list
            model_info = {"name": f"{model.name}", "model": model, "params": {"lr": lr, "optimizer": optimizer}}

            models_to_train.append(model_info)

    return models_to_train


depths = [2, 4]
filters_sizes = {
    "2": [[8, 16, 160, 80], [32, 64, 320, 160]],
    "4": [[4, 8, 16, 32, 200, 200, 160, 80], [8, 16, 32, 64, 400, 320, 160, 80]],
}
lrs = [0.01, 0.001]
drops = {"2": [[0.0, 0.0], [0.5, 0.2]], "4": [[0.0] * 4, [0.5, 0.3, 0.3, 0.2]]}
optimizers = ["adam", "sgd"]

models_to_train = models_iterator(depths, filters_sizes, optimizers, drops, lrs)
print(f"list of {len(models_to_train)} models generated!!")

list of 32 models generated!!


In [4]:
config = models_to_train[16]

In [5]:
%autoreload 2
import copy

model = config["model"].to("cuda" if torch.cuda.is_available() else "cpu")
model_name = config["name"]
optimizer = config["params"]["optimizer"]

untrained = copy.deepcopy(model)

path_to_model = f"models/{model_name}"
path_to_predictions = os.path.join(path_to_model, "predictions")
path_to_plots = os.path.join(path_to_model, "plots")


optims = {"adam": torch.optim.Adam, "sgd": torch.optim.SGD}
optim_cls = optims[optimizer]

os.makedirs(path_to_model, exist_ok=True)
os.makedirs(path_to_plots, exist_ok=True)
os.makedirs(path_to_predictions, exist_ok=True)

optimizer = optim_cls(model.parameters(), lr=model.lr)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2)
trainer = PyTorchTrainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=nn.CrossEntropyLoss(),
    optimizer=optimizer,
    scheduler=scheduler,
)

torch.save(model.state_dict(), f"{path_to_model}/untrained.pt")

trainer.train(num_epochs=100, early_stopping_patience= 10)
model = trainer.best_model
trainer.save_best_model(path_to_model)
trainer.save_plots(path_to_plots)

logits_r, labels_r = trainer.predict(test_loader_r)
trainer.save_predictions(logits_r, f"{path_to_predictions}/r.npy")
logits_tiny, labels_tiny = trainer.predict(test_loader_tiny)
trainer.save_predictions(logits_tiny, f"{path_to_predictions}/tiny.npy")

Using device: cuda
Starting Training...

Epoch [1/100]: Train Loss: 3.9387, Train Acc: 4.72% | Val Loss: 4.0336, Val Acc: 5.28%
New best model found! Validation Accuracy: 5.28%
Epoch [2/100]: Train Loss: 3.7848, Train Acc: 7.05% | Val Loss: 3.8451, Val Acc: 5.79%
New best model found! Validation Accuracy: 5.79%
Epoch [3/100]: Train Loss: 3.6987, Train Acc: 8.33% | Val Loss: 3.7538, Val Acc: 8.58%
New best model found! Validation Accuracy: 8.58%
Epoch [4/100]: Train Loss: 3.6298, Train Acc: 9.63% | Val Loss: 3.8146, Val Acc: 8.43%
Epoch [5/100]: Train Loss: 3.5483, Train Acc: 11.46% | Val Loss: 3.7133, Val Acc: 8.47%
Epoch [6/100]: Train Loss: 3.4878, Train Acc: 12.57% | Val Loss: 3.5338, Val Acc: 11.88%
New best model found! Validation Accuracy: 11.88%
Epoch [7/100]: Train Loss: 3.4360, Train Acc: 13.43% | Val Loss: 3.6128, Val Acc: 10.63%
Epoch [8/100]: Train Loss: 3.3972, Train Acc: 14.51% | Val Loss: 3.5496, Val Acc: 11.80%
Epoch [9/100]: Train Loss: 3.3587, Train Acc: 15.04% | Val 

: 

: 

In [32]:
# Save the untrained model
untrained_model_path = model_dir / f"{model_name}_untrained.pt"
torch.save(model.state_dict(), untrained_model_path)

In [40]:
# Train the model
trainer.train(num_epochs=100, early_stopping_patience=10)

Starting Training...

Epoch [1/100]: Train Loss: 3.9994, Train Acc: 3.87% | Val Loss: 3.8363, Val Acc: 6.20%
New best model found! Validation Accuracy: 6.20%
Epoch [2/100]: Train Loss: 3.8588, Train Acc: 5.62% | Val Loss: 3.7587, Val Acc: 7.40%
New best model found! Validation Accuracy: 7.40%
Epoch [3/100]: Train Loss: 3.8023, Train Acc: 6.61% | Val Loss: 3.7219, Val Acc: 8.28%
New best model found! Validation Accuracy: 8.28%
Epoch [4/100]: Train Loss: 3.7562, Train Acc: 7.50% | Val Loss: 3.6751, Val Acc: 9.86%
New best model found! Validation Accuracy: 9.86%
Epoch [5/100]: Train Loss: 3.7055, Train Acc: 8.53% | Val Loss: 3.6343, Val Acc: 10.37%
New best model found! Validation Accuracy: 10.37%
Epoch [6/100]: Train Loss: 3.6634, Train Acc: 9.08% | Val Loss: 3.6652, Val Acc: 10.12%
Epoch [7/100]: Train Loss: 3.6250, Train Acc: 10.08% | Val Loss: 3.6209, Val Acc: 11.29%
New best model found! Validation Accuracy: 11.29%
Epoch [8/100]: Train Loss: 3.5942, Train Acc: 10.74% | Val Loss: 3.61

In [41]:
# Save the trained model
trained_model_path = model_dir / f"{model_name}.pt"
trainer.save_best_model(trained_model_path)

# Save predictions
predictions_path_r = predictions_path / f"{model_name}_r.npy"
predictions = trainer.predict(test_loader_r)
trainer.save_predictions(predictions, predictions_path_r)

# Save training plots
trainer.save_plots(plots_path)

Best model saved to models\decent.pt
Predictions saved to data\predictions\decent_r.npy
Loss plot saved to reports\figures_loss.png
Accuracy plot saved to reports\figures_accuracy.png


In [63]:
untrained_model = constructor.CNN(conv_layers=decent_conv, fc_layers=decent_fc, num_classes=62, lr=10, optim=optimizer)
trained_model = constructor.CNN(conv_layers=decent_conv, fc_layers=decent_fc, num_classes=62, lr=10, optim=optimizer)


untrained_model.load_state_dict(torch.load(untrained_model_path))
trained_model.load_state_dict(torch.load(trained_model_path))

  untrained_model.load_state_dict(torch.load(untrained_model_path))
  trained_model.load_state_dict(torch.load(trained_model_path))


<All keys matched successfully>

In [64]:
trained_model

CNN(
  (conv_layers): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layers): Sequential(
    (0): Linear(in_features=4096, out_features=300, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear

In [61]:
trained_model

CNN(
  (conv_layers): ModuleList(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layers): ModuleList(
    (0): Linear(in_features=4096, out_features=300, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear

In [55]:
def calculate_perturbed_accuracy(model, dataloader, device):
    model.eval()
    batch_correct = 0
    with torch.no_grad():
        for data, target in dataloader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            batch_correct += pred.eq(target).sum().item()
    perturbed_accuracy = batch_correct / len(dataloader.dataset)
    return perturbed_accuracy

calculate_perturbed_accuracy(trained_model, train_loader, 'cuda'if torch.cuda.is_available() else 'cpu')

0.3312153796024764

In [60]:
%autoreload 2
from src.measures_sharpeness import calculate_pac_bayes_metrics

metrics = calculate_pac_bayes_metrics(trained_model, untrained_model, train_loader, trainer.history['train_acc'][-1])

CNN(
  (conv_layers): ModuleList(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layers): ModuleList(
    (0): Linear(in_features=4096, out_features=300, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear

NotImplementedError: Module [ModuleList] is missing the required "forward" function