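# Set up a SqueezeNet model for image classification

The cells below load the ImageNet-pretrained `squeezenet1_0` through `torch.hub` (note: not the `squeezenet1_1` variant, i.e. `torchvision.models.squeezenet1_1(pretrained=True)`).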



In [7]:
import torch
import torchvision.transforms as transforms
from torchvision.datasets import MNIST,FashionMNIST
from torch.utils.data import DataLoader, Dataset
import copy
from PIL import Image
import os
import random


# Seed the Python and PyTorch RNGs before anything random happens, so that
# DataLoader shuffling (shuffle=True below) and any later random weight
# initialisation are repeatable across "Restart & Run All".
# NOTE(review): this does not by itself force deterministic CUDA kernels.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Load the base model ONCE
base_model = torch.hub.load('pytorch/vision:v0.10.0', 'squeezenet1_0', pretrained=True)

# Independent copies of the pretrained network, one per dataset.
# NOTE(review): both names are reassigned by get_sq_model(...) in the next
# cell, so these copies only act as placeholders.
modelMNIST = copy.deepcopy(base_model)
modelFashionMNIST = copy.deepcopy(base_model)

# Preprocessing shared by every dataset split: upscale to 224x224, replicate
# the single grayscale channel three times, convert to a tensor and normalise
# with the ImageNet channel statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),  # convert 1->3 channels
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet stats
                         std=[0.229, 0.224, 0.225])
])


# Load original datasets (downloaded into ./data/raw/<NAME> when missing)
dsMNIST = MNIST(root='./data/raw/MNIST', train=True, download=True, transform=transform)
dsFashionMNIST = FashionMNIST(root='./data/raw/FashionMNIST', train=True, download=True, transform=transform)
dstestMNIST = MNIST(root='./data/raw/MNIST', train=False, download=True, transform=transform)
dstestFashionMNIST = FashionMNIST(root='./data/raw/FashionMNIST', train=False, download=True, transform=transform)

# Batch size used by all four loaders.
bsize = 100

# Test loaders keep a fixed order so evaluation runs are comparable.
basetestloaderMNIST = DataLoader(dstestMNIST, batch_size=bsize, shuffle=False)
basetestloaderFashionMNIST = DataLoader(dstestFashionMNIST, batch_size=bsize, shuffle=False)

# Training loaders reshuffle the samples every epoch.
basetrainloaderMNIST = DataLoader(dsMNIST, batch_size=bsize, shuffle=True)
basetrainloaderFashionMNIST = DataLoader(dsFashionMNIST, batch_size=bsize, shuffle=True)

Using cache found in C:\Users\stefa/.cache\torch\hub\pytorch_vision_v0.10.0


Change the model to accommodate 10 classes

In [8]:
# Number of output classes per dataset; the model's output layer is resized
# to these values.
MNIST_Classes: int = 10
FashionMNIST_Classes: int = 10

def get_sq_model(num_classes):
    """Return a pretrained SqueezeNet 1.0 whose output layer has `num_classes` outputs.

    The network is fetched with ``torch.hub.load`` (pretrained weights) and
    ``classifier[1]`` is swapped for a freshly initialised 1x1 convolution
    producing ``num_classes`` channels. Every other layer keeps its pretrained
    weights.

    Args:
        num_classes: Number of target classes; must be at least 1.

    Returns:
        The modified model, with ``model.num_classes`` set to ``num_classes``.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """
    if num_classes < 1:
        raise ValueError(f"num_classes must be a positive integer, got {num_classes}")

    model = torch.hub.load('pytorch/vision:v0.10.0', 'squeezenet1_0', pretrained=True)

    # Take the input width from the layer being replaced rather than
    # hard-coding 512, so the new head always matches the feature extractor.
    old_head = model.classifier[1]
    model.classifier[1] = torch.nn.Conv2d(
        old_head.in_channels, num_classes, kernel_size=(1, 1), stride=(1, 1)
    )
    model.num_classes = num_classes
    return model

    
    
import torch.nn as nn
import torch.optim as optim

# One network per dataset, each with its output layer resized for that dataset.
modelMNIST = get_sq_model(MNIST_Classes)
modelFashionMNIST = get_sq_model(FashionMNIST_Classes)

# A single cross-entropy loss object is shared by both training runs.
criterion = nn.CrossEntropyLoss()

# Same SGD hyper-parameters for both runs; each optimizer only updates the
# parameters of its own model.
optimizerMNIST = optim.SGD(
    modelMNIST.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=1e-4,
)
optimizerFashionMNIST = optim.SGD(
    modelFashionMNIST.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=1e-4,
)

Using cache found in C:\Users\stefa/.cache\torch\hub\pytorch_vision_v0.10.0
Using cache found in C:\Users\stefa/.cache\torch\hub\pytorch_vision_v0.10.0


TRAINING TIME

In [None]:
from tqdm import tqdm
import torch
def train_model(model, train_dataloader, test_dataloader, optimizer, criterion, num_epochs=5, device=None, patience=5):
    """Fine-tune `model`, keeping the weights from the best-scoring epoch.

    Each epoch runs one optimisation pass over `train_dataloader`, then
    measures accuracy on `test_dataloader`. A StepLR schedule halves the
    learning rate after every 10 completed epochs. Training stops early once
    the evaluation accuracy has failed to improve for `patience` consecutive
    epochs.

    Args:
        model: Network to train; moved to `device` in place.
        train_dataloader: Iterable of ``(inputs, labels)`` training batches.
        test_dataloader: Iterable of ``(inputs, labels)`` batches evaluated
            after every epoch.
        optimizer: Optimizer already bound to `model`'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        num_epochs: Maximum number of epochs.
        device: Target device; when None, CUDA is used if available, else CPU
            (and the choice is printed).
        patience: Number of consecutive non-improving epochs tolerated.

    Returns:
        The same `model` object, loaded with the weights of the epoch that
        achieved the highest evaluation accuracy.

    NOTE(review): the best epoch is chosen on `test_dataloader` itself, so the
    reported "Best Test Accuracy" is not an independent estimate.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device}")
    model.to(device)
    lr_schedule = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    def _count_hits(logits, targets):
        # Number of samples whose highest-scoring class equals the target.
        guesses = torch.max(logits, 1)[1]
        return (guesses == targets).sum().item()

    top_acc = 0.0
    top_weights = copy.deepcopy(model.state_dict())
    stale_epochs = 0

    for epoch_idx in range(num_epochs):
        epoch_no = epoch_idx + 1

        # ---- optimisation pass over the training data ----
        model.train()
        loss_sum, train_hits, train_seen = 0.0, 0, 0
        progress = tqdm(train_dataloader, desc=f"Epoch {epoch_no}/{num_epochs}", leave=False)
        for batch_x, batch_y in progress:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)
            batch_loss.backward()
            optimizer.step()

            loss_value = batch_loss.item()
            loss_sum += loss_value
            train_seen += batch_y.size(0)
            train_hits += _count_hits(logits, batch_y)

            progress.set_postfix(loss=loss_value)

        mean_loss = loss_sum / len(train_dataloader)
        train_acc = train_hits / train_seen * 100
        print(f"Epoch {epoch_no}/{num_epochs} - Loss: {mean_loss:.4f}, Accuracy: {train_acc:.2f}%")

        # ---- evaluation pass (no gradients, eval-mode layers) ----
        model.eval()
        eval_hits, eval_seen = 0, 0
        with torch.no_grad():
            for batch_x, batch_y in test_dataloader:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)
                eval_seen += batch_y.size(0)
                eval_hits += _count_hits(model(batch_x), batch_y)
        test_acc = eval_hits / eval_seen * 100
        print(f"Test Accuracy: {test_acc:.2f}%")

        # ---- bookkeeping: snapshot on improvement, otherwise count a stale epoch ----
        if test_acc > top_acc:
            top_acc = test_acc
            top_weights = copy.deepcopy(model.state_dict())
            stale_epochs = 0
        else:
            stale_epochs += 1

        if stale_epochs >= patience:
            print(f"Early stopping at epoch {epoch_no}")
            break

        # Only advance the schedule when training continues.
        lr_schedule.step()

    # Roll back to the best snapshot before handing the model back.
    model.load_state_dict(top_weights)
    print(f"Best Test Accuracy: {top_acc:.2f}%")
    return model

# Fine-tune one network per dataset for at most 30 epochs each, using the
# shared loss and the optimizer bound to that network.
finedTunedModelMNIST = train_model(
    model=modelMNIST,
    train_dataloader=basetrainloaderMNIST,
    test_dataloader=basetestloaderMNIST,
    optimizer=optimizerMNIST,
    criterion=criterion,
    num_epochs=30,
)
finedTunedModelFashionMNIST = train_model(
    model=modelFashionMNIST,
    train_dataloader=basetrainloaderFashionMNIST,
    test_dataloader=basetestloaderFashionMNIST,
    optimizer=optimizerFashionMNIST,
    criterion=criterion,
    num_epochs=30,
)


Using device: cuda


Epoch 1/30:   8%|▊         | 51/600 [00:08<01:23,  6.60it/s, loss=0.0131]  

Testing the models on the test sets.

In [None]:
#testing the models on test dataloaders

def test_model(model, dataloader, criterion, device=None):
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device}")
    model.to(device)
    model.eval()

    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(dataloader)
    accuracy = correct / total * 100
    print(f"Test Loss: {epoch_loss:.4f}, Test Accuracy: {accuracy:.2f}%")
    
    return epoch_loss, accuracy

# Test the models on the held-out test loaders.
# The loaders created earlier in this notebook are named basetestloader*;
# the previously used testloaderMNIST / testloaderFashionMNIST are never
# defined and raise NameError on a fresh kernel.
test_loss_MNIST, test_accuracy_MNIST = test_model(finedTunedModelMNIST, basetestloaderMNIST, criterion)
test_loss_FashionMNIST, test_accuracy_FashionMNIST = test_model(finedTunedModelFashionMNIST, basetestloaderFashionMNIST, criterion)

Exporting the fine-tuned models to files (the whole model object is saved with `torch.save`, not only its weights)



In [None]:
# export the models to models call them DATASETTYPE_squeezenet1_0_finetuned_baseline.pth

import os
def save_model(model, dataset_type, filename='model.pth'):
    """Serialise `model` to ``models/<dataset_type>_<filename>``.

    The ``models`` directory (relative to the current working directory) is
    created when missing. The complete model object is written with
    ``torch.save``, and the destination path is printed afterwards.

    Args:
        model: Object to serialise.
        dataset_type: Prefix identifying the dataset the model was trained on.
        filename: Suffix of the file name; used verbatim, so include the
            extension here if one is wanted.
    """
    out_dir = 'models'
    os.makedirs(out_dir, exist_ok=True)

    file_name = "{}_{}".format(dataset_type, filename)
    model_path = os.path.join(out_dir, file_name)

    torch.save(model, model_path)
    print(f"Model saved to {model_path}")
    
# Write both checkpoints with the same "<dataset>_baseline_NO_watermark" name.
# The dataset tag and the file-name suffix must be separate arguments: when
# they are fused into one string, save_model falls back to its default suffix
# and writes "fmnist_NO_watermark_model.pth", which does not match the MNIST
# file's naming.
# NOTE(review): neither file name ends in ".pth" although this cell's header
# comment asks for it; add the extension only together with the code that
# loads these files.
save_model(finedTunedModelMNIST, 'mnist', "baseline_NO_watermark")
save_model(finedTunedModelFashionMNIST, 'fmnist', "baseline_NO_watermark")


