# Set up the SqueezeNet model for image classification

The pretrained network is SqueezeNet 1.0, loaded with `torch.hub.load('pytorch/vision:v0.10.0', 'squeezenet1_0', pretrained=True)`.



In [18]:
import torch
import torchvision.transforms as transforms
from torchvision.datasets import MNIST,FashionMNIST
from torch.utils.data import DataLoader
import copy
# Fetch the pretrained SqueezeNet 1.0 from the hub a single time; every
# dataset-specific model below is cloned from this one instance.
base_model = torch.hub.load('pytorch/vision:v0.10.0', 'squeezenet1_0', pretrained=True)

# Dump the layer layout for inspection.
print(base_model)

# Independent deep copies, one per dataset, so each can be modified separately.
modelMNIST, modelFashionMNIST = copy.deepcopy(base_model), copy.deepcopy(base_model)

# Preprocessing applied to every image of every split.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.Grayscale(num_output_channels=3),  # replicate 1 channel into 3
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),  # ImageNet stats
])

# Training and test splits of both datasets (downloaded on first use).
MNIST_ROOT = './data/raw/MNIST'
FASHION_MNIST_ROOT = './data/raw/FashionMNIST'
dsMNIST = MNIST(root=MNIST_ROOT, train=True, transform=transform, download=True)
dsFashionMNIST = FashionMNIST(root=FASHION_MNIST_ROOT, train=True, transform=transform, download=True)
dstestMNIST = MNIST(root=MNIST_ROOT, train=False, transform=transform, download=True)
dstestFashionMNIST = FashionMNIST(root=FASHION_MNIST_ROOT, train=False, transform=transform, download=True)

# Mini-batch size shared by all four loaders.
bsize = 64

# Training batches are reshuffled on every pass over the loader.
trainloaderMNIST = DataLoader(dsMNIST, batch_size=bsize, shuffle=True)
trainloaderFashionMNIST = DataLoader(dsFashionMNIST, batch_size=bsize, shuffle=True)

# Test batches keep the dataset order.
testloaderMNIST = DataLoader(dstestMNIST, batch_size=bsize, shuffle=False)
testloaderFashionMNIST = DataLoader(dstestFashionMNIST, batch_size=bsize, shuffle=False)

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (5): Fire(
   

Using cache found in C:\Users\stefa/.cache\torch\hub\pytorch_vision_v0.10.0


Change the model to accommodate 10 classes

In [15]:
# Swap the final classifier convolution of each copied network for one whose
# output channels match the dataset's class count, then create the loss and
# one optimizer per model.
import torch.nn as nn
import torch.optim as optim

MNIST_Classes = 10
FashionMNIST_Classes = 10


def _replace_classifier_head(model, num_classes):
    """Install a new 1x1 conv with ``num_classes`` outputs at ``classifier[1]``."""
    model.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
    model.num_classes = num_classes


# MNIST first, FashionMNIST second: the new layers are created in this order.
_replace_classifier_head(modelMNIST, MNIST_Classes)
_replace_classifier_head(modelFashionMNIST, FashionMNIST_Classes)

# One loss object is shared; each model gets its own Adam optimizer over all
# of its parameters.
criterion = nn.CrossEntropyLoss()
optimizerMNIST = optim.Adam(modelMNIST.parameters(), lr=0.001)
optimizerFashionMNIST = optim.Adam(modelFashionMNIST.parameters(), lr=0.001)

TRAINING TIME

In [16]:
from tqdm import tqdm
import torch

def train_model(model, dataloader, optimizer, criterion, num_epochs=5, device=None):
    """Train ``model`` on ``dataloader`` and print loss/accuracy after each epoch.

    Args:
        model: Network to train. It is moved to ``device`` and switched to
            training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches. It does not
            need to support ``len()``; batches are counted while iterating.
            It must be re-iterable when ``num_epochs`` is greater than 1.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            once per batch.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss
            tensor for a batch.
        num_epochs: Number of passes over ``dataloader``.
        device: Device to run on. When ``None``, CUDA is used if available,
            otherwise CPU, and the choice is printed.

    Returns:
        The same ``model`` object that was passed in.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device}")
    model.to(device)
    model.train()

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        # Counted in the loop so loaders without __len__ are supported.
        num_batches = 0

        loop = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
        for inputs, labels in loop:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            num_batches += 1
            running_loss += loss.item()
            # Predicted class = index of the largest score along dim 1.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            loop.set_postfix(loss=loss.item())

        # Mean of the per-batch losses; NaN (rather than ZeroDivisionError)
        # when the loader produced no batches / no samples.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        accuracy = correct / total * 100 if total else float('nan')
        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%")

    return model

# Fine-tune each network on its own training loader for five epochs.
# train_model returns the model it was given, so each result refers to the
# same object as the corresponding input model.
finedTunedModelMNIST = train_model(
    model=modelMNIST,
    dataloader=trainloaderMNIST,
    optimizer=optimizerMNIST,
    criterion=criterion,
    num_epochs=5,
)
finedTunedModelFashionMNIST = train_model(
    model=modelFashionMNIST,
    dataloader=trainloaderFashionMNIST,
    optimizer=optimizerFashionMNIST,
    criterion=criterion,
    num_epochs=5,
)


Using device: cuda


                                                                         

Epoch 1/5 - Loss: 0.9587, Accuracy: 67.09%


                                                                          

Epoch 2/5 - Loss: 0.1815, Accuracy: 94.67%


                                                                          

Epoch 3/5 - Loss: 0.1071, Accuracy: 96.79%


                                                                          

Epoch 4/5 - Loss: 0.0875, Accuracy: 97.35%


                                                                          

Epoch 5/5 - Loss: 0.0728, Accuracy: 97.83%
Using device: cuda


                                                                        

Epoch 1/5 - Loss: 0.7272, Accuracy: 73.50%


                                                                        

Epoch 2/5 - Loss: 0.3897, Accuracy: 86.11%


                                                                        

Epoch 3/5 - Loss: 0.3299, Accuracy: 88.22%


                                                                        

Epoch 4/5 - Loss: 0.3010, Accuracy: 89.21%


                                                                         

Epoch 5/5 - Loss: 0.2763, Accuracy: 89.96%




Testing the models on the test sets.

In [17]:
#testing the models on test dataloaders

def test_model(model, dataloader, criterion, device=None):
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {device}")
    model.to(device)
    model.eval()

    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(dataloader)
    accuracy = correct / total * 100
    print(f"Test Loss: {epoch_loss:.4f}, Test Accuracy: {accuracy:.2f}%")
    
    return epoch_loss, accuracy

# Test the models
# Evaluate each fine-tuned network on the held-out test loader of its dataset.
test_loss_MNIST, test_accuracy_MNIST = test_model(
    model=finedTunedModelMNIST,
    dataloader=testloaderMNIST,
    criterion=criterion,
)
test_loss_FashionMNIST, test_accuracy_FashionMNIST = test_model(
    model=finedTunedModelFashionMNIST,
    dataloader=testloaderFashionMNIST,
    criterion=criterion,
)

Using device: cuda
Test Loss: 0.0378, Test Accuracy: 98.75%
Using device: cuda
Test Loss: 0.2849, Test Accuracy: 90.17%


Exporting the model weights to a file



In [22]:
# Export each model's weights to models/<DATASET>_SN_finetuned_baseline.pth

import os
def save_model(model, dataset_type, filename='SN_finetuned_baseline.pth', output_dir='models'):
    """Write ``model``'s ``state_dict`` to ``<output_dir>/<dataset_type>_<filename>``.

    Args:
        model: Module whose ``state_dict()`` is saved.
        dataset_type: Prefix identifying the dataset, e.g. ``'MNIST'``.
        filename: Suffix of the file name, joined to the prefix with ``_``.
        output_dir: Directory to write into; created (including parents) if it
            does not exist. Defaults to ``'models'``.
    """
    os.makedirs(output_dir, exist_ok=True)
    model_path = os.path.join(output_dir, f"{dataset_type}_{filename}")
    torch.save(model.state_dict(), model_path)
    print(f"Model saved to {model_path}")
    
# Persist the weights of both fine-tuned networks, tagged by dataset.
save_model(model=finedTunedModelMNIST, dataset_type='MNIST')
save_model(model=finedTunedModelFashionMNIST, dataset_type='FMNIST')

# TODO: also save the models as .t7 files


Model saved to models\MNIST_SN_finetuned_baseline.pth
Model saved to models\FMNIST_SN_finetuned_baseline.pth
