In [None]:
# do this step below to get lightning, lightning bolts, etc.
# !pip install lightning-bolts torchvision torchsummary

# Classification Revisited, with ResNets

*AY 128/256 (UC Berkeley, 2018–2024)*

Previously, we used CNNs in our own custom model to classify images. You are asked to stack up your own model to classify galaxies in Lab 3, and you should definitely explore different CNN depths, kernel sizes, and filters to get a feel for how this works. Many of them will be able to perform the functions you need for Lab 3 adequately. That said, some architectures are better than others, and an architecture we are going to introduce today, **ResNets (Residual neural Networks)** are particularly efficient at image classification.

More generally, PyTorch comes with a bunch of models and pre-trained weights that come from fitting to some generic (in this case classification) data sets. As it turns out, many of the features in these trained CNNs are widely applicable and can be easily repurposed for another task. In this lecture, we will explore how to do that.

Again, we'll make use of the Fashion MNIST labeled dataset, which you may recall looked something like this:

<img src="https://github.com/zalandoresearch/fashion-mnist/blob/master/doc/img/fashion-mnist-sprite.png?raw=true" width="80%">

With labels: 

In [1]:
def output_label(label):
    output_mapping = {
                 0: "T-shirt/Top",
                 1: "Trouser",
                 2: "Pullover",
                 3: "Dress",
                 4: "Coat", 
                 5: "Sandal", 
                 6: "Shirt",
                 7: "Sneaker",
                 8: "Bag",
                 9: "Ankle Boot"
                 }
    input = (label.item() if type(label) == torch.Tensor else label)
    return output_mapping[input]

In [30]:
import datetime, os
import numpy as np
import warnings
import pandas as pd
import matplotlib.pyplot as plt

# from IPython.external import mathjax

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
from torchmetrics.functional import accuracy
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, ModelSummary
from pytorch_lightning.loggers import CSVLogger

import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix

# use a GPU or MPS (Mac) if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.backends.mps.is_available():
    device = "mps"

print("pytorch version:", torch.__version__)
print("device:", device)

pytorch version: 2.3.0+cu121
device: cuda:0


In [3]:
train_csv = pd.read_csv("../Lecture20_lab3_pytorch/fashion-mnist_train.csv.gz")
test_csv = pd.read_csv("../Lecture20_lab3_pytorch/fashion-mnist_test.csv.gz")

In [4]:
%%writefile fashion_dataset.py

import numpy as np
from torch.utils.data import Dataset

class FashionDataset(Dataset):
    """User defined class to build a datset using Pytorch class Dataset."""
    
    def __init__(self, data, transform = None):
        """Method to initilaize variables.""" 
        self.fashion_MNIST = list(data.values)
        self.transform = transform
        
        label = []
        image = []
        
        for i in self.fashion_MNIST:
             # first column is of labels.
            label.append(i[0])
            image.append(i[1:])
        self.labels = np.asarray(label)
        # Dimension of Images = 28 * 28 * 1. where height = width = 28 and color_channels = 1.
        self.images = np.asarray(image).reshape(-1, 28, 28, 1).astype('float32')

    def __getitem__(self, index):
        label = self.labels[index]
        image = self.images[index]
        
        if self.transform is not None:
            image = self.transform(image)

        return image, label

    def __len__(self):
        return len(self.images)

Overwriting fashion_dataset.py


In [5]:
from fashion_dataset import FashionDataset

In [70]:
batch_size = 128

## Transform data into Tensor that has a range from 0 to 1
#train_set = FashionDataset(train_csv, transform=transforms.Compose([transforms.ToTensor()]))
#test_set = FashionDataset(test_csv, transform=transforms.Compose([transforms.ToTensor()]))
#
#train_loader = DataLoader(train_set, batch_size=batch_size, num_workers=2, persistent_workers=True)
#test_loader = DataLoader(train_set, batch_size=batch_size, num_workers=2, persistent_workers=True)
data_transforms = {
    'train': transforms.Compose([
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.RandomAffine(degrees=15, shear=0.1),
        #transforms.Resize(28),
        transforms.RandomHorizontalFlip(),
        transforms.Grayscale(3), 
        transforms.ToTensor(), 
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ]),
    'test': transforms.Compose([
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.RandomAffine(degrees=15, shear=0.1),
        #transforms.Resize(28),
        transforms.Grayscale(3),
        transforms.ToTensor(), 
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
}

#train_transforms = transforms.Compose([
#        torchvision.transforms.ToPILImage(),
#        torchvision.transforms.RandomAffine(degrees=15, shear=0.1),
#        torchvision.transforms.RandomHorizontalFlip(),
#        torchvision.transforms.ToTensor(),
#])

#train_set = FashionDataset(train_csv, transform=train_transforms)
train_set = FashionDataset(train_csv, transform=data_transforms['train'])
#test_set = FashionDataset(test_csv, transform=transforms.Compose([transforms.ToTensor()]))
test_set = FashionDataset(test_csv, transform=data_transforms['test'])

train_loader = DataLoader(train_set, batch_size=batch_size, num_workers=2)
test_loader = DataLoader(train_set, batch_size=batch_size, num_workers=2)

In [71]:
def to_categorical(y, num_classes):
    """ 
    Converts a class vector (integers) to a binary class matrix.

    Args:
        y: A tensor of class indices (integers).
        num_classes: The total number of classes.

    Returns:
        A binary matrix representation of the input.
    """
    return torch.eye(num_classes)[y]

nb_classes = 10
train_label = torch.tensor([train_set[i][1] for i in range(10)])
train_label

tensor([2, 9, 6, 0, 3, 4, 4, 5, 4, 8])

In [72]:
#y_train = to_categorical(train_label, nb_classes)

## Our (Previous) CNN

In [73]:
import pytorch_lightning as pl

Let's inspect the model we created:

In [74]:
#run_time_string = datetime.datetime.utcnow().isoformat(timespec='minutes')
#filename = f'datalab_nn_pytorch_{run_time_string}'
#
#early_stop_callback = EarlyStopping(
#   monitor='val_accuracy',
#   min_delta=0.03,
#   patience=3,
#   verbose=True,
#   mode='max'
#)
#
#checkpoint_callback = ModelCheckpoint(
#    monitor='val_accuracy',
#    mode='max',
#    dirpath='nn_results',
#    filename=filename,
#    verbose=True,
#    save_top_k=1
#)
#
#logger = [CSVLogger("nn_results1", name=filename), TensorBoardLogger("nn_results", name=filename)]
#
## reproducibility
#pl.seed_everything(42)
#
#if device == "gpu":
#    myTrainer=pl.Trainer(callbacks=[early_stop_callback, checkpoint_callback], logger=logger,
#                                    gpus=-1, accelerator='auto', auto_select_gpus=True, max_epochs=5)
#else:
#    myTrainer=pl.Trainer(callbacks=[early_stop_callback, checkpoint_callback], logger=logger,
#                                    accelerator='auto', max_epochs=5)
#model=mycnn()
#myTrainer.fit(model)

In [75]:
# get a batch of data from the test set
data = next(iter(test_loader))
data[0].shape

torch.Size([128, 3, 28, 28])

In [76]:
#cpt = !ls -t nn_results/*
#cpt = cpt[0]
#print(cpt)
#
## Load up the model class
#model = mycnn.load_from_checkpoint(cpt).to(device)
#model.eval()  # Set the model to evaluation mode
#
## Example input data, replace with your actual data
#example_input = data[0].to(device)
#
## Get predictions
#with torch.no_grad():  # Disable gradient calculation
#    predictions = model(example_input)
#
## If you want the class with the highest probability
#predicted_class = torch.argmax(predictions, dim=1)
#
#print(predicted_class)

In [77]:
data[1] # these are the real labels

tensor([2, 9, 6, 0, 3, 4, 4, 5, 4, 8, 0, 8, 9, 0, 2, 2, 9, 3, 3, 3, 8, 7, 4, 4,
        0, 4, 4, 8, 7, 1, 5, 0, 5, 3, 2, 7, 3, 4, 2, 1, 6, 0, 9, 6, 0, 5, 6, 7,
        7, 2, 5, 2, 2, 4, 1, 4, 9, 8, 3, 4, 5, 5, 6, 3, 5, 8, 5, 9, 8, 1, 2, 8,
        1, 3, 6, 8, 3, 4, 2, 5, 0, 2, 6, 8, 1, 2, 7, 6, 6, 4, 6, 5, 0, 1, 7, 3,
        5, 8, 4, 3, 8, 5, 0, 5, 3, 0, 8, 5, 6, 1, 0, 7, 6, 1, 9, 7, 6, 9, 3, 3,
        2, 6, 0, 6, 3, 6, 3, 5])

In [78]:
#test_set.labels

In [79]:
#all_test = torch.Tensor(test_set.images).reshape(-1, test_set.images.shape[3], 
#                                                 test_set.images.shape[1], 
#                                                 test_set.images.shape[2])
#with torch.no_grad():  # Disable gradient calculation
#    predictions = model(all_test.to(device))
#predicted_class = torch.argmax(predictions, dim=1)
#
#conf_mat = confusion_matrix(test_set.labels, predicted_class.cpu())
#
#import seaborn as sns
#sns.set_context("poster")
#conf_mat_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
#sns.heatmap(conf_mat_normalized)
#plt.ylabel('True label')
#plt.xlabel('Predicted label')

In [80]:
#conf_mat

| Label | Description |
| --- | --- |
| 0 | T-shirt/top |
| 1 | Trouser |
| 2 | Pullover |
| 3 | Dress |
| 4 | Coat |
| 5 | Sandal |
| 6 | Shirt |
| 7 | Sneaker |
| 8 | Bag |
| 9 | Ankle boot |

In [81]:
lookup = {0: "T-shirt/top",
          1: "Trouser",
          2: "Pullover",
          3: "Dress",
          4: "Coat",
          5: "Sandal",
          6: "Shirt",
          7: "Sneaker",
          8: "Bag",
          9: "Ankle boot"}

In [82]:
#ind_wrong = []
#for i, (pred, actual) in enumerate(zip(predicted_class.cpu(),test_set.labels)):
#    if pred != actual:
#        ind_wrong.append((i, pred.item(), actual))

In [83]:
#ind_wrong[1]

In [84]:
#ind = 10
#plt.imshow(all_test[ind_wrong[ind][0]][0,:,:], cmap=plt.cm.gray_r, interpolation='nearest')
#plt.axis("off")
#plt.title(f"pred={lookup[ind_wrong[ind][1]]} true={lookup[ind_wrong[ind][2]]}")

In [85]:
# XXX same as above
#train_transforms = transforms.Compose([
#        torchvision.transforms.ToPILImage(),
#        torchvision.transforms.RandomAffine(degrees=15, shear=0.1),
#        torchvision.transforms.RandomHorizontalFlip(),
#        torchvision.transforms.ToTensor(),
#])
#
#train_set = FashionDataset(train_csv, transform=train_transforms)
#test_set = FashionDataset(test_csv, transform=transforms.Compose([transforms.ToTensor()]))
#
#train_loader = DataLoader(train_set, batch_size=batch_size, num_workers=2)
#test_loader = DataLoader(train_set, batch_size=batch_size, num_workers=2)

In [87]:
#image, label = next(iter(train_set))
#plt.axis('off')
#
#plt.imshow(image.squeeze(), cmap=plt.cm.gray_r, interpolation='nearest')
#output_label(label)

In [96]:
class mycnn_dropout(pl.LightningModule):

    def __init__(self):
        super().__init__()

        # set this to an example input size to the see a summary
        # see https://pytorch-lightning.readthedocs.io/en/latest/common/debugging.html
        #self._example_input_array = torch.randn((1, 1, 28, 28))
        self._example_input_array = torch.randn((1, 3, 28, 28))

        # define the layers here
        # Conv2d(in_channels, out_channels, kernel_size)
        # see https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
        self.layer1 = nn.Sequential(
            #nn.Conv2d(1, 32, kernel_size=3),
            nn.Conv2d(3, 32, kernel_size=3),
            
            # see https://github.com/sksq96/pytorch-summary/issues/55#issuecomment-471844028
            # to understand why pytorch and keras differ here
            nn.BatchNorm2d(32, affine=False),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(p=0.1)
        )
        
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3),
            nn.ReLU(),
        )
        
        self.fc1=torch.nn.Linear(1152, 32)
        self.fc2=torch.nn.Linear(32, 10)
    
        self.loss = nn.NLLLoss()
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        
        # Flatten
        x = x.view(x.size(0), -1)
        
        # add dropout 
        x = nn.Dropout(p=0.2)(x)

        x=torch.relu(self.fc1(x))
        x=F.log_softmax(self.fc2(x), dim=-1)
        return x

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters())
        
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=0.75,
            patience=2,
            min_lr=1e-6,
            verbose=True
        )
        
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "val_accuracy"}
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = self.loss(logits, y)
        self.log('train_loss', loss)
        return loss
    
    def _evaluate(self, batch, batch_idx, stage=None):
        x, y = batch
        logits = self.forward(x)
        loss = self.loss(logits, y)
        preds = torch.argmax(logits, dim=-1)
        acc = accuracy(preds, y, task="multiclass", num_classes=nb_classes)

        if stage:
            self.log(f'{stage}_loss', loss, prog_bar=True)
            self.log(f'{stage}_accuracy', acc, prog_bar=True)

        return loss, acc
    
    def validation_step(self, batch, batch_idx):
        return self._evaluate(batch, batch_idx, 'val')[0]
    
    def train_dataloader(self):
        return train_loader
    
    def val_dataloader(self):
        return test_loader

In [98]:
run_time_string = datetime.datetime.utcnow().isoformat(timespec='minutes')
filename = f'datalab_nn_pytorch_dropout_{run_time_string}'

early_stop_callback = EarlyStopping(
   monitor='val_accuracy',
   min_delta=0.001,
   patience=3,
   verbose=True,
   mode='max'
)

checkpoint_callback = ModelCheckpoint(
    monitor='val_accuracy',
    mode='max',
    dirpath='nn_results',
    filename=filename,
    verbose=True,
    save_top_k=1
)

logger = [CSVLogger("nn_results1", name=filename), TensorBoardLogger("nn_results", name=filename)]

pl.seed_everything(42)

#if device == "gpu":
if True:
    myTrainer=pl.Trainer(callbacks=[early_stop_callback, checkpoint_callback], logger=logger,
                     gpus=-1, accelerator='cuda', auto_select_gpus=True, max_epochs=5)
else:
    myTrainer=pl.Trainer(callbacks=[early_stop_callback, checkpoint_callback], logger=logger,
                         max_epochs=5)
    
model_dropout = mycnn_dropout()

#summary(model_dropout.to(device), input_size=(1, 28, 28))
summary(model_dropout.to(device), input_size=(3, 28, 28))

Global seed set to 42
Auto select gpus: [0]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 26, 26]             896
       BatchNorm2d-2           [-1, 32, 26, 26]               0
              ReLU-3           [-1, 32, 26, 26]               0
         MaxPool2d-4           [-1, 32, 13, 13]               0
            Conv2d-5           [-1, 64, 11, 11]          18,496
              ReLU-6           [-1, 64, 11, 11]               0
         MaxPool2d-7             [-1, 64, 5, 5]               0
           Dropout-8             [-1, 64, 5, 5]               0
            Conv2d-9            [-1, 128, 3, 3]          73,856
             ReLU-10            [-1, 128, 3, 3]               0
           Linear-11                   [-1, 32]          36,896
           Linear-12                   [-1, 10]             330
Total params: 130,474
Trainable params: 130,474
Non-trainable params: 0
-------------------------------

In [99]:
myTrainer.fit(model_dropout)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | In sizes        | Out sizes      
--------------------------------------------------------------------------
0 | layer1 | Sequential | 896    | [1, 3, 28, 28]  | [1, 32, 13, 13]
1 | layer2 | Sequential | 18.5 K | [1, 32, 13, 13] | [1, 64, 5, 5]  
2 | layer3 | Sequential | 73.9 K | [1, 64, 5, 5]   | [1, 128, 3, 3] 
3 | fc1    | Linear     | 36.9 K | [1, 1152]       | [1, 32]        
4 | fc2    | Linear     | 330    | [1, 32]         | [1, 10]        
5 | loss   | NLLLoss    | 0      | ?               | ?              
--------------------------------------------------------------------------
130 K     Trainable params
0         Non-trainable params
130 K     Total params
0.522     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_accuracy improved. New best score: 0.826
Epoch 0, global step 469: 'val_accuracy' reached 0.82565 (best 0.82565), saving model to 'nn_results/datalab_nn_pytorch_dropout_2024-11-25T07:15.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_accuracy improved by 0.028 >= min_delta = 0.001. New best score: 0.854
Epoch 1, global step 938: 'val_accuracy' reached 0.85385 (best 0.85385), saving model to 'nn_results/datalab_nn_pytorch_dropout_2024-11-25T07:15.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Metric val_accuracy improved by 0.013 >= min_delta = 0.001. New best score: 0.866
Epoch 2, global step 1407: 'val_accuracy' reached 0.86637 (best 0.86637), saving model to 'nn_results/datalab_nn_pytorch_dropout_2024-11-25T07:15.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 3, global step 1876: 'val_accuracy' was not in top 1


Validation: 0it [00:00, ?it/s]

Metric val_accuracy improved by 0.015 >= min_delta = 0.001. New best score: 0.882
Epoch 4, global step 2345: 'val_accuracy' reached 0.88173 (best 0.88173), saving model to 'nn_results/datalab_nn_pytorch_dropout_2024-11-25T07:15.ckpt' as top 1
`Trainer.fit` stopped: `max_epochs=5` reached.


# ResNets
Purpose: ResNet is primarily designed for image classification tasks.
Architecture: ResNet uses a series of convolutional layers with residual connections (skip connections) to allow gradients to flow directly through the network, mitigating the vanishing gradient problem in very deep networks.
Output: Produces a single label or a vector of class probabilities for an input image.
Key Feature: The residual blocks allow the model to learn identity mappings easily, making it more robust for deep architectures.
Input/Output Size:

    Input: Fixed-size image (e.g., 224x224 for ResNet-50).
    Output: A vector representing class probabilities.

In [100]:
from torchvision import models, transforms

class myresnet(pl.LightningModule):
    def __init__(self, num_classes=nb_classes):
        super(myresnet, self).__init__()
        
        # set this to an example input size to the see a summary
        # see https://pytorch-lightning.readthedocs.io/en/latest/common/debugging.html
        self._example_input_array = torch.randn((1, 3, 28, 28))
        
        self.model = models.resnet18(pretrained=True)
        # Freeze all layers initially (so their weights don't update)
        for param in self.model.parameters():
            param.requires_grad = False

        # Modify the final fully connected layer to match the number of classes
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

        # Unfreeze specific layers (e.g., the final fully connected layer)
        #for param in self.model.fc.parameters():
        #    param.requires_grad = True

        #self.loss = nn.CrossEntropyLoss()
        self.loss = nn.NLLLoss()


    def forward(self, x):
        x = self.model(x)
        #x = F.log_softmax(x, dim=-1)
        return x

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters())
        
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=0.75,
            patience=2,
            min_lr=1e-6,
            verbose=True
        )
        
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "val_accuracy"}
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = self.loss(logits, y)
        self.log('train_loss', loss)
        return loss
    
    def _evaluate(self, batch, batch_idx, stage=None):
        x, y = batch
        logits = self.forward(x)
        loss = self.loss(logits, y)
        preds = torch.argmax(logits, dim=-1)
        acc = accuracy(preds, y, task="multiclass", num_classes=nb_classes)

        if stage:
            self.log(f'{stage}_loss', loss, prog_bar=True)
            self.log(f'{stage}_accuracy', acc, prog_bar=True)

        return loss, acc
    
    def validation_step(self, batch, batch_idx):
        return self._evaluate(batch, batch_idx, 'val')[0]
    
    def train_dataloader(self):
        return train_loader
    
    def val_dataloader(self):
        return test_loader

In [101]:
run_time_string = datetime.datetime.utcnow().isoformat(timespec='minutes')
filename = f'datalab_nn_pytorch_resnet_{run_time_string}'

early_stop_callback = EarlyStopping(
   monitor='val_accuracy',
   min_delta=0.001,
   patience=3,
   verbose=True,
   mode='max'
)

checkpoint_callback = ModelCheckpoint(
    monitor='val_accuracy',
    mode='max',
    dirpath='nn_results',
    filename=filename,
    verbose=True,
    save_top_k=1
)

logger = [CSVLogger("nn_results2", name=filename), TensorBoardLogger("nn_results", name=filename)]

pl.seed_everything(42)

if device == "gpu":
    myTrainer=pl.Trainer(callbacks=[early_stop_callback, checkpoint_callback], logger=logger,
                     gpus=-1, accelerator='dp', auto_select_gpus=True, max_epochs=5)
else:
    myTrainer=pl.Trainer(callbacks=[early_stop_callback, checkpoint_callback], logger=logger,
                         max_epochs=5)
    
model_resnet = myresnet().to(device)
summary(model_resnet, input_size=(3, 28, 28))

Global seed set to 42
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 14, 14]           9,408
       BatchNorm2d-2           [-1, 64, 14, 14]             128
              ReLU-3           [-1, 64, 14, 14]               0
         MaxPool2d-4             [-1, 64, 7, 7]               0
            Conv2d-5             [-1, 64, 7, 7]          36,864
       BatchNorm2d-6             [-1, 64, 7, 7]             128
              ReLU-7             [-1, 64, 7, 7]               0
            Conv2d-8             [-1, 64, 7, 7]          36,864
       BatchNorm2d-9             [-1, 64, 7, 7]             128
             ReLU-10             [-1, 64, 7, 7]               0
       BasicBlock-11             [-1, 64, 7, 7]               0
           Conv2d-12             [-1, 64, 7, 7]          36,864
      BatchNorm2d-13             [-1, 64, 7, 7]             128
             ReLU-14             [-1, 6

In [102]:
myTrainer.fit(model_resnet)


  | Name  | Type    | Params | In sizes       | Out sizes
---------------------------------------------------------------
0 | model | ResNet  | 11.2 M | [1, 3, 28, 28] | [1, 10]  
1 | loss  | NLLLoss | 0      | ?              | ?        
---------------------------------------------------------------
5.1 K     Trainable params
11.2 M    Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_accuracy improved. New best score: 0.129
Epoch 0, global step 469: 'val_accuracy' reached 0.12947 (best 0.12947), saving model to 'nn_results/datalab_nn_pytorch_resnet_2024-11-25T07:19.ckpt' as top 1
  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [103]:
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision.datasets import FashionMNIST
from torch.utils.data import random_split, DataLoader
import time
import copy

In [104]:
#data_transforms = {
#    'train': transforms.Compose([
#        #transforms.Resize(28),
#        transforms.RandomHorizontalFlip(),
#        transforms.Grayscale(3), 
#        transforms.ToTensor(), 
#        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
#    ]),
#    'test': transforms.Compose([
#        #transforms.Resize(28),
#        transforms.Grayscale(3),
#        transforms.ToTensor(), 
#        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
#    ])
#}
#
#trainset = FashionMNIST(root='./data', train=True, 
#                        download=True, transform=data_transforms['train'])
#
#trainset, valset = random_split(trainset, (50000,10000))
#
#trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, 
#                                          num_workers=4)
#
#testset = FashionMNIST(root='./data', train=False, 
#                       download=True,transform=data_transforms['test'])
#
#testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, 
#                                         num_workers=4)
#
#valloader = DataLoader(valset, batch_size=batch_size, shuffle=False, 
#                                         num_workers=4)
#
#classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
#           'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')
#
#
#image_datasets = {'train': trainset, 'val': valset, 'test': testset}
#dataloaders = {'train': trainloader, 'val': valloader, 'test': testloader}
#dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
#
#
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [105]:
#dataloaders = {'train': train_loader, 'val': test_loader}
def train_model(model, criterion, optimizer, scheduler, val_acc, 
                val_loss, train_acc, train_loss,epoch, 
                num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    list = {'train': {'acc': train_acc, 'loss': train_loss}, 
        'val':{'acc': val_acc, 'loss': val_loss}}
    next = epoch
    for epoch in range(next, next+num_epochs):
        print('Epoch {}/{}'.format(epoch, next + num_epochs - 1))
        print('-' * 10)
        
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode
        
            running_loss = 0.0
            running_corrects = 0
        
            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
        
                # zero the parameter gradients
                optimizer.zero_grad()
        
                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
        
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
        
                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            list[phase]['loss'].append(epoch_loss)
            list[phase]['acc'].append(epoch_acc.item())
        
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                
        
        print()
        
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    
        
    return model, epoch + 1

In [106]:
model = models.resnet18(pretrained=True)
#for param in model.parameters():
#    param.requires_grad = False


# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

model = model.to(device)

criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [107]:
# Lists for plotting loss and accuracy and variable to
# keep track of the epoch.
# Rerun this cell if you want to restart training to empty the lists.
epoch = 0
val_acc = []
val_loss = []
train_acc = []
train_loss = []

In [108]:
model, epoch = train_model(model, criterion, optimizer, scheduler, val_acc, val_loss,  train_acc, train_loss, epoch, num_epochs=5)

Epoch 0/4
----------
train Loss: 0.5311 Acc: 0.8123
val Loss: 0.3462 Acc: 0.8706

Epoch 1/4
----------
train Loss: 0.3006 Acc: 0.8908
val Loss: 0.2939 Acc: 0.8936

Epoch 2/4
----------
train Loss: 0.2522 Acc: 0.9074
val Loss: 0.2774 Acc: 0.9008

Epoch 3/4
----------
train Loss: 0.2228 Acc: 0.9188
val Loss: 0.2638 Acc: 0.9067

Epoch 4/4
----------
train Loss: 0.2002 Acc: 0.9257
val Loss: 0.2598 Acc: 0.9095

Training complete in 2m 44s
Best val Acc: 0.909500


In [109]:
torch.save({'epoch' : epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss, 'val_loss': val_loss, 
            'train_acc': train_acc, 'val_acc': val_acc}, 
           './FMNIST_ResNet18_noresize.tar')

In [110]:
checkpoint = torch.load('./FMNIST_ResNet18_noresize.tar')
model.load_state_dict(checkpoint['model_state_dict'])  
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
train_loss = checkpoint['train_loss']
train_acc = checkpoint['train_acc']
val_loss = checkpoint['val_loss']
val_acc = checkpoint['val_acc']
epoch = checkpoint['epoch']