In [27]:
import torch

# see if CUDA is available
if torch.cuda.is_available():
    print("CUDA is available")

# see if cuDNN is available
if torch.backends.cudnn.enabled:
    print("cuDNN is available")

# set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device is set to:", device)

CUDA is available
cuDNN is available
Device is set to: cuda


In [28]:
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from pytorch_lightning import Trainer, loggers
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
import torchmetrics
import tensorboard

class CIFAR100DataModule(pl.LightningDataModule):
    def __init__(self, data_dir='./data', batch_size=128):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
        ])

    def setup(self, stage=None):
        # Load data
        cifar100_train = datasets.CIFAR100(root=self.data_dir, train=True, download=True, transform=self.transform)
        cifar100_test = datasets.CIFAR100(root=self.data_dir, train=False, download=True, transform=self.transform)

        # Train/val split
        self.train, self.val = random_split(cifar100_train, [45000, 5000])
        self.test = cifar100_test

    def train_dataloader(self):
        return DataLoader(self.train, batch_size=self.batch_size, shuffle=True, num_workers=7, persistent_workers=True)

    def val_dataloader(self):
        return DataLoader(self.val, batch_size=self.batch_size, num_workers=7, persistent_workers=True)

    def test_dataloader(self):
        return DataLoader(self.test, batch_size=self.batch_size, num_workers=7, persistent_workers=True)


In [33]:
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = self.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, out_channels * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels * 4)
        self.relu = nn.ReLU(inplace=True)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels * 4:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * 4, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * 4)
            )

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = self.relu(out)
        return out
        

class ResNet(pl.LightningModule):
    def __init__(self, block, num_blocks, num_classes=100, learning_rate=0.01):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.learning_rate = learning_rate

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Accuracy metrics
        self.train_acc = torchmetrics.Accuracy(num_classes=num_classes, average='macro', task='multiclass')
        self.val_acc = torchmetrics.Accuracy(num_classes=num_classes, average='macro', task='multiclass')

        # Testing metrics
        self.test_acc = torchmetrics.Accuracy(num_classes=num_classes, average='macro', task='multiclass')
        self.test_precision = torchmetrics.Precision(num_classes=num_classes, average='macro', task='multiclass')
        self.test_recall = torchmetrics.Recall(num_classes=num_classes, average='macro', task='multiclass')
        self.test_f1 = torchmetrics.F1Score(num_classes=num_classes, average='macro', task='multiclass')

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.train_acc(logits, y)
        self.log('train_loss', loss, on_epoch=True, prog_bar=True)
        self.log('train_acc', self.train_acc, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.val_acc(logits, y)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', self.val_acc, prog_bar=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        # Update metrics
        self.test_acc(logits, y)
        self.test_precision(logits, y)
        self.test_recall(logits, y)
        self.test_f1(logits, y)

        self.log('test_loss', loss)
        self.log('test_acc', self.test_acc, prog_bar=True)
        self.log('test_precision', self.test_precision, prog_bar=True)
        self.log('test_recall', self.test_recall, prog_bar=True)
        self.log('test_f1', self.test_f1, prog_bar=True)
        return {"loss": loss}

def ResNet200(num_classes=100, learning_rate=0.01):
    return ResNet(Bottleneck, [3, 24, 36, 3], num_classes, learning_rate)

In [30]:
early_stopping = EarlyStopping(
    monitor='val_acc',
    min_delta=0.00,
    patience=10,
    verbose=True,
    mode='max'
)

checkpoint_callback = ModelCheckpoint(
    monitor='val_acc',
    filename='best-checkpoint',
    save_top_k=1,
    mode='max', 
    verbose=True
)

data_module = CIFAR100DataModule()
model = ResNet200(num_classes=100, learning_rate=0.001)
logger = TensorBoardLogger('C:/Users/Owner/Documents/MMA/MULTI TASK/Final Project/', name='cnn_logs')
trainer = pl.Trainer(max_epochs=500, devices=1 if torch.cuda.is_available() else 0, callbacks=[early_stopping, checkpoint_callback])
trainer.fit(model, datamodule=data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name           | Type                | Params
--------------------------------------------------------
0  | conv1          | Conv2d              | 1.7 K 
1  | bn1            | BatchNorm2d         | 128   
2  | relu           | ReLU                | 0     
3  | layer1         | Sequential          | 215 K 
4  | layer2         | Sequential          | 6.8 M 
5  | layer3         | Sequential          | 40.6 M
6  | layer4         | Sequential          | 15.0 M
7  | avgpool        | AdaptiveAvgPool2d   | 0     
8  | fc             | Linear              | 204 K 
9  | train_acc      | MulticlassAccuracy  | 0     
10 | val_acc        | MulticlassAccuracy  | 0     
11 | test_acc       | MulticlassAccuracy  | 0     
12 | test_precision | MulticlassPrecision | 0     
13 | test_recall    | MulticlassRecall    | 0     
14 | test_f1        | MulticlassF1Score   | 0     
--------------------------------------------------------
62.8 M    Trainable params


Epoch 0: 100%|██████████| 352/352 [1:00:22<00:00,  0.10it/s, v_num=62, train_loss_step=3.900, train_acc_step=0.0873, val_loss=4.020, val_acc=0.0735, train_loss_epoch=4.340, train_acc_epoch=0.041]

Metric val_acc improved. New best score: 0.073
Epoch 0, global step 352: 'val_acc' reached 0.07350 (best 0.07350), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 1: 100%|██████████| 352/352 [1:16:17<00:00,  0.08it/s, v_num=62, train_loss_step=3.700, train_acc_step=0.114, val_loss=3.560, val_acc=0.145, train_loss_epoch=3.780, train_acc_epoch=0.109]  

Metric val_acc improved by 0.071 >= min_delta = 0.0. New best score: 0.145
Epoch 1, global step 704: 'val_acc' reached 0.14491 (best 0.14491), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 2: 100%|██████████| 352/352 [1:08:41<00:00,  0.09it/s, v_num=62, train_loss_step=2.940, train_acc_step=0.174, val_loss=3.180, val_acc=0.205, train_loss_epoch=3.380, train_acc_epoch=0.179]

Metric val_acc improved by 0.060 >= min_delta = 0.0. New best score: 0.205
Epoch 2, global step 1056: 'val_acc' reached 0.20464 (best 0.20464), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 3: 100%|██████████| 352/352 [1:35:52<00:00,  0.06it/s, v_num=62, train_loss_step=2.580, train_acc_step=0.245, val_loss=2.970, val_acc=0.269, train_loss_epoch=2.920, train_acc_epoch=0.260]

Metric val_acc improved by 0.064 >= min_delta = 0.0. New best score: 0.269
Epoch 3, global step 1408: 'val_acc' reached 0.26889 (best 0.26889), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 4: 100%|██████████| 352/352 [48:44<00:00,  0.12it/s, v_num=62, train_loss_step=2.710, train_acc_step=0.197, val_loss=2.520, val_acc=0.345, train_loss_epoch=2.500, train_acc_epoch=0.342]  

Metric val_acc improved by 0.076 >= min_delta = 0.0. New best score: 0.345
Epoch 4, global step 1760: 'val_acc' reached 0.34478 (best 0.34478), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 5: 100%|██████████| 352/352 [39:09<00:00,  0.15it/s, v_num=62, train_loss_step=2.330, train_acc_step=0.252, val_loss=3.330, val_acc=0.335, train_loss_epoch=2.180, train_acc_epoch=0.407]

Epoch 5, global step 2112: 'val_acc' was not in top 1


Epoch 6: 100%|██████████| 352/352 [58:39<00:00,  0.10it/s, v_num=62, train_loss_step=1.940, train_acc_step=0.376, val_loss=2.160, val_acc=0.427, train_loss_epoch=1.970, train_acc_epoch=0.459]

Metric val_acc improved by 0.082 >= min_delta = 0.0. New best score: 0.427
Epoch 6, global step 2464: 'val_acc' reached 0.42657 (best 0.42657), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 7: 100%|██████████| 352/352 [48:38<00:00,  0.12it/s, v_num=62, train_loss_step=1.850, train_acc_step=0.321, val_loss=2.010, val_acc=0.456, train_loss_epoch=1.740, train_acc_epoch=0.513]

Metric val_acc improved by 0.029 >= min_delta = 0.0. New best score: 0.456
Epoch 7, global step 2816: 'val_acc' reached 0.45558 (best 0.45558), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 8: 100%|██████████| 352/352 [53:16<00:00,  0.11it/s, v_num=62, train_loss_step=1.580, train_acc_step=0.475, val_loss=3.060, val_acc=0.474, train_loss_epoch=1.620, train_acc_epoch=0.541]

Metric val_acc improved by 0.018 >= min_delta = 0.0. New best score: 0.474
Epoch 8, global step 3168: 'val_acc' reached 0.47395 (best 0.47395), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 9: 100%|██████████| 352/352 [1:09:31<00:00,  0.08it/s, v_num=62, train_loss_step=1.330, train_acc_step=0.504, val_loss=3.600, val_acc=0.463, train_loss_epoch=1.410, train_acc_epoch=0.593]

Epoch 9, global step 3520: 'val_acc' was not in top 1


Epoch 10: 100%|██████████| 352/352 [48:52<00:00,  0.12it/s, v_num=62, train_loss_step=1.380, train_acc_step=0.474, val_loss=2.140, val_acc=0.479, train_loss_epoch=1.340, train_acc_epoch=0.609] 

Metric val_acc improved by 0.005 >= min_delta = 0.0. New best score: 0.479
Epoch 10, global step 3872: 'val_acc' reached 0.47914 (best 0.47914), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 11: 100%|██████████| 352/352 [49:50<00:00,  0.12it/s, v_num=62, train_loss_step=1.160, train_acc_step=0.549, val_loss=4.690, val_acc=0.494, train_loss_epoch=1.240, train_acc_epoch=0.634] 

Metric val_acc improved by 0.015 >= min_delta = 0.0. New best score: 0.494
Epoch 11, global step 4224: 'val_acc' reached 0.49382 (best 0.49382), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 12: 100%|██████████| 352/352 [53:36<00:00,  0.11it/s, v_num=62, train_loss_step=0.992, train_acc_step=0.602, val_loss=2.000, val_acc=0.520, train_loss_epoch=0.949, train_acc_epoch=0.710]

Metric val_acc improved by 0.026 >= min_delta = 0.0. New best score: 0.520
Epoch 12, global step 4576: 'val_acc' reached 0.52026 (best 0.52026), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 13: 100%|██████████| 352/352 [48:30<00:00,  0.12it/s, v_num=62, train_loss_step=0.999, train_acc_step=0.552, val_loss=6.250, val_acc=0.494, train_loss_epoch=0.770, train_acc_epoch=0.760] 

Epoch 13, global step 4928: 'val_acc' was not in top 1


Epoch 14: 100%|██████████| 352/352 [50:13<00:00,  0.12it/s, v_num=62, train_loss_step=0.987, train_acc_step=0.570, val_loss=2.130, val_acc=0.499, train_loss_epoch=0.766, train_acc_epoch=0.762]

Epoch 14, global step 5280: 'val_acc' was not in top 1


Epoch 15: 100%|██████████| 352/352 [50:12<00:00,  0.12it/s, v_num=62, train_loss_step=0.615, train_acc_step=0.642, val_loss=2.120, val_acc=0.536, train_loss_epoch=0.709, train_acc_epoch=0.778]

Metric val_acc improved by 0.015 >= min_delta = 0.0. New best score: 0.536
Epoch 15, global step 5632: 'val_acc' reached 0.53567 (best 0.53567), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 16: 100%|██████████| 352/352 [51:18<00:00,  0.11it/s, v_num=62, train_loss_step=0.715, train_acc_step=0.622, val_loss=2.350, val_acc=0.518, train_loss_epoch=0.425, train_acc_epoch=0.862]

Epoch 16, global step 5984: 'val_acc' was not in top 1


Epoch 17: 100%|██████████| 352/352 [42:56<00:00,  0.14it/s, v_num=62, train_loss_step=0.310, train_acc_step=0.874, val_loss=7.180, val_acc=0.542, train_loss_epoch=0.299, train_acc_epoch=0.903] 

Metric val_acc improved by 0.006 >= min_delta = 0.0. New best score: 0.542
Epoch 17, global step 6336: 'val_acc' reached 0.54172 (best 0.54172), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 18: 100%|██████████| 352/352 [1:27:16<00:00,  0.07it/s, v_num=62, train_loss_step=0.243, train_acc_step=0.827, val_loss=10.60, val_acc=0.516, train_loss_epoch=0.245, train_acc_epoch=0.921] 

Epoch 18, global step 6688: 'val_acc' was not in top 1


Epoch 19: 100%|██████████| 352/352 [1:22:21<00:00,  0.07it/s, v_num=62, train_loss_step=0.497, train_acc_step=0.684, val_loss=6.390, val_acc=0.494, train_loss_epoch=0.285, train_acc_epoch=0.909]

Epoch 19, global step 7040: 'val_acc' was not in top 1


Epoch 20: 100%|██████████| 352/352 [1:36:59<00:00,  0.06it/s, v_num=62, train_loss_step=0.327, train_acc_step=0.832, val_loss=2.480, val_acc=0.530, train_loss_epoch=0.337, train_acc_epoch=0.890]

Epoch 20, global step 7392: 'val_acc' was not in top 1


Epoch 21: 100%|██████████| 352/352 [1:18:18<00:00,  0.07it/s, v_num=62, train_loss_step=0.309, train_acc_step=0.864, val_loss=2.570, val_acc=0.500, train_loss_epoch=0.227, train_acc_epoch=0.927] 

Epoch 21, global step 7744: 'val_acc' was not in top 1


Epoch 22: 100%|██████████| 352/352 [1:19:30<00:00,  0.07it/s, v_num=62, train_loss_step=0.258, train_acc_step=0.841, val_loss=3.240, val_acc=0.536, train_loss_epoch=0.201, train_acc_epoch=0.936] 

Epoch 22, global step 8096: 'val_acc' was not in top 1


Epoch 23: 100%|██████████| 352/352 [1:15:47<00:00,  0.08it/s, v_num=62, train_loss_step=0.197, train_acc_step=0.876, val_loss=2.630, val_acc=0.546, train_loss_epoch=0.300, train_acc_epoch=0.905] 

Metric val_acc improved by 0.004 >= min_delta = 0.0. New best score: 0.546
Epoch 23, global step 8448: 'val_acc' reached 0.54608 (best 0.54608), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 24: 100%|██████████| 352/352 [38:07<00:00,  0.15it/s, v_num=62, train_loss_step=0.102, train_acc_step=0.954, val_loss=7.230, val_acc=0.509, train_loss_epoch=0.0959, train_acc_epoch=0.970] 

Epoch 24, global step 8800: 'val_acc' was not in top 1


Epoch 25: 100%|██████████| 352/352 [37:44<00:00,  0.16it/s, v_num=62, train_loss_step=0.0774, train_acc_step=0.983, val_loss=2.630, val_acc=0.549, train_loss_epoch=0.115, train_acc_epoch=0.964] 

Metric val_acc improved by 0.003 >= min_delta = 0.0. New best score: 0.549
Epoch 25, global step 9152: 'val_acc' reached 0.54898 (best 0.54898), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 26: 100%|██████████| 352/352 [48:53<00:00,  0.12it/s, v_num=62, train_loss_step=0.216, train_acc_step=0.871, val_loss=3.330, val_acc=0.522, train_loss_epoch=0.132, train_acc_epoch=0.959]  

Epoch 26, global step 9504: 'val_acc' was not in top 1


Epoch 27: 100%|██████████| 352/352 [48:05<00:00,  0.12it/s, v_num=62, train_loss_step=0.0987, train_acc_step=0.974, val_loss=2.540, val_acc=0.545, train_loss_epoch=0.145, train_acc_epoch=0.954]

Epoch 27, global step 9856: 'val_acc' was not in top 1


Epoch 28: 100%|██████████| 352/352 [46:35<00:00,  0.13it/s, v_num=62, train_loss_step=0.163, train_acc_step=0.889, val_loss=3.800, val_acc=0.523, train_loss_epoch=0.147, train_acc_epoch=0.953]  

Epoch 28, global step 10208: 'val_acc' was not in top 1


Epoch 29: 100%|██████████| 352/352 [1:28:37<00:00,  0.07it/s, v_num=62, train_loss_step=0.0823, train_acc_step=0.972, val_loss=4.570, val_acc=0.551, train_loss_epoch=0.124, train_acc_epoch=0.960]

Metric val_acc improved by 0.002 >= min_delta = 0.0. New best score: 0.551
Epoch 29, global step 10560: 'val_acc' reached 0.55066 (best 0.55066), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 30: 100%|██████████| 352/352 [1:46:40<00:00,  0.05it/s, v_num=62, train_loss_step=0.224, train_acc_step=0.876, val_loss=2.760, val_acc=0.520, train_loss_epoch=0.186, train_acc_epoch=0.942]    

Epoch 30, global step 10912: 'val_acc' was not in top 1


Epoch 31: 100%|██████████| 352/352 [49:31<00:00,  0.12it/s, v_num=62, train_loss_step=0.192, train_acc_step=0.946, val_loss=3.430, val_acc=0.524, train_loss_epoch=0.152, train_acc_epoch=0.951]  

Epoch 31, global step 11264: 'val_acc' was not in top 1


Epoch 32: 100%|██████████| 352/352 [43:50<00:00,  0.13it/s, v_num=62, train_loss_step=0.00939, train_acc_step=1.000, val_loss=3.130, val_acc=0.550, train_loss_epoch=0.0531, train_acc_epoch=0.985]

Epoch 32, global step 11616: 'val_acc' was not in top 1


Epoch 33: 100%|██████████| 352/352 [55:18<00:00,  0.11it/s, v_num=62, train_loss_step=0.108, train_acc_step=0.882, val_loss=3.540, val_acc=0.545, train_loss_epoch=0.0352, train_acc_epoch=0.990]  

Epoch 33, global step 11968: 'val_acc' was not in top 1


Epoch 34: 100%|██████████| 352/352 [51:38<00:00,  0.11it/s, v_num=62, train_loss_step=0.303, train_acc_step=0.921, val_loss=3.300, val_acc=0.478, train_loss_epoch=0.0706, train_acc_epoch=0.979]   

Epoch 34, global step 12320: 'val_acc' was not in top 1


Epoch 35: 100%|██████████| 352/352 [50:45<00:00,  0.12it/s, v_num=62, train_loss_step=0.186, train_acc_step=0.926, val_loss=2.980, val_acc=0.537, train_loss_epoch=0.141, train_acc_epoch=0.955]  

Epoch 35, global step 12672: 'val_acc' was not in top 1


Epoch 36: 100%|██████████| 352/352 [51:40<00:00,  0.11it/s, v_num=62, train_loss_step=0.0805, train_acc_step=0.961, val_loss=2.550, val_acc=0.542, train_loss_epoch=0.103, train_acc_epoch=0.969] 

Epoch 36, global step 13024: 'val_acc' was not in top 1


Epoch 37: 100%|██████████| 352/352 [1:52:53<00:00,  0.05it/s, v_num=62, train_loss_step=0.0146, train_acc_step=1.000, val_loss=2.640, val_acc=0.556, train_loss_epoch=0.117, train_acc_epoch=0.963]

Metric val_acc improved by 0.005 >= min_delta = 0.0. New best score: 0.556
Epoch 37, global step 13376: 'val_acc' reached 0.55557 (best 0.55557), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_62\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 38: 100%|██████████| 352/352 [49:29<00:00,  0.12it/s, v_num=62, train_loss_step=0.107, train_acc_step=0.947, val_loss=2.790, val_acc=0.533, train_loss_epoch=0.0684, train_acc_epoch=0.980]  

Epoch 38, global step 13728: 'val_acc' was not in top 1


Epoch 39: 100%|██████████| 352/352 [53:05<00:00,  0.11it/s, v_num=62, train_loss_step=0.0303, train_acc_step=0.971, val_loss=3.250, val_acc=0.540, train_loss_epoch=0.0655, train_acc_epoch=0.979] 

Epoch 39, global step 14080: 'val_acc' was not in top 1


Epoch 40: 100%|██████████| 352/352 [52:01<00:00,  0.11it/s, v_num=62, train_loss_step=0.110, train_acc_step=0.923, val_loss=2.820, val_acc=0.543, train_loss_epoch=0.174, train_acc_epoch=0.946]    

Epoch 40, global step 14432: 'val_acc' was not in top 1


Epoch 41: 100%|██████████| 352/352 [1:00:39<00:00,  0.10it/s, v_num=62, train_loss_step=0.0244, train_acc_step=1.000, val_loss=2.790, val_acc=0.547, train_loss_epoch=0.0622, train_acc_epoch=0.981]

Epoch 41, global step 14784: 'val_acc' was not in top 1


Epoch 42: 100%|██████████| 352/352 [51:06<00:00,  0.11it/s, v_num=62, train_loss_step=0.042, train_acc_step=0.974, val_loss=2.990, val_acc=0.548, train_loss_epoch=0.0304, train_acc_epoch=0.992]   

Epoch 42, global step 15136: 'val_acc' was not in top 1


Epoch 43: 100%|██████████| 352/352 [50:42<00:00,  0.12it/s, v_num=62, train_loss_step=0.0417, train_acc_step=0.981, val_loss=2.740, val_acc=0.543, train_loss_epoch=0.121, train_acc_epoch=0.962] 

Epoch 43, global step 15488: 'val_acc' was not in top 1


Epoch 44: 100%|██████████| 352/352 [54:04<00:00,  0.11it/s, v_num=62, train_loss_step=0.0888, train_acc_step=0.941, val_loss=2.820, val_acc=0.548, train_loss_epoch=0.074, train_acc_epoch=0.977] 

Epoch 44, global step 15840: 'val_acc' was not in top 1


Epoch 45: 100%|██████████| 352/352 [57:00<00:00,  0.10it/s, v_num=62, train_loss_step=0.0282, train_acc_step=0.971, val_loss=3.050, val_acc=0.550, train_loss_epoch=0.0435, train_acc_epoch=0.987]

Epoch 45, global step 16192: 'val_acc' was not in top 1


Epoch 46: 100%|██████████| 352/352 [54:44<00:00,  0.11it/s, v_num=62, train_loss_step=0.196, train_acc_step=0.895, val_loss=3.710, val_acc=0.522, train_loss_epoch=0.0505, train_acc_epoch=0.984]  

Epoch 46, global step 16544: 'val_acc' was not in top 1


Epoch 47: 100%|██████████| 352/352 [51:11<00:00,  0.11it/s, v_num=62, train_loss_step=0.147, train_acc_step=0.918, val_loss=2.920, val_acc=0.533, train_loss_epoch=0.128, train_acc_epoch=0.960]  

Monitored metric val_acc did not improve in the last 10 records. Best score: 0.556. Signaling Trainer to stop.
Epoch 47, global step 16896: 'val_acc' was not in top 1


Epoch 47: 100%|██████████| 352/352 [51:11<00:00,  0.11it/s, v_num=62, train_loss_step=0.147, train_acc_step=0.918, val_loss=2.920, val_acc=0.533, train_loss_epoch=0.128, train_acc_epoch=0.960]


AttributeError: 'function' object has no attribute 'load_from_checkpoint'

In [34]:
def load_resnet200_from_checkpoint(checkpoint_path, num_classes=100, learning_rate=0.01):
    model = ResNet200(num_classes=num_classes, learning_rate=learning_rate)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['state_dict'])
    return model


best_model_path = checkpoint_callback.best_model_path
best_model = load_resnet200_from_checkpoint(best_model_path, num_classes=100, learning_rate=0.01)
trainer.test(best_model, datamodule=data_module)

Files already downloaded and verified
Files already downloaded and verified


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 79/79 [00:09<00:00,  8.53it/s]


[{'test_loss': 2.614758253097534,
  'test_acc': 0.5537999868392944,
  'test_precision': 0.5764963626861572,
  'test_recall': 0.5537999868392944,
  'test_f1': 0.5511781573295593}]

In [40]:
class CIFAR10DataModule(pl.LightningDataModule):
    def __init__(self, data_dir='./data', batch_size=128):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
        ])

    def setup(self, stage=None):
        # Load data
        cifar10_train = datasets.CIFAR10(root=self.data_dir, train=True, download=True, transform=self.transform)
        cifar10_test = datasets.CIFAR10(root=self.data_dir, train=False, download=True, transform=self.transform)

        # Train/val split
        self.train, self.val = random_split(cifar10_train, [45000, 5000])
        self.test = cifar10_test

    def train_dataloader(self):
        return DataLoader(self.train, batch_size=self.batch_size, shuffle=True, num_workers=7, persistent_workers=True)

    def val_dataloader(self):
        return DataLoader(self.val, batch_size=self.batch_size, num_workers=7, persistent_workers=True)

    def test_dataloader(self):
        return DataLoader(self.test, batch_size=self.batch_size, num_workers=7, persistent_workers=True)



def ResNet50(num_classes=10, learning_rate=0.01):
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes, learning_rate)

In [41]:
early_stopping = EarlyStopping(
    monitor='val_acc',
    min_delta=0.00,
    patience=10,
    verbose=True,
    mode='max'
)

checkpoint_callback = ModelCheckpoint(
    monitor='val_acc',
    filename='best-checkpoint',
    save_top_k=1,
    mode='max', 
    verbose=True
)

data_module = CIFAR10DataModule()
model = ResNet50(num_classes=10, learning_rate=0.001)
logger = TensorBoardLogger('C:/Users/Owner/Documents/MMA/MULTI TASK/Final Project/', name='cnn2_logs')
trainer = pl.Trainer(max_epochs=500, devices=1 if torch.cuda.is_available() else 0, callbacks=[early_stopping, checkpoint_callback])
trainer.fit(model, datamodule=data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name           | Type                | Params
--------------------------------------------------------
0  | conv1          | Conv2d              | 1.7 K 
1  | bn1            | BatchNorm2d         | 128   
2  | relu           | ReLU                | 0     
3  | layer1         | Sequential          | 215 K 
4  | layer2         | Sequential          | 1.2 M 
5  | layer3         | Sequential          | 7.1 M 
6  | layer4         | Sequential          | 15.0 M
7  | avgpool        | AdaptiveAvgPool2d   | 0     
8  | fc             | Linear              | 20.5 K
9  | train_acc      | MulticlassAccuracy  | 0     
10 | val_acc        | MulticlassAccuracy  | 0     
11 | test_acc       | MulticlassAccuracy  | 0     
12 | test_precision | MulticlassPrecision | 0     
13 | test_recall    | MulticlassRecall    | 0     
14 | test_f1        | MulticlassF1Score   | 0     
--------------------------------------------------------
23.5 M    Trainable params


Epoch 0: 100%|██████████| 352/352 [00:42<00:00,  8.35it/s, v_num=64, train_loss_step=0.906, train_acc_step=0.711, val_loss=1.200, val_acc=0.563, train_loss_epoch=1.540, train_acc_epoch=0.442]

Metric val_acc improved. New best score: 0.563
Epoch 0, global step 352: 'val_acc' reached 0.56255 (best 0.56255), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 0:   0%|          | 0/352 [03:25<?, ?it/s]
Epoch 1: 100%|██████████| 352/352 [00:42<00:00,  8.32it/s, v_num=64, train_loss_step=0.916, train_acc_step=0.704, val_loss=0.886, val_acc=0.687, train_loss_epoch=0.988, train_acc_epoch=0.648]

Metric val_acc improved by 0.124 >= min_delta = 0.0. New best score: 0.687
Epoch 1, global step 704: 'val_acc' reached 0.68684 (best 0.68684), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 2: 100%|██████████| 352/352 [00:42<00:00,  8.23it/s, v_num=64, train_loss_step=0.555, train_acc_step=0.771, val_loss=0.764, val_acc=0.740, train_loss_epoch=0.765, train_acc_epoch=0.727]

Metric val_acc improved by 0.054 >= min_delta = 0.0. New best score: 0.740
Epoch 2, global step 1056: 'val_acc' reached 0.74036 (best 0.74036), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 3: 100%|██████████| 352/352 [00:43<00:00,  8.18it/s, v_num=64, train_loss_step=0.579, train_acc_step=0.833, val_loss=0.879, val_acc=0.707, train_loss_epoch=0.619, train_acc_epoch=0.782]

Epoch 3, global step 1408: 'val_acc' was not in top 1


Epoch 4: 100%|██████████| 352/352 [00:42<00:00,  8.27it/s, v_num=64, train_loss_step=0.362, train_acc_step=0.918, val_loss=0.605, val_acc=0.796, train_loss_epoch=0.509, train_acc_epoch=0.824]

Metric val_acc improved by 0.056 >= min_delta = 0.0. New best score: 0.796
Epoch 4, global step 1760: 'val_acc' reached 0.79593 (best 0.79593), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 5: 100%|██████████| 352/352 [00:42<00:00,  8.25it/s, v_num=64, train_loss_step=0.417, train_acc_step=0.802, val_loss=0.732, val_acc=0.765, train_loss_epoch=0.411, train_acc_epoch=0.857]

Epoch 5, global step 2112: 'val_acc' was not in top 1


Epoch 6: 100%|██████████| 352/352 [00:42<00:00,  8.24it/s, v_num=64, train_loss_step=0.396, train_acc_step=0.861, val_loss=0.825, val_acc=0.771, train_loss_epoch=0.346, train_acc_epoch=0.878]

Epoch 6, global step 2464: 'val_acc' was not in top 1


Epoch 7: 100%|██████████| 352/352 [00:41<00:00,  8.57it/s, v_num=64, train_loss_step=0.231, train_acc_step=0.935, val_loss=0.583, val_acc=0.808, train_loss_epoch=0.289, train_acc_epoch=0.898]

Metric val_acc improved by 0.012 >= min_delta = 0.0. New best score: 0.808
Epoch 7, global step 2816: 'val_acc' reached 0.80838 (best 0.80838), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 8: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.238, train_acc_step=0.881, val_loss=0.580, val_acc=0.816, train_loss_epoch=0.220, train_acc_epoch=0.922] 

Metric val_acc improved by 0.008 >= min_delta = 0.0. New best score: 0.816
Epoch 8, global step 3168: 'val_acc' reached 0.81625 (best 0.81625), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 9: 100%|██████████| 352/352 [00:40<00:00,  8.76it/s, v_num=64, train_loss_step=0.296, train_acc_step=0.916, val_loss=0.758, val_acc=0.795, train_loss_epoch=0.173, train_acc_epoch=0.938] 

Epoch 9, global step 3520: 'val_acc' was not in top 1


Epoch 10: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.126, train_acc_step=0.953, val_loss=0.711, val_acc=0.805, train_loss_epoch=0.142, train_acc_epoch=0.949] 

Epoch 10, global step 3872: 'val_acc' was not in top 1


Epoch 11: 100%|██████████| 352/352 [00:40<00:00,  8.76it/s, v_num=64, train_loss_step=0.194, train_acc_step=0.936, val_loss=0.764, val_acc=0.807, train_loss_epoch=0.123, train_acc_epoch=0.956] 

Epoch 11, global step 4224: 'val_acc' was not in top 1


Epoch 12: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.169, train_acc_step=0.944, val_loss=0.709, val_acc=0.823, train_loss_epoch=0.106, train_acc_epoch=0.962] 

Metric val_acc improved by 0.007 >= min_delta = 0.0. New best score: 0.823
Epoch 12, global step 4576: 'val_acc' reached 0.82325 (best 0.82325), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 13: 100%|██████████| 352/352 [00:40<00:00,  8.72it/s, v_num=64, train_loss_step=0.136, train_acc_step=0.956, val_loss=0.788, val_acc=0.812, train_loss_epoch=0.0972, train_acc_epoch=0.966]

Epoch 13, global step 4928: 'val_acc' was not in top 1


Epoch 14: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.0924, train_acc_step=0.966, val_loss=0.670, val_acc=0.836, train_loss_epoch=0.109, train_acc_epoch=0.962] 

Metric val_acc improved by 0.012 >= min_delta = 0.0. New best score: 0.836
Epoch 14, global step 5280: 'val_acc' reached 0.83574 (best 0.83574), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 15: 100%|██████████| 352/352 [00:40<00:00,  8.70it/s, v_num=64, train_loss_step=0.231, train_acc_step=0.910, val_loss=0.750, val_acc=0.832, train_loss_epoch=0.0579, train_acc_epoch=0.980] 

Epoch 15, global step 5632: 'val_acc' was not in top 1


Epoch 16: 100%|██████████| 352/352 [00:40<00:00,  8.76it/s, v_num=64, train_loss_step=0.0536, train_acc_step=0.964, val_loss=0.850, val_acc=0.820, train_loss_epoch=0.0586, train_acc_epoch=0.980] 

Epoch 16, global step 5984: 'val_acc' was not in top 1


Epoch 17: 100%|██████████| 352/352 [00:40<00:00,  8.73it/s, v_num=64, train_loss_step=0.0445, train_acc_step=0.950, val_loss=0.852, val_acc=0.823, train_loss_epoch=0.058, train_acc_epoch=0.980]  

Epoch 17, global step 6336: 'val_acc' was not in top 1


Epoch 18: 100%|██████████| 352/352 [00:40<00:00,  8.74it/s, v_num=64, train_loss_step=0.122, train_acc_step=0.959, val_loss=0.821, val_acc=0.825, train_loss_epoch=0.065, train_acc_epoch=0.978]  

Epoch 18, global step 6688: 'val_acc' was not in top 1


Epoch 19: 100%|██████████| 352/352 [00:40<00:00,  8.74it/s, v_num=64, train_loss_step=0.137, train_acc_step=0.948, val_loss=0.830, val_acc=0.823, train_loss_epoch=0.0506, train_acc_epoch=0.982] 

Epoch 19, global step 7040: 'val_acc' was not in top 1


Epoch 20: 100%|██████████| 352/352 [00:40<00:00,  8.73it/s, v_num=64, train_loss_step=0.106, train_acc_step=0.950, val_loss=0.762, val_acc=0.845, train_loss_epoch=0.0488, train_acc_epoch=0.983]  

Metric val_acc improved by 0.010 >= min_delta = 0.0. New best score: 0.845
Epoch 20, global step 7392: 'val_acc' reached 0.84532 (best 0.84532), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 21: 100%|██████████| 352/352 [00:40<00:00,  8.68it/s, v_num=64, train_loss_step=0.0545, train_acc_step=0.966, val_loss=0.771, val_acc=0.832, train_loss_epoch=0.0472, train_acc_epoch=0.984] 

Epoch 21, global step 7744: 'val_acc' was not in top 1


Epoch 22: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.146, train_acc_step=0.952, val_loss=0.801, val_acc=0.835, train_loss_epoch=0.0755, train_acc_epoch=0.975]  

Epoch 22, global step 8096: 'val_acc' was not in top 1


Epoch 23: 100%|██████████| 352/352 [00:40<00:00,  8.73it/s, v_num=64, train_loss_step=0.0312, train_acc_step=0.988, val_loss=0.763, val_acc=0.844, train_loss_epoch=0.0354, train_acc_epoch=0.988] 

Epoch 23, global step 8448: 'val_acc' was not in top 1


Epoch 24: 100%|██████████| 352/352 [00:40<00:00,  8.72it/s, v_num=64, train_loss_step=0.0373, train_acc_step=0.989, val_loss=0.803, val_acc=0.837, train_loss_epoch=0.0469, train_acc_epoch=0.984] 

Epoch 24, global step 8800: 'val_acc' was not in top 1


Epoch 25: 100%|██████████| 352/352 [00:40<00:00,  8.66it/s, v_num=64, train_loss_step=0.015, train_acc_step=0.987, val_loss=0.844, val_acc=0.839, train_loss_epoch=0.0246, train_acc_epoch=0.991]   

Epoch 25, global step 9152: 'val_acc' was not in top 1


Epoch 26: 100%|██████████| 352/352 [00:40<00:00,  8.65it/s, v_num=64, train_loss_step=0.137, train_acc_step=0.977, val_loss=0.864, val_acc=0.830, train_loss_epoch=0.0374, train_acc_epoch=0.987]  

Epoch 26, global step 9504: 'val_acc' was not in top 1


Epoch 27: 100%|██████████| 352/352 [00:40<00:00,  8.64it/s, v_num=64, train_loss_step=0.157, train_acc_step=0.950, val_loss=0.847, val_acc=0.839, train_loss_epoch=0.0419, train_acc_epoch=0.985]  

Epoch 27, global step 9856: 'val_acc' was not in top 1


Epoch 28: 100%|██████████| 352/352 [00:40<00:00,  8.64it/s, v_num=64, train_loss_step=0.0546, train_acc_step=0.986, val_loss=0.774, val_acc=0.852, train_loss_epoch=0.0361, train_acc_epoch=0.987] 

Metric val_acc improved by 0.006 >= min_delta = 0.0. New best score: 0.852
Epoch 28, global step 10208: 'val_acc' reached 0.85154 (best 0.85154), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 29: 100%|██████████| 352/352 [00:40<00:00,  8.59it/s, v_num=64, train_loss_step=0.00419, train_acc_step=1.000, val_loss=0.927, val_acc=0.832, train_loss_epoch=0.0201, train_acc_epoch=0.993] 

Epoch 29, global step 10560: 'val_acc' was not in top 1


Epoch 30: 100%|██████████| 352/352 [00:40<00:00,  8.61it/s, v_num=64, train_loss_step=0.116, train_acc_step=0.981, val_loss=0.980, val_acc=0.805, train_loss_epoch=0.0416, train_acc_epoch=0.986]  

Epoch 30, global step 10912: 'val_acc' was not in top 1


Epoch 31: 100%|██████████| 352/352 [00:40<00:00,  8.77it/s, v_num=64, train_loss_step=0.0205, train_acc_step=1.000, val_loss=0.815, val_acc=0.836, train_loss_epoch=0.041, train_acc_epoch=0.986]  

Epoch 31, global step 11264: 'val_acc' was not in top 1


Epoch 32: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.0146, train_acc_step=0.990, val_loss=1.050, val_acc=0.815, train_loss_epoch=0.0226, train_acc_epoch=0.992] 

Epoch 32, global step 11616: 'val_acc' was not in top 1


Epoch 33: 100%|██████████| 352/352 [00:40<00:00,  8.77it/s, v_num=64, train_loss_step=0.0893, train_acc_step=0.993, val_loss=0.890, val_acc=0.841, train_loss_epoch=0.0284, train_acc_epoch=0.990]  

Epoch 33, global step 11968: 'val_acc' was not in top 1


Epoch 34: 100%|██████████| 352/352 [00:40<00:00,  8.76it/s, v_num=64, train_loss_step=0.0395, train_acc_step=0.973, val_loss=0.990, val_acc=0.826, train_loss_epoch=0.0245, train_acc_epoch=0.991]  

Epoch 34, global step 12320: 'val_acc' was not in top 1


Epoch 35: 100%|██████████| 352/352 [00:40<00:00,  8.76it/s, v_num=64, train_loss_step=0.105, train_acc_step=0.967, val_loss=0.797, val_acc=0.841, train_loss_epoch=0.0411, train_acc_epoch=0.986]  

Epoch 35, global step 12672: 'val_acc' was not in top 1


Epoch 36: 100%|██████████| 352/352 [00:40<00:00,  8.76it/s, v_num=64, train_loss_step=0.166, train_acc_step=0.960, val_loss=0.907, val_acc=0.832, train_loss_epoch=0.0209, train_acc_epoch=0.992]   

Epoch 36, global step 13024: 'val_acc' was not in top 1


Epoch 37: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.00235, train_acc_step=1.000, val_loss=0.853, val_acc=0.851, train_loss_epoch=0.0209, train_acc_epoch=0.993] 

Epoch 37, global step 13376: 'val_acc' was not in top 1


Epoch 38: 100%|██████████| 352/352 [00:40<00:00,  8.76it/s, v_num=64, train_loss_step=0.015, train_acc_step=0.990, val_loss=0.888, val_acc=0.852, train_loss_epoch=0.0137, train_acc_epoch=0.995]   

Metric val_acc improved by 0.000 >= min_delta = 0.0. New best score: 0.852
Epoch 38, global step 13728: 'val_acc' reached 0.85188 (best 0.85188), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 39: 100%|██████████| 352/352 [00:40<00:00,  8.73it/s, v_num=64, train_loss_step=0.0122, train_acc_step=1.000, val_loss=0.898, val_acc=0.837, train_loss_epoch=0.0411, train_acc_epoch=0.985] 

Epoch 39, global step 14080: 'val_acc' was not in top 1


Epoch 40: 100%|██████████| 352/352 [00:40<00:00,  8.73it/s, v_num=64, train_loss_step=0.0338, train_acc_step=0.990, val_loss=0.770, val_acc=0.848, train_loss_epoch=0.0589, train_acc_epoch=0.981] 

Epoch 40, global step 14432: 'val_acc' was not in top 1


Epoch 41: 100%|██████████| 352/352 [00:40<00:00,  8.76it/s, v_num=64, train_loss_step=0.000576, train_acc_step=1.000, val_loss=0.794, val_acc=0.855, train_loss_epoch=0.0103, train_acc_epoch=0.997]

Metric val_acc improved by 0.003 >= min_delta = 0.0. New best score: 0.855
Epoch 41, global step 14784: 'val_acc' reached 0.85503 (best 0.85503), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 42: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.0106, train_acc_step=1.000, val_loss=0.794, val_acc=0.858, train_loss_epoch=0.00221, train_acc_epoch=1.000] 

Metric val_acc improved by 0.003 >= min_delta = 0.0. New best score: 0.858
Epoch 42, global step 15136: 'val_acc' reached 0.85768 (best 0.85768), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 43: 100%|██████████| 352/352 [00:40<00:00,  8.71it/s, v_num=64, train_loss_step=0.00688, train_acc_step=1.000, val_loss=0.798, val_acc=0.866, train_loss_epoch=0.00291, train_acc_epoch=0.999] 

Metric val_acc improved by 0.009 >= min_delta = 0.0. New best score: 0.866
Epoch 43, global step 15488: 'val_acc' reached 0.86630 (best 0.86630), saving model to 'c:\\Users\\Owner\\Documents\\MMA\\MULTI TASK\\Final Project\\lightning_logs\\version_64\\checkpoints\\best-checkpoint.ckpt' as top 1


Epoch 44: 100%|██████████| 352/352 [00:40<00:00,  8.71it/s, v_num=64, train_loss_step=0.015, train_acc_step=1.000, val_loss=0.976, val_acc=0.822, train_loss_epoch=0.0355, train_acc_epoch=0.988]    

Epoch 44, global step 15840: 'val_acc' was not in top 1


Epoch 45: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.0755, train_acc_step=0.971, val_loss=0.827, val_acc=0.847, train_loss_epoch=0.0349, train_acc_epoch=0.988] 

Epoch 45, global step 16192: 'val_acc' was not in top 1


Epoch 46: 100%|██████████| 352/352 [00:40<00:00,  8.76it/s, v_num=64, train_loss_step=0.000891, train_acc_step=1.000, val_loss=0.847, val_acc=0.852, train_loss_epoch=0.0174, train_acc_epoch=0.994]

Epoch 46, global step 16544: 'val_acc' was not in top 1


Epoch 47: 100%|██████████| 352/352 [00:40<00:00,  8.78it/s, v_num=64, train_loss_step=0.0185, train_acc_step=0.983, val_loss=0.855, val_acc=0.854, train_loss_epoch=0.00755, train_acc_epoch=0.997] 

Epoch 47, global step 16896: 'val_acc' was not in top 1


Epoch 48: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.014, train_acc_step=1.000, val_loss=0.822, val_acc=0.844, train_loss_epoch=0.0407, train_acc_epoch=0.986]    

Epoch 48, global step 17248: 'val_acc' was not in top 1


Epoch 49: 100%|██████████| 352/352 [00:40<00:00,  8.76it/s, v_num=64, train_loss_step=0.00209, train_acc_step=1.000, val_loss=0.959, val_acc=0.842, train_loss_epoch=0.0184, train_acc_epoch=0.994] 

Epoch 49, global step 17600: 'val_acc' was not in top 1


Epoch 50: 100%|██████████| 352/352 [00:40<00:00,  8.73it/s, v_num=64, train_loss_step=0.033, train_acc_step=0.980, val_loss=0.842, val_acc=0.854, train_loss_epoch=0.00537, train_acc_epoch=0.998]  

Epoch 50, global step 17952: 'val_acc' was not in top 1


Epoch 51: 100%|██████████| 352/352 [00:40<00:00,  8.74it/s, v_num=64, train_loss_step=0.0169, train_acc_step=1.000, val_loss=0.992, val_acc=0.833, train_loss_epoch=0.0138, train_acc_epoch=0.995]   

Epoch 51, global step 18304: 'val_acc' was not in top 1


Epoch 52: 100%|██████████| 352/352 [00:40<00:00,  8.75it/s, v_num=64, train_loss_step=0.0391, train_acc_step=0.995, val_loss=1.010, val_acc=0.831, train_loss_epoch=0.0224, train_acc_epoch=0.992]  

Epoch 52, global step 18656: 'val_acc' was not in top 1


Epoch 53: 100%|██████████| 352/352 [00:40<00:00,  8.74it/s, v_num=64, train_loss_step=0.00162, train_acc_step=1.000, val_loss=0.899, val_acc=0.847, train_loss_epoch=0.0243, train_acc_epoch=0.992] 

Monitored metric val_acc did not improve in the last 10 records. Best score: 0.866. Signaling Trainer to stop.
Epoch 53, global step 19008: 'val_acc' was not in top 1


Epoch 53: 100%|██████████| 352/352 [00:40<00:00,  8.74it/s, v_num=64, train_loss_step=0.00162, train_acc_step=1.000, val_loss=0.899, val_acc=0.847, train_loss_epoch=0.0243, train_acc_epoch=0.992]


In [44]:
def load_resnet50_from_checkpoint(checkpoint_path, num_classes=10, learning_rate=0.01):
    model = ResNet50(num_classes=num_classes, learning_rate=learning_rate)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['state_dict'])
    return model


best_model_path = checkpoint_callback.best_model_path
best_model = load_resnet50_from_checkpoint(best_model_path, num_classes=10, learning_rate=0.01)
trainer.test(best_model, datamodule=data_module)

Files already downloaded and verified
Files already downloaded and verified


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 79/79 [00:03<00:00, 23.26it/s]


[{'test_loss': 0.8349453806877136,
  'test_acc': 0.8579999804496765,
  'test_precision': 0.8574167490005493,
  'test_recall': 0.8579999804496765,
  'test_f1': 0.8572114109992981}]