In [1]:
# Debugging aid, currently disabled. Uncomment both lines to set
# CUDA_LAUNCH_BLOCKING=1, which makes CUDA kernel launches synchronous so that a
# device-side assert is reported at the call that triggered it.
# The variable has to be set before CUDA is initialised to have any effect.
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

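Reference on debugging CUDA device-side asserts (background for the `CUDA_LAUNCH_BLOCKING` switch above): https://lernapparat.de/debug-device-assert/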

In [2]:
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import optim

In [3]:
import sys

# Make the project root (three directories up from this notebook) importable.
# Index 1 is used so that the entry at index 0 keeps its priority.
sys.path[1:1] = ["../../../"]

In [4]:
from train import *
from data_preprocessing import *
from data_augmentation import *
import torch.optim.lr_scheduler as lr_scheduler
from Models.yolov8cls_path import Model

In [5]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
# Build the classifier from Models.yolov8cls_path with 10 output classes.
# All optional architecture switches are turned off for this run; their exact
# effect is defined inside Model and is not visible here (TODO confirm against
# Models/yolov8cls_path.py).
model = Model(
    num_classes=10,
    residual_connection=False,
    CSP=False,
    add_hidden=False,
    classifyV8=False,
    bottleneck=1.0,
    variant='s',
    device=device,          # device selected earlier (CUDA if available)
    dtype=torch.float32,
)

In [7]:
# Imported explicitly so this cell does not depend on `os` leaking into the
# namespace through one of the wildcard (`from ... import *`) imports.
import os

# Root folder of the Imagenette2 dataset, relative to the notebook's working directory.
data_path = '../../../../datasets/imagenette2/'
# File handed to get_means / get_stds; presumably holds the cached per-channel
# normalisation statistics (verify against data_preprocessing).
norms_path = os.path.join(data_path, 'norms.json')

In [8]:
# Fetch the normalisation statistics using norms_path. No train loader is
# supplied, so the helpers are presumably expected to read the values from that
# file rather than recompute them (TODO confirm in data_preprocessing).
# The tuple is evaluated left to right, so get_means still runs before get_stds.
means, stds = (
    get_means(path=norms_path, train_loader=None),
    get_stds(path=norms_path, train_loader=None),
)

Means are: [0.44969913363456726, 0.44868946075439453, 0.45163223147392273]
stds are: [0.28648287057876587, 0.28796446323394775, 0.2865694761276245]



Profiling your PyTorch module (official PyTorch profiler tutorial):
https://pytorch.org/tutorials/beginner/profiler.html

Notes quoted from the PyTorch forum thread on preventing overfitting (https://discuss.pytorch.org/t/how-to-prevent-overfitting/1902):

> Right now, with my augmented dataset, at epoch 8, I am getting a testset Top1 accuracy of 45% but a trainset Top1 accuracy of 69%.

> You should strongly consider data augmentation in some meaningful way. If you’re attempting to do classification then think about what augmentations might add useful information and help distinguish classes in your dataset. In one of my cases, introducing background variation increased recognition rate by over 50%. Basically, with small datasets there is too much overfitting so you want the network to learn real-world distinctions vs. irrelevant artifacts like backgrounds / shadows etc.

In [9]:
# Training pipeline: random resized crop to 224x224, then the project's
# Augmentation step (applied before conversion to a tensor), then tensor
# conversion and per-channel normalisation with the dataset statistics.
transformations = transforms.Compose(
    [
        transforms.RandomResizedCrop((224, 224)),
        Augmentation(),
        transforms.ToTensor(),
        transforms.Normalize(mean=means, std=stds),
    ]
)

# Validation pipeline: deterministic resize to 224x224, no augmentation,
# same tensor conversion and normalisation as for training.
transformations_val = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=means, std=stds),
    ]
)

In [10]:
# Training split, using the augmenting transform pipeline.
train_dataset = ImageNetSubset(
    path=data_path,
    train=True,
    transform=transformations,
    half=False,
    show=False,
)

# Validation split, using the deterministic transform pipeline.
val_dataset = ImageNetSubset(
    path=data_path,
    train=False,
    transform=transformations_val,
    half=False,
    show=False,
)

In [11]:
# Number of epochs passed to train() for each training call.
epochs = 10

# Training batches are reshuffled every epoch; validation order stays fixed.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=4,
)

In [12]:
# SGD with momentum and a small L2 penalty over every model parameter.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0001,
)

In [13]:
# Negative log-likelihood loss averaged over the batch (the default reduction,
# spelled out here). NLLLoss expects log-probabilities as input, so this assumes
# the model's forward pass ends in a log-softmax (TODO confirm in Model).
loss_fn = nn.NLLLoss(reduction="mean")

In [14]:
# Exponential decay: every scheduler step multiplies the learning rate by 0.9
# (the training log shows one step per epoch, e.g. 0.0100 -> 0.0090).
scheduler = lr_scheduler.ExponentialLR(
    optimizer=optimizer,
    gamma=0.9,
)

In [15]:
# First training run: `epochs` epochs from the freshly initialised model.
# The outputs_path directory is given to train() for its outputs; what exactly
# is written there is defined in train.py.
history, gradient_stats = train(
    epochs,
    train_loader,
    val_loader,
    model,
    optimizer,
    loss_fn,
    scheduler,
    outputs_path='../../log/YOLOv8cls-version-1/training/',
)

2024-12-08 18:38:13.984091 Epoch 1 
2024-12-08 18:38:31.409380 Batch 15 
2024-12-08 18:38:33.779508 Batch 30 
2024-12-08 18:38:36.263418 Batch 45 
2024-12-08 18:38:38.974709 Batch 60 
2024-12-08 18:38:41.445733 Batch 75 
2024-12-08 18:38:44.086240 Batch 90 
2024-12-08 18:38:47.385274 Batch 105 
2024-12-08 18:38:50.267189 Batch 120 
2024-12-08 18:38:53.175568 Batch 135 
[Train] Accuracy: 16.7494%, Loss per batch: 2.2321
2024-12-08 18:39:10.443350 Batch 15 
2024-12-08 18:39:12.373760 Batch 30 
2024-12-08 18:39:14.468293 Batch 45 
2024-12-08 18:39:16.240795 Batch 60 
[Val] Accuracy: 21.0955%, loss per batch: 2.204
Epoch 1: SGD lr 0.0100 -> 0.0090
2024-12-08 18:39:17.451157 Epoch 2 
2024-12-08 18:39:31.262484 Batch 15 
2024-12-08 18:39:34.131033 Batch 30 
2024-12-08 18:39:36.758500 Batch 45 
2024-12-08 18:39:39.515135 Batch 60 
2024-12-08 18:39:42.466658 Batch 75 
2024-12-08 18:39:45.147063 Batch 90 
2024-12-08 18:39:48.078739 Batch 105 
2024-12-08 18:39:51.141962 Batch 120 
2024-12-08 18:

In [16]:
# Second training run with resume=True, pointed at the same output directory.
# The captured log shows a torch.load of "state.pt" from outputs_path and the
# epoch counter continuing at 11, so this presumably restores the saved state
# and trains for another `epochs` epochs (verify against train.py).
history, gradient_stats = train(
    epochs,
    train_loader,
    val_loader,
    model,
    optimizer,
    loss_fn,
    scheduler,
    outputs_path='../../log/YOLOv8cls-version-1/training/',
    resume=True,
)

  state = torch.load(os.path.join(outputs_path, f"state.pt"))


2024-12-08 18:48:39.041176 Epoch 11 
2024-12-08 18:48:53.024690 Batch 15 
2024-12-08 18:48:55.700808 Batch 30 
2024-12-08 18:48:58.419483 Batch 45 
2024-12-08 18:49:01.140800 Batch 60 
2024-12-08 18:49:03.872071 Batch 75 
2024-12-08 18:49:06.635061 Batch 90 
2024-12-08 18:49:09.408585 Batch 105 
2024-12-08 18:49:12.193805 Batch 120 
2024-12-08 18:49:15.002414 Batch 135 
[Train] Accuracy: 49.002%, Loss per batch: 1.486
2024-12-08 18:49:31.133624 Batch 15 
2024-12-08 18:49:32.796981 Batch 30 
2024-12-08 18:49:34.786163 Batch 45 
2024-12-08 18:49:36.584807 Batch 60 
[Val] Accuracy: 51.2866%, loss per batch: 1.443
Epoch 11: SGD lr 0.0035 -> 0.0031
2024-12-08 18:49:37.567437 Epoch 12 
2024-12-08 18:49:51.592443 Batch 15 
2024-12-08 18:49:54.313291 Batch 30 
2024-12-08 18:49:57.065709 Batch 45 
2024-12-08 18:49:59.800605 Batch 60 
2024-12-08 18:50:02.506326 Batch 75 
2024-12-08 18:50:05.339605 Batch 90 
2024-12-08 18:50:08.081546 Batch 105 
2024-12-08 18:50:10.855335 Batch 120 
2024-12-08 18