In [1]:
# Debugging aid (disabled by default): uncomment to set CUDA_LAUNCH_BLOCKING=1,
# which makes CUDA kernel launches synchronous so a device-side assert is
# reported at the call that caused it. Presumably this has to run before the
# first CUDA call in the kernel to take effect -- TODO confirm.
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

Reference on debugging CUDA device-side asserts: https://lernapparat.de/debug-device-assert/

In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import optim

In [2]:
import sys

# Make the project root (three directories up from this notebook) importable so
# that `train`, `data_preprocessing`, `data_augmentation` and `Models` resolve.
# The membership check keeps the cell idempotent: re-running it no longer
# stacks duplicate entries onto sys.path.
if "../../../" not in sys.path:
    sys.path.insert(1, "../../../")

In [3]:
from train import *
from data_preprocessing import *
from data_augmentation import *
import torch.optim.lr_scheduler as lr_scheduler
from Models.yolov8cls_path import Model

In [4]:
# Use the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Constructor arguments for the classifier, collected in one place so the
# configuration is easy to read. The architecture switches are forwarded to
# Model unchanged; their exact meaning is defined in Models/yolov8cls_path.py
# (not visible from this notebook).
model_kwargs = dict(
    num_classes=10,            # presumably the 10 Imagenette classes -- TODO confirm
    residual_connection=True,
    CSP=True,
    add_hidden=True,
    classifyV8=True,
    bottleneck=1.0,
    variant='s',
    device=device,             # device selected in the earlier cell
    dtype=torch.float32,
)
model = Model(**model_kwargs)

In [6]:
# `os` was previously only available if one of the star imports leaked it into
# the notebook namespace; import it explicitly so this cell does not depend on
# that accident.
import os

# Root of the dataset, relative to this notebook's working directory.
data_path = '../../../../datasets/imagenette2/'
# JSON file holding the cached normalisation statistics for the dataset.
norms_path = os.path.join(data_path, 'norms.json')

In [7]:
# Fetch the normalisation statistics used by the transforms below. Both helpers
# receive the same arguments; with train_loader=None they presumably read the
# values stored at `norms_path` instead of recomputing them -- TODO confirm
# against data_preprocessing.
norm_kwargs = dict(path=norms_path, train_loader=None)
means = get_means(**norm_kwargs)
stds = get_stds(**norm_kwargs)

Means are: [0.44969913363456726, 0.44868946075439453, 0.45163223147392273]
stds are: [0.28648287057876587, 0.28796446323394775, 0.2865694761276245]



Profiling your own module (PyTorch profiler tutorial):
https://pytorch.org/tutorials/beginner/profiler.html

Background reading on overfitting (PyTorch forums): https://discuss.pytorch.org/t/how-to-prevent-overfitting/1902
Right now, with my augmented dataset, at epoch 8, I am getting a testset Top1 accuracy of 45% but a trainset Top1 accuracy of 69%.

You should strongly consider data augmentation in some meaningful way. If you’re attempting to do classification then think about what augmentations might add useful information and help distinguish classes in your dataset. In one of my cases, introducing background variation increased recognition rate by over 50%. Basically, with small datasets there is too much overfitting so you want the network to learn real-world distinctions vs. irrelevant artifacts like backgrounds / shadows etc.

In [8]:
# Training pipeline: random resized crop to 224x224, the project's custom
# Augmentation step, tensor conversion, then normalisation with the dataset
# statistics loaded above.
train_steps = [
    transforms.RandomResizedCrop((224, 224)),
    Augmentation(),
    transforms.ToTensor(),
    transforms.Normalize(mean=means, std=stds),
]
transformations = transforms.Compose(train_steps)

# Validation pipeline: deterministic resize to 224x224, no augmentation, same
# tensor conversion and normalisation as training.
val_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=means, std=stds),
]
transformations_val = transforms.Compose(val_steps)

In [9]:
# Options shared by both splits; only the split flag and the transform differ.
dataset_kwargs = dict(path=data_path, half=False, show=False)

# Training split, using the augmenting transform pipeline.
train_dataset = ImageNetSubset(train=True, transform=transformations, **dataset_kwargs)
# Validation split, using the deterministic transform pipeline.
val_dataset = ImageNetSubset(train=False, transform=transformations_val, **dataset_kwargs)

In [10]:
# Number of epochs passed to each train() call.
epochs = 10

# Batch size and worker count are identical for both loaders.
loader_kwargs = dict(batch_size=64, num_workers=4)

# Shuffle the training data; keep validation order fixed.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

In [11]:
# SGD with momentum and weight decay over every parameter of the model.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
)

In [12]:
# Negative log-likelihood loss, averaged over the batch ('mean' is the default
# reduction, spelled out here). NLLLoss expects log-probabilities as input, so
# this assumes Model's output already went through a log-softmax -- TODO confirm.
loss_fn = nn.NLLLoss(reduction='mean')

In [13]:
# Exponential learning-rate decay: each scheduler step multiplies the learning
# rate by gamma (the training log shows 0.0100 -> 0.0090 after epoch 1).
scheduler = lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.9)

In [14]:
# First training run: `epochs` epochs from the current model state. train()
# returns a (history, gradient_stats) pair and is given the directory where it
# writes its outputs.
history, gradient_stats = train(
    epochs,
    train_loader,
    val_loader,
    model,
    optimizer,
    loss_fn,
    scheduler,
    outputs_path='../../log/YOLOv8cls-version-5/training/',
)

2024-12-08 18:15:17.357163 Epoch 1 
2024-12-08 18:15:33.186914 Batch 15 
2024-12-08 18:15:35.842873 Batch 30 
2024-12-08 18:15:38.385833 Batch 45 
2024-12-08 18:15:41.117270 Batch 60 
2024-12-08 18:15:43.891518 Batch 75 
2024-12-08 18:15:46.636789 Batch 90 
2024-12-08 18:15:49.355019 Batch 105 
2024-12-08 18:15:51.895369 Batch 120 
2024-12-08 18:15:54.716225 Batch 135 
[Train] Accuracy: 26.5287%, Loss per batch: 2.0423
2024-12-08 18:16:10.717812 Batch 15 
2024-12-08 18:16:12.317421 Batch 30 
2024-12-08 18:16:14.248104 Batch 45 
2024-12-08 18:16:15.859858 Batch 60 
[Val] Accuracy: 36.1274%, loss per batch: 1.8331
Epoch 1: SGD lr 0.0100 -> 0.0090
2024-12-08 18:16:16.990376 Epoch 2 
2024-12-08 18:16:30.497077 Batch 15 
2024-12-08 18:16:33.163858 Batch 30 
2024-12-08 18:16:35.835411 Batch 45 
2024-12-08 18:16:38.251176 Batch 60 
2024-12-08 18:16:41.044145 Batch 75 
2024-12-08 18:16:43.931763 Batch 90 
2024-12-08 18:16:46.502935 Batch 105 
2024-12-08 18:16:49.067005 Batch 120 
2024-12-08 18

In [15]:
# Continue training for another `epochs` epochs. With resume=True, train()
# reloads its saved state from `outputs_path` (the log below shows it loading
# state.pt and continuing at epoch 11).
history, gradient_stats = train(
    epochs,
    train_loader,
    val_loader,
    model,
    optimizer,
    loss_fn,
    scheduler,
    outputs_path='../../log/YOLOv8cls-version-5/training/',
    resume=True,
)

  state = torch.load(os.path.join(outputs_path, f"state.pt"))


2024-12-08 18:25:06.175147 Epoch 11 
2024-12-08 18:25:20.231139 Batch 15 
2024-12-08 18:25:23.228820 Batch 30 
2024-12-08 18:25:26.090275 Batch 45 
2024-12-08 18:25:28.843335 Batch 60 
2024-12-08 18:25:31.737618 Batch 75 
2024-12-08 18:25:34.630933 Batch 90 
2024-12-08 18:25:37.573393 Batch 105 
2024-12-08 18:25:40.197501 Batch 120 
2024-12-08 18:25:43.024126 Batch 135 
[Train] Accuracy: 64.4102%, Loss per batch: 1.0779
2024-12-08 18:25:59.763312 Batch 15 
2024-12-08 18:26:01.588000 Batch 30 
2024-12-08 18:26:03.866676 Batch 45 
2024-12-08 18:26:05.626523 Batch 60 
[Val] Accuracy: 68.3057%, loss per batch: 0.9768
Epoch 11: SGD lr 0.0035 -> 0.0031
2024-12-08 18:26:06.611558 Epoch 12 
2024-12-08 18:26:20.661377 Batch 15 
2024-12-08 18:26:23.528063 Batch 30 
2024-12-08 18:26:26.280272 Batch 45 
2024-12-08 18:26:28.848591 Batch 60 
2024-12-08 18:26:31.917600 Batch 75 
2024-12-08 18:26:35.015758 Batch 90 
2024-12-08 18:26:37.886838 Batch 105 
2024-12-08 18:26:40.235771 Batch 120 
2024-12-08