In [1]:
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

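Reference on debugging CUDA device-side asserts (the reason for the optional `CUDA_LAUNCH_BLOCKING` setting above): https://lernapparat.de/debug-device-assert/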

In [1]:
import os

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torchvision import transforms

In [2]:
import sys

# Put the directory three levels up on the module search path (at index 1)
# so that the project-local modules imported in the following cell can be
# resolved when the notebook is run from its own folder.
sys.path.insert(1, "../../../")

In [3]:
from train import *
from data_preprocessing import *
from data_augmentation import *
import torch.optim.lr_scheduler as lr_scheduler
from Models.yolov8cls_path import Model

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Instantiate the classifier imported from Models.yolov8cls_path on the
# selected device in single precision.
# NOTE(review): num_classes=10 presumably matches the ten classes of the
# Imagenette dataset loaded below -- confirm against the dataset.
model = Model(
    num_classes=10,
    residual_connection=True,
    CSP=True,
    add_hidden=False,
    classifyV8=False,
    bottleneck=1.0,
    variant='s',
    device=device,
    dtype=torch.float32,
)

In [6]:
# Root folder of the dataset, relative to the notebook's working directory.
data_path = '../../../../datasets/imagenette2/'
# JSON file holding the per-channel normalisation statistics; it sits in the
# dataset root. `os` has to be imported explicitly in the notebook's import
# cell: relying on it leaking in through a `from ... import *` breaks as soon
# as those project modules stop importing it themselves.
norms_path = os.path.join(data_path, 'norms.json')

In [7]:
# Per-channel normalisation statistics used by the transforms below.
# Both helpers are given the path to norms.json and no train_loader.
# NOTE(review): with train_loader=None the values are presumably read from
# the JSON file rather than recomputed from the data -- confirm in
# data_preprocessing. Each helper prints the values it returns (see output).
means = get_means(path=norms_path, train_loader=None)
stds = get_stds(path=norms_path, train_loader=None)

Means are: [0.44969913363456726, 0.44868946075439453, 0.45163223147392273]
stds are: [0.28648287057876587, 0.28796446323394775, 0.2865694761276245]



Profiling your own PyTorch module:
https://pytorch.org/tutorials/beginner/profiler.html

https://discuss.pytorch.org/t/how-to-prevent-overfitting/1902
Right now, with my augmented dataset, at epoch 8, I am getting a testset Top1 accuracy of 45% but a trainset Top1 accuracy of 69%.

You should strongly consider data augmentation in some meaningful way. If you’re attempting to do classification then think about what augmentations might add useful information and help distinguish classes in your dataset. In one of my cases, introducing background variation increased recognition rate by over 50%. Basically, with small datasets there is too much overfitting so you want the network to learn real-world distinctions vs. irrelevant artifacts like backgrounds / shadows etc.

In [8]:
# Training pipeline: random resized crop to 224x224, the project's
# Augmentation step, tensor conversion, then per-channel normalisation.
transformations = transforms.Compose(
    [
        transforms.RandomResizedCrop((224, 224)),
        Augmentation(),
        transforms.ToTensor(),
        transforms.Normalize(mean=means, std=stds),
    ]
)

# Validation pipeline: deterministic resize to 224x224 with the same tensor
# conversion and normalisation, and no augmentation.
transformations_val = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=means, std=stds),
    ]
)

In [9]:
# Training split, fed through the augmenting pipeline.
train_dataset = ImageNetSubset(
    path=data_path,
    train=True,
    transform=transformations,
    half=False,
    show=False,
)

# Validation split, fed through the deterministic pipeline.
val_dataset = ImageNetSubset(
    path=data_path,
    train=False,
    transform=transformations_val,
    half=False,
    show=False,
)

In [10]:
# Number of epochs passed to train() below.
epochs = 10

# Training batches are reshuffled every epoch; validation order is fixed.
# Both loaders use batches of 64 and four worker processes.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=4,
)

In [11]:
# SGD with momentum and a small L2 weight decay over all model parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0001,
)

In [12]:
# Negative log-likelihood criterion with its default settings.
# NOTE(review): NLLLoss expects log-probabilities as input -- confirm that the
# model's final layer emits log-softmax outputs.
loss_fn = torch.nn.NLLLoss()

In [13]:
# Exponential decay: every scheduler step multiplies the learning rate by 0.9
# (the training log shows 0.0100 -> 0.0090 after the first epoch).
scheduler = lr_scheduler.ExponentialLR(
    optimizer,
    gamma=0.9,
)

In [14]:
# Start training from scratch; train() returns a pair that is unpacked into
# `history` and `gradient_stats`, and is given the log folder as outputs_path.
history, gradient_stats = train(
    epochs,
    train_loader,
    val_loader,
    model,
    optimizer,
    loss_fn,
    scheduler,
    outputs_path='../../log/YOLOv8cls-version-3/training/',
)

2024-12-08 20:08:46.531739 Epoch 1 
2024-12-08 20:09:00.325095 Batch 15 
2024-12-08 20:09:03.125328 Batch 30 
2024-12-08 20:09:05.604406 Batch 45 
2024-12-08 20:09:07.950189 Batch 60 
2024-12-08 20:09:10.235485 Batch 75 
2024-12-08 20:09:12.299925 Batch 90 
2024-12-08 20:09:14.725019 Batch 105 
2024-12-08 20:09:17.279288 Batch 120 
2024-12-08 20:09:19.343465 Batch 135 
[Train] Accuracy: 24.6911%, Loss per batch: 2.0698
2024-12-08 20:09:34.183245 Batch 15 
2024-12-08 20:09:35.827190 Batch 30 
2024-12-08 20:09:37.632745 Batch 45 
2024-12-08 20:09:39.234703 Batch 60 
[Val] Accuracy: 33.9363%, loss per batch: 1.8975
Epoch 1: SGD lr 0.0100 -> 0.0090
2024-12-08 20:09:40.282487 Epoch 2 
2024-12-08 20:09:53.168282 Batch 15 
2024-12-08 20:09:55.617394 Batch 30 
2024-12-08 20:09:58.118986 Batch 45 
2024-12-08 20:10:00.410654 Batch 60 
2024-12-08 20:10:02.904682 Batch 75 
2024-12-08 20:10:05.589794 Batch 90 
2024-12-08 20:10:08.007283 Batch 105 
2024-12-08 20:10:10.008614 Batch 120 
2024-12-08 20

In [15]:
# Continue the previous run with resume=True: the output below shows state.pt
# being loaded from outputs_path and the epoch counter picking up at 11.
history, gradient_stats = train(
    epochs,
    train_loader,
    val_loader,
    model,
    optimizer,
    loss_fn,
    scheduler,
    outputs_path='../../log/YOLOv8cls-version-3/training/',
    resume=True,
)

  state = torch.load(os.path.join(outputs_path, f"state.pt"))


2024-12-08 20:18:14.249106 Epoch 11 
2024-12-08 20:18:27.936833 Batch 15 
2024-12-08 20:18:30.695155 Batch 30 
2024-12-08 20:18:33.075800 Batch 45 
2024-12-08 20:18:35.650416 Batch 60 
2024-12-08 20:18:38.705324 Batch 75 
2024-12-08 20:18:41.341095 Batch 90 
2024-12-08 20:18:43.629238 Batch 105 
2024-12-08 20:18:46.432522 Batch 120 
2024-12-08 20:18:48.977328 Batch 135 
[Train] Accuracy: 63.2168%, Loss per batch: 1.0999
2024-12-08 20:19:05.011213 Batch 15 
2024-12-08 20:19:06.697591 Batch 30 
2024-12-08 20:19:08.983378 Batch 45 
2024-12-08 20:19:10.809538 Batch 60 
[Val] Accuracy: 67.7452%, loss per batch: 0.9925
Epoch 11: SGD lr 0.0035 -> 0.0031
2024-12-08 20:19:11.757651 Epoch 12 
2024-12-08 20:19:25.381827 Batch 15 
2024-12-08 20:19:28.226624 Batch 30 
2024-12-08 20:19:30.800758 Batch 45 
2024-12-08 20:19:33.106769 Batch 60 
2024-12-08 20:19:35.741965 Batch 75 
2024-12-08 20:19:38.547286 Batch 90 
2024-12-08 20:19:41.206037 Batch 105 
2024-12-08 20:19:43.661337 Batch 120 
2024-12-08