In [1]:
# Debugging aid, left disabled. Uncommenting both lines sets CUDA_LAUNCH_BLOCKING=1,
# which makes CUDA kernel launches synchronous so that a device-side assert is
# reported at the call that triggered it. It has to run before CUDA is initialised.
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

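Reference on debugging CUDA device-side asserts (background for the `CUDA_LAUNCH_BLOCKING` switch above): https://lernapparat.de/debug-device-assert/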

In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import optim

In [2]:
import sys

# Add the directory three levels up to the module search path so the imports in
# the following cell can be resolved (presumably that is where train,
# data_preprocessing, data_augmentation and Models live — confirm).
# Index 1 keeps the existing first entry of sys.path ahead of it.
repo_root = "../../../"
sys.path.insert(1, repo_root)

In [3]:
from train import *
from data_preprocessing import *
from data_augmentation import *
import torch.optim.lr_scheduler as lr_scheduler
from Models.yolov8cls_path import Model

In [4]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Constructor arguments for the classifier imported from Models.yolov8cls_path,
# gathered in one place so the configuration can be read (and logged) at a glance.
# NOTE(review): the meaning of the architecture flags (residual_connection, CSP,
# add_hidden, classifyV8, bottleneck, variant) is defined in the model module,
# not in this notebook — check there before changing any of them.
model_kwargs = {
    "num_classes": 10,
    "residual_connection": True,
    "CSP": True,
    "add_hidden": True,
    "classifyV8": False,
    "bottleneck": 1.0,
    "variant": 's',
    "device": device,
    "dtype": torch.float32,
}
model = Model(**model_kwargs)

In [6]:
# `os` is imported explicitly: the notebook never imports it anywhere else (the
# only `import os` is commented out in the first cell), so without this line the
# cell only works if one of the wildcard imports above happens to leak `os`
# into the notebook namespace.
import os

# Root directory of the dataset, and the JSON file inside it whose path is
# handed to get_means / get_stds in the next cell.
data_path = '../../../../datasets/imagenette2/'
norms_path = os.path.join(data_path, 'norms.json')

In [7]:
# Per-channel normalisation statistics. Both helpers receive the same arguments:
# the norms.json path and no training loader.
# NOTE(review): presumably train_loader=None makes the helpers read stored values
# from `norms_path` instead of recomputing them — confirm in their definitions.
norm_source = dict(path=norms_path, train_loader=None)
means = get_means(**norm_source)
stds = get_stds(**norm_source)

Means are: [0.44969913363456726, 0.44868946075439453, 0.45163223147392273]
stds are: [0.28648287057876587, 0.28796446323394775, 0.2865694761276245]



Profiling your own PyTorch module (tutorial):
https://pytorch.org/tutorials/beginner/profiler.html

https://discuss.pytorch.org/t/how-to-prevent-overfitting/1902
Right now, with my augmented dataset, at epoch 8, I am getting a testset Top1 accuracy of 45% but a trainset Top1 accuracy of 69%.

You should strongly consider data augmentation in some meaningful way. If you’re attempting to do classification then think about what augmentations might add useful information and help distinguish classes in your dataset. In one of my cases, introducing background variation increased recognition rate by over 50%. Basically, with small datasets there is too much overfitting so you want the network to learn real-world distinctions vs. irrelevant artifacts like backgrounds / shadows etc.

In [8]:
# Training pipeline: random resized crop to 224x224, the project's Augmentation
# step, conversion to a tensor, then normalisation with the dataset statistics.
train_steps = [
    transforms.RandomResizedCrop((224, 224)),
    Augmentation(),
    transforms.ToTensor(),
    transforms.Normalize(mean=means, std=stds),
]
transformations = transforms.Compose(train_steps)

# Validation pipeline: a plain resize to 224x224 with no random steps, followed
# by the same tensor conversion and normalisation as training.
val_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=means, std=stds),
]
transformations_val = transforms.Compose(val_steps)

In [9]:
# Arguments common to both splits; only the `train` flag and the transform differ.
shared_dataset_args = dict(path=data_path, half=False, show=False)

train_dataset = ImageNetSubset(train=True, transform=transformations, **shared_dataset_args)
val_dataset = ImageNetSubset(train=False, transform=transformations_val, **shared_dataset_args)

In [10]:
# Number of epochs handed to train() further down.
epochs = 10

# Both loaders use the same batch size and worker count; only the training
# loader shuffles its samples.
loader_args = dict(batch_size=64, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_args)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_args)

In [11]:
# SGD hyperparameters, named in one place so they are easy to find and tune.
sgd_settings = dict(lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer = optim.SGD(model.parameters(), **sgd_settings)

In [12]:
# Negative log-likelihood loss, averaged over the batch ('mean' is the default
# reduction, spelled out here for clarity). NLLLoss expects log-probabilities as
# input. NOTE(review): this assumes the model's final layer already applies
# log-softmax — confirm in the model definition.
loss_fn = nn.NLLLoss(reduction='mean')

In [13]:
# Exponential decay: each scheduler step multiplies the learning rate by this
# factor (the training log below shows 0.0100 -> 0.0090 after the first epoch).
lr_decay = 0.9
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=lr_decay)

In [14]:
# First training run: `epochs` epochs from the current model state. The output
# directory is passed to train() as `outputs_path`.
training_log_dir = '../../log/YOLOv8cls-version-4/training/'
history, gradient_stats = train(
    epochs, train_loader, val_loader, model,
    optimizer, loss_fn, scheduler,
    outputs_path=training_log_dir,
)

2024-12-08 20:35:38.517374 Epoch 1 
2024-12-08 20:35:52.222891 Batch 15 
2024-12-08 20:35:54.778926 Batch 30 
2024-12-08 20:35:57.260733 Batch 45 
2024-12-08 20:35:59.702554 Batch 60 
2024-12-08 20:36:02.184153 Batch 75 
2024-12-08 20:36:04.588491 Batch 90 
2024-12-08 20:36:06.576683 Batch 105 
2024-12-08 20:36:08.834560 Batch 120 
2024-12-08 20:36:11.474636 Batch 135 
[Train] Accuracy: 26.0112%, Loss per batch: 2.0539
2024-12-08 20:36:25.885564 Batch 15 
2024-12-08 20:36:27.614711 Batch 30 
2024-12-08 20:36:29.525386 Batch 45 
2024-12-08 20:36:31.091705 Batch 60 
[Val] Accuracy: 35.8981%, loss per batch: 1.8608
Epoch 1: SGD lr 0.0100 -> 0.0090
2024-12-08 20:36:32.215248 Epoch 2 
2024-12-08 20:36:44.867413 Batch 15 
2024-12-08 20:36:47.236158 Batch 30 
2024-12-08 20:36:49.551075 Batch 45 
2024-12-08 20:36:51.778730 Batch 60 
2024-12-08 20:36:54.335864 Batch 75 
2024-12-08 20:36:56.372106 Batch 90 
2024-12-08 20:36:58.869974 Batch 105 
2024-12-08 20:37:01.371510 Batch 120 
2024-12-08 20

In [15]:
# Second training run with resume=True, pointing at the same output directory as
# the first run; the log below shows it continuing from epoch 11.
# Note that this rebinds `history` and `gradient_stats` from the previous cell.
# NOTE(review): whether the returned history includes the earlier epochs depends
# on train() — confirm before relying on it.
training_log_dir = '../../log/YOLOv8cls-version-4/training/'
history, gradient_stats = train(
    epochs, train_loader, val_loader, model,
    optimizer, loss_fn, scheduler,
    outputs_path=training_log_dir,
    resume=True,
)

  state = torch.load(os.path.join(outputs_path, f"state.pt"))


2024-12-08 20:45:06.088592 Epoch 11 
2024-12-08 20:45:21.565594 Batch 15 
2024-12-08 20:45:24.235178 Batch 30 
2024-12-08 20:45:26.630657 Batch 45 
2024-12-08 20:45:29.131698 Batch 60 
2024-12-08 20:45:32.084178 Batch 75 
2024-12-08 20:45:34.868948 Batch 90 
2024-12-08 20:45:37.636044 Batch 105 
2024-12-08 20:45:40.018844 Batch 120 
2024-12-08 20:45:42.519738 Batch 135 
[Train] Accuracy: 63.3119%, Loss per batch: 1.0988
2024-12-08 20:45:58.098159 Batch 15 
2024-12-08 20:45:59.808384 Batch 30 
2024-12-08 20:46:02.171153 Batch 45 
2024-12-08 20:46:03.867263 Batch 60 
[Val] Accuracy: 68.7643%, loss per batch: 0.9659
Epoch 11: SGD lr 0.0035 -> 0.0031
2024-12-08 20:46:04.822173 Epoch 12 
2024-12-08 20:46:18.771436 Batch 15 
2024-12-08 20:46:21.634684 Batch 30 
2024-12-08 20:46:24.489995 Batch 45 
2024-12-08 20:46:27.291009 Batch 60 
2024-12-08 20:46:30.199909 Batch 75 
2024-12-08 20:46:32.879122 Batch 90 
2024-12-08 20:46:35.404403 Batch 105 
2024-12-08 20:46:38.376872 Batch 120 
2024-12-08