In [1]:
# Debug aid (disabled): setting CUDA_LAUNCH_BLOCKING=1 makes CUDA kernel launches
# synchronous, so a device-side assert is reported at the call that triggered it.
# To use it, uncomment both lines and run this cell before anything touches CUDA.
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

Reference on debugging CUDA device-side asserts: https://lernapparat.de/debug-device-assert/

In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import optim

In [2]:
import sys

# Make the project-local packages (train, data_preprocessing, Models, ...) importable.
# The path is relative, so it is resolved against the kernel's working directory.
PROJECT_ROOT = "../../../"

# Guard the insert so that re-running this cell does not keep piling duplicate
# entries onto sys.path; on a fresh kernel the result is the same as before.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(1, PROJECT_ROOT)

In [3]:
# Project-local helpers; presumably resolved through the sys.path entry added above.
# NOTE(review): the star imports below are where `train`, `get_means`, `get_stds`,
# `ImageNetSubset` and `Augmentation` have to come from (possibly along with
# other names such as `os`). TODO: replace them with explicit imports once the
# defining module of each name is confirmed.
from train import *
from data_preprocessing import *
from data_augmentation import *
import torch.optim.lr_scheduler as lr_scheduler
from Models.yolov8cls_path import Model

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Collect the architecture options in one place so they are easy to scan and tweak,
# then build the classifier (10 output classes, float32 parameters on `device`).
model_kwargs = dict(
    num_classes=10,
    residual_connection=True,
    CSP=False,
    add_hidden=False,
    classifyV8=False,
    bottleneck=1.0,
    variant='s',
    device=device,
    dtype=torch.float32,
)
model = Model(**model_kwargs)

In [6]:
# `os` is imported here explicitly: no visible cell imports it, so this cell
# previously only worked if one of the star-imported project modules happened
# to leak the name into the notebook namespace.
import os

# Root of the Imagenette dataset, relative to the kernel's working directory.
data_path = '../../../../datasets/imagenette2/'
# JSON file with the normalisation statistics, stored next to the data.
norms_path = os.path.join(data_path, 'norms.json')

In [7]:
# Per-channel normalisation statistics. No train_loader is passed, so the values
# are presumably read from `norms_path` rather than recomputed (TODO confirm).
means, stds = (
    get_means(path=norms_path, train_loader=None),
    get_stds(path=norms_path, train_loader=None),
)

Means are: [0.44969913363456726, 0.44868946075439453, 0.45163223147392273]
stds are: [0.28648287057876587, 0.28796446323394775, 0.2865694761276245]



Profiling your PyTorch module:
https://pytorch.org/tutorials/beginner/profiler.html

Notes on overfitting, excerpted from the PyTorch forum thread https://discuss.pytorch.org/t/how-to-prevent-overfitting/1902

> Right now, with my augmented dataset, at epoch 8, I am getting a testset Top1 accuracy of 45% but a trainset Top1 accuracy of 69%.

> You should strongly consider data augmentation in some meaningful way. If you’re attempting to do classification then think about what augmentations might add useful information and help distinguish classes in your dataset. In one of my cases, introducing background variation increased recognition rate by over 50%. Basically, with small datasets there is too much overfitting so you want the network to learn real-world distinctions vs. irrelevant artifacts like backgrounds / shadows etc.

In [8]:
# Training pipeline: random 224x224 crop, project-specific augmentation, then
# tensor conversion and per-channel normalisation.
train_steps = [
    transforms.RandomResizedCrop((224, 224)),
    Augmentation(),
    transforms.ToTensor(),
    transforms.Normalize(mean=means, std=stds),
]
transformations = transforms.Compose(train_steps)

# Validation pipeline: deterministic resize to 224x224, no augmentation,
# same tensor conversion and normalisation as for training.
val_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=means, std=stds),
]
transformations_val = transforms.Compose(val_steps)

In [9]:
# Options shared by the training and validation datasets.
dataset_kwargs = dict(path=data_path, half=False, show=False)

# Each split gets its own transform pipeline.
train_dataset = ImageNetSubset(train=True, transform=transformations, **dataset_kwargs)
val_dataset = ImageNetSubset(train=False, transform=transformations_val, **dataset_kwargs)

In [10]:
# Number of epochs handed to train() in the cells below.
epochs = 10

# Both loaders use the same batch size and worker count; only the training
# data is shuffled.
loader_kwargs = dict(batch_size=64, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

In [11]:
# SGD with momentum and a small L2 penalty over all model parameters.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0001,
)

In [12]:
# Negative log-likelihood loss, averaged over the batch (the default reduction,
# spelled out here). NOTE(review): NLLLoss expects log-probabilities as input,
# so this assumes the model ends in a log-softmax — TODO confirm.
loss_fn = nn.NLLLoss(reduction='mean')

In [13]:
# Exponential decay: each scheduler step multiplies the learning rate by 0.9
# (the training log shows 0.0100 -> 0.0090 after the first epoch).
scheduler = lr_scheduler.ExponentialLR(
    optimizer,
    gamma=0.9,
)

In [14]:
# Initial training run; logs and state are written below `outputs_path`.
history, gradient_stats = train(
    epochs,
    train_loader,
    val_loader,
    model,
    optimizer,
    loss_fn,
    scheduler,
    outputs_path='../../log/YOLOv8cls-version-2/training/',
)

2024-12-08 19:47:44.882752 Epoch 1 
2024-12-08 19:48:00.555886 Batch 15 
2024-12-08 19:48:02.792795 Batch 30 
2024-12-08 19:48:05.385732 Batch 45 
2024-12-08 19:48:08.097029 Batch 60 
2024-12-08 19:48:10.427223 Batch 75 
2024-12-08 19:48:13.241094 Batch 90 
2024-12-08 19:48:15.943053 Batch 105 
2024-12-08 19:48:19.039205 Batch 120 
2024-12-08 19:48:22.610514 Batch 135 
[Train] Accuracy: 26.4231%, Loss per batch: 2.0437
2024-12-08 19:48:39.441962 Batch 15 
2024-12-08 19:48:41.140857 Batch 30 
2024-12-08 19:48:43.184305 Batch 45 
2024-12-08 19:48:45.252919 Batch 60 
[Val] Accuracy: 34.6752%, loss per batch: 1.857
Epoch 1: SGD lr 0.0100 -> 0.0090
2024-12-08 19:48:46.435122 Epoch 2 
2024-12-08 19:48:59.163487 Batch 15 
2024-12-08 19:49:02.054908 Batch 30 
2024-12-08 19:49:04.303561 Batch 45 
2024-12-08 19:49:06.995991 Batch 60 
2024-12-08 19:49:09.554807 Batch 75 
2024-12-08 19:49:11.929398 Batch 90 
2024-12-08 19:49:14.486892 Batch 105 
2024-12-08 19:49:17.062934 Batch 120 
2024-12-08 19:

In [15]:
# Continue training from the saved state for another `epochs` epochs. With
# resume=True the captured output shows `state.pt` being loaded from
# `outputs_path` and the epoch counter picking up at 11.
history, gradient_stats = train(
    epochs,
    train_loader,
    val_loader,
    model,
    optimizer,
    loss_fn,
    scheduler,
    outputs_path='../../log/YOLOv8cls-version-2/training/',
    resume=True,
)

  state = torch.load(os.path.join(outputs_path, f"state.pt"))


2024-12-08 19:57:03.004068 Epoch 11 
2024-12-08 19:57:17.117896 Batch 15 
2024-12-08 19:57:19.704664 Batch 30 
2024-12-08 19:57:22.288480 Batch 45 
2024-12-08 19:57:24.663826 Batch 60 
2024-12-08 19:57:27.552630 Batch 75 
2024-12-08 19:57:30.021543 Batch 90 
2024-12-08 19:57:32.771355 Batch 105 
2024-12-08 19:57:35.542646 Batch 120 
2024-12-08 19:57:38.278372 Batch 135 
[Train] Accuracy: 62.7099%, Loss per batch: 1.1255
2024-12-08 19:57:54.128577 Batch 15 
2024-12-08 19:57:55.726401 Batch 30 
2024-12-08 19:57:57.780009 Batch 45 
2024-12-08 19:57:59.447201 Batch 60 
[Val] Accuracy: 65.4777%, loss per batch: 1.0684
Epoch 11: SGD lr 0.0035 -> 0.0031
2024-12-08 19:58:00.385356 Epoch 12 
2024-12-08 19:58:14.275912 Batch 15 
2024-12-08 19:58:17.034217 Batch 30 
2024-12-08 19:58:19.815782 Batch 45 
2024-12-08 19:58:22.610539 Batch 60 
2024-12-08 19:58:25.407594 Batch 75 
2024-12-08 19:58:28.218248 Batch 90 
2024-12-08 19:58:31.035451 Batch 105 
2024-12-08 19:58:33.824120 Batch 120 
2024-12-08