In [18]:
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import optim

In [19]:
import sys

# Make the directory three levels above the kernel's working directory
# importable, so the project-local imports in the next cell (train,
# data_preprocessing, Models) can resolve.
# NOTE: the path is relative, so it depends on where the kernel was started.
PROJECT_ROOT = "../../../"

# Guard keeps the cell idempotent: re-running it does not stack duplicate
# entries on sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(1, PROJECT_ROOT)

In [20]:
from train import *
from data_preprocessing import *
from Models.yolov8cls_path import Model

In [21]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [22]:
# Constructor arguments for the classifier, gathered in one mapping so the
# configuration can be read separately from the instantiation.
# NOTE(review): the meaning of each flag is defined by
# Models.yolov8cls_path.Model, which is not visible in this notebook —
# confirm against that module before changing any value.
model_config = dict(
    num_classes=10,
    residual_connection=True,
    CSP=True,
    add_hidden=True,
    classifyV8=True,
    bottleneck=1.0,
    variant='s',
    device=device,
    dtype=torch.float32,
)

model = Model(**model_config)

In [23]:
# `os` was never imported explicitly in this notebook; it was only reachable
# if one of the earlier star imports happened to re-export it. Import it here
# so this cell does not depend on that leak.
import os

# Root folder of the dataset (relative to the kernel's working directory).
data_path = '../../../../dummy_datasets/'

# JSON file inside the dataset folder that is handed to get_means/get_stds.
norms_path = os.path.join(data_path, 'norms.json')

In [24]:
# Normalisation statistics later fed to transforms.Normalize.
# Both helpers receive the same arguments: the norms file path and no loader.
# NOTE(review): presumably train_loader=None makes the helpers read the values
# from norms.json instead of recomputing them — confirm in data_preprocessing.
norm_source = dict(path=norms_path, train_loader=None)

means = get_means(**norm_source)
stds = get_stds(**norm_source)

Means are: [0.4405549168586731, 0.4407285749912262, 0.4381718039512634]
stds are: [0.25142669677734375, 0.25270089507102966, 0.25131651759147644]


In [25]:
# Preprocessing applied to every sample, in this order:
#   1. convert the image to a tensor,
#   2. resize it to 224x224,
#   3. normalise it with the `means` / `stds` loaded earlier in the notebook.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=means, std=stds),
]

transformations = transforms.Compose(preprocessing_steps)

In [26]:
# The two splits share every option except the `train` flag, so the common
# options are declared once and unpacked into both constructors.
# NOTE(review): the semantics of `half` and `show` are defined by
# ImageNetSubset, which is not visible here — confirm before changing them.
dataset_options = dict(
    path=data_path,
    transform=transformations,
    half=False,
    show=False,
)

train_dataset = ImageNetSubset(train=True, **dataset_options)
val_dataset = ImageNetSubset(train=False, **dataset_options)

In [11]:
# Epoch count; it is passed to train() and also sizes the profiler schedule
# defined in a later cell.
epochs = 10

# Loader settings common to both splits; only shuffling differs
# (training data is shuffled, validation data is not).
loader_options = dict(batch_size=256, num_workers=4)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

In [12]:
# SGD with momentum and weight decay over every parameter of `model`.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
)

In [13]:
# Negative log-likelihood loss, averaged over the batch (the default
# reduction, spelled out here for readability).
# NOTE(review): NLLLoss expects log-probabilities as input; this assumes the
# model's forward pass ends in a log-softmax — confirm in the Model class.
loss_fn = nn.NLLLoss(reduction='mean')

In [14]:
# Profiler schedule: idle for the first step, warm up on the second, then
# record the remaining `epochs - 2` steps, for a single cycle.
# NOTE(review): this presumes train() advances the profiler once per epoch and
# that epochs >= 3, so the active window stays positive — confirm in train.py.
trace_schedule = torch.profiler.schedule(wait=1, warmup=1, active=epochs - 2, repeat=1)

# Callback that receives the trace when it is ready; it targets the
# TensorBoard log directory of this run.
trace_writer = torch.profiler.tensorboard_trace_handler('../../log/YOLOv8cls-version-5/training')

prof = torch.profiler.profile(
    schedule=trace_schedule,
    on_trace_ready=trace_writer,
    record_shapes=True,
    profile_memory=True,
    with_flops=True,
    with_modules=True,
    with_stack=True,
)


Profiling a custom module — see the PyTorch profiler tutorial:
https://pytorch.org/tutorials/beginner/profiler.html

In [15]:
# Run training with the profiler attached. train() returns three values; the
# third is bound back to `prof`, replacing the profiler object created earlier.
# NOTE(review): `model_path` is presumably where train() writes model
# checkpoints — confirm in train.py.
model_dir = '../../log/YOLOv8cls-version-5/training/models/'

history, gradient_stats, prof = train(
    epochs,
    train_loader,
    val_loader,
    model,
    optimizer,
    loss_fn,
    prof,
    model_path=model_dir,
)

2024-11-20 21:08:12.937560 Epoch 1 
2024-11-20 21:08:19.144918 Batch 1 
[Train] Accuracy: 6.6667%, Loss per batch: 2.3275
2024-11-20 21:08:28.191479 Batch 1 
[Val] Accuracy: 10.0%, loss per batch: 2.3028
2024-11-20 21:08:29.817436 Epoch 2 
2024-11-20 21:08:35.971528 Batch 1 
[Train] Accuracy: 23.3333%, Loss per batch: 2.1452
2024-11-20 21:08:44.549557 Batch 1 
[Val] Accuracy: 10.0%, loss per batch: 2.3028
2024-11-20 21:08:46.142679 Epoch 3 
2024-11-20 21:08:53.517662 Batch 1 
[Train] Accuracy: 30.0%, Loss per batch: 2.0105
2024-11-20 21:09:01.908913 Batch 1 
[Val] Accuracy: 10.0%, loss per batch: 2.3028
2024-11-20 21:09:03.520542 Epoch 4 
2024-11-20 21:09:09.981119 Batch 1 
[Train] Accuracy: 36.6667%, Loss per batch: 1.9328
2024-11-20 21:09:18.563198 Batch 1 
[Val] Accuracy: 10.0%, loss per batch: 2.303
2024-11-20 21:09:20.199726 Epoch 5 
2024-11-20 21:09:26.589242 Batch 1 
[Train] Accuracy: 46.6667%, Loss per batch: 1.7953
2024-11-20 21:09:34.883322 Batch 1 
[Val] Accuracy: 10.0%, los

In [16]:
# Average the recorded profiler events by key, then print the resulting table;
# row_limit=-1 is passed so the table is not cut off at the default row limit.
profile_summary = prof.key_averages()
print(profile_summary.table(row_limit=-1))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  Total MFLOPs  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         0.55%     766.350ms       100.00%      138.347s       17.293s      19.45 Mb    -466.33 Mb             8            --  
                                          Training Loop         0.03%      38.355ms        51.31%       70.990s        8.874s      19.43 Mb    -137.83 Mb             8            --  
                     Validation Loop Before Data Loader         4.35%        6.0

In [17]:
# Hand the training history and gradient statistics to the project helper,
# targeting the same directory that receives the profiler traces.
# NOTE(review): the file names and format are decided by save_train_outputs,
# which is not visible here.
run_dir = '../../log/YOLOv8cls-version-5/training'
save_train_outputs(run_dir, history, gradient_stats)