In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import optim

In [2]:
import sys
sys.path.insert(1, "../")

In [3]:
from train import *
from data_preprocessing import *
from Models.yolov8cls_path import *

In [4]:
if torch.cuda.is_available():
    device=torch.device('cuda')
else:
    device=torch.device('cpu') 

In [5]:
model = Model(num_classes=10)

In [6]:
data_path = '../../dummy_datasets/'
norms_path = os.path.join(data_path, 'norms.json')

In [7]:
means = get_means(path=norms_path, train_loader=None)
stds = get_stds(path=norms_path, train_loader=None)

Means are: [0.4405549168586731, 0.4407285749912262, 0.4381718039512634]
stds are: [0.25142669677734375, 0.25270089507102966, 0.25131651759147644]


In [8]:
transformations = transforms.Compose([transforms.ToTensor(),
                                      transforms.Resize((224, 224)),
                                      transforms.Normalize(mean=means, std=stds)])

In [9]:
train_dataset = ImageNetSubset(path=data_path, train=True, transform=transformations, half=False, show=False)
val_dataset = ImageNetSubset(path=data_path, train=False, transform=transformations, half=False, show=False)

In [10]:
epochs=3
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False, num_workers=4)

In [11]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001)

In [12]:
loss_fn = nn.NLLLoss()

In [13]:
prof = torch.profiler.profile(
        # schedule=torch.profiler.schedule(wait=0, warmup=0, active=1, repeat=1),
        on_trace_ready=torch.profiler.tensorboard_trace_handler('./log/darknet19'),
        record_shapes=True,
        profile_memory=True,
        with_flops=True,
        with_modules=True,
        with_stack=True)


Profiling your personal module 
https://pytorch.org/tutorials/beginner/profiler.html

In [14]:
history, gradient_stats, prof = train(epochs, train_loader, val_loader, model, optimizer, loss_fn, prof)

2024-11-16 18:25:15.257475 Epoch 1: 
2024-11-16 18:25:21.629539 Batch 1: 
torch.Size([30, 10, 1, 1])
torch.Size([30, 10])
[Train] Accuracy: 13.3333, Loss per batch: 2.2849
2024-11-16 18:25:33.310796 Batch 1: 
torch.Size([256, 10, 1, 1])
torch.Size([256, 10])
2024-11-16 18:25:44.577420 Batch 2: 
torch.Size([244, 10, 1, 1])
torch.Size([244, 10])
[Val] Accuracy: 10.0%, loss per batch: 2.3026
2024-11-16 18:25:55.961150 Epoch 2: 
2024-11-16 18:26:04.550771 Batch 1: 
torch.Size([30, 10, 1, 1])
torch.Size([30, 10])
[Train] Accuracy: 23.3333, Loss per batch: 2.2079
2024-11-16 18:26:16.975491 Batch 1: 
torch.Size([256, 10, 1, 1])
torch.Size([256, 10])
2024-11-16 18:26:26.980195 Batch 2: 
torch.Size([244, 10, 1, 1])
torch.Size([244, 10])
[Val] Accuracy: 10.0%, loss per batch: 2.3027
2024-11-16 18:26:39.092225 Epoch 3: 
2024-11-16 18:26:45.962882 Batch 1: 
torch.Size([30, 10, 1, 1])
torch.Size([30, 10])
[Train] Accuracy: 40.0, Loss per batch: 2.0945
2024-11-16 18:26:59.123262 Batch 1: 
torch.Size

## Inference time

In [13]:
from ultralytics import YOLO

In [31]:
model = YOLO('../Models/yolov8s-cls.pt')

In [32]:
# model = model.model

In [33]:
img = torch.rand(1,3,640,640)
_ = transforms.Compose([transforms.Normalize(mean=[0.5, 0.5, 0.5],
                        std=[0.5, 0.5, 0.5])])
img = _(img)

In [37]:
with torch.autograd.profiler.profile(use_device='cpu',
                                    record_shapes=True,
                                    # profile_memory=True,
                                    with_flops=True,
                                    with_modules=True,
                                    with_stack=True) as prof:
   model.predict(img)
print(prof.key_averages().table(row_limit=-1))


0: 640x640 velvet 0.96, wool 0.01, window_screen 0.00, chain_mail 0.00, honeycomb 0.00, 97.3ms
Speed: 0.0ms preprocess, 97.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)
-----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                         Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  Total MFLOPs  
-----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  aten::empty         0.28%     300.800us         0.28%     300.800us       5.277us            57            --  
                     aten::to         0.03%      32.300us         2.71%       2.874ms     261.264us            11            --  
             aten::lift_fresh         0.00%       2.000us         0.00%       2.000us       1.000us             2            --  
                aten::detac

In [36]:
6.451+41631.744+2.560 +1.229

41641.984

YOLOv8n</br>
Mine: 3.290265 GFLOPs ; On the website: 4.3 GFLOPs</br>
YOLOv8s</br>
Mine: 12.449996 GFLOPs ; On the website: 13.5 GFLOPs</br>
YOLOv8m</br>
Mine: 41.640755 GFLOPs ; On the website: 42.7 GFLOPs

In [38]:
from Models.yolov8cls_path import *

In [41]:
model = Model(num_classes=1000, 
              residual_connection=True, 
              CSP=True, 
              add_hidden=True,
              classifyV8=True,
              bottleneck=1.0, 
              variant='s', 
              device=device, 
              dtype=torch.float32)

In [42]:
img = torch.rand(1,3,640,640)
_ = transforms.Compose([transforms.Normalize(mean=[0.5, 0.5, 0.5],
                        std=[0.5, 0.5, 0.5])])
img = _(img)

In [44]:
with torch.autograd.profiler.profile(use_device='cpu',
                                    record_shapes=True,
                                    # profile_memory=True,
                                    with_flops=True,
                                    with_modules=True,
                                    with_stack=True) as prof:
   model(img)
print(prof.key_averages().table(row_limit=-1))

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  Total MFLOPs  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      Conv block         8.01%      15.271ms        93.67%     178.537ms       6.867ms            26            --  
                    aten::conv2d         0.27%     511.200us        57.42%     109.447ms       4.209ms            26     12445.286  
               aten::convolution         0.46%     879.600us        57.15%     108.935ms       4.190ms            26            --  
              aten::_convolution         0.26%     489.100us        56.69%     108.056ms       4.156ms            26            --  
        aten::mkldnn_convolution        56.11%     106.956ms        5

In [46]:
from torch.profiler import profile, ProfilerActivity

In [47]:
with profile(activities=[ProfilerActivity.CPU],
                                    schedule=torch.profiler.schedule(wait=1, warmup=2, active=10, repeat=1),
                                    on_trace_ready=torch.profiler.tensorboard_trace_handler('../log/darknet19/inference'),
                                    record_shapes=True,
                                    profile_memory=True,
                                    with_flops=True,
                                    with_modules=True,
                                    with_stack=True) as prof:
    for i in range(13):
        out = model(img)
        prof.step()
print(prof.key_averages().table(row_limit=-1))

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  Total MFLOPs  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                   ProfilerStep*         1.04%      10.654ms       100.00%        1.025s     102.514ms     210.21 Mb      -1.90 Gb            10            --  
                      Conv block         7.14%      73.200ms        90.87%     931.522ms       3.583ms       1.93 Gb           0 b           260            --  
                    aten::conv2d         0.21%       2.163ms        58.33%     597.986ms       2.300ms     660.16 Mb           0 b           260    124452.864  
               aten::convolution  

In [64]:
img = train_dataset[0][0].unsqueeze(0)

In [65]:
img.shape

torch.Size([1, 3, 224, 224])

In [61]:
img = torch.rand(1,3,640,640)
_ = transforms.Compose([transforms.Normalize(mean=[0.5, 0.5, 0.5],
                        std=[0.5, 0.5, 0.5])])
img = _(img)

In [62]:
img.shape

torch.Size([1, 3, 640, 640])

In [66]:
with profile(activities=[ProfilerActivity.CPU],
                                    schedule=torch.profiler.schedule(wait=1, warmup=1, active=10, repeat=1),
                                    on_trace_ready=torch.profiler.tensorboard_trace_handler('../../log/YOLOv8cls-version-5/inference'),
                                    record_shapes=True,
                                    profile_memory=True,
                                    with_flops=True,
                                    with_modules=True,
                                    with_stack=True) as prof:
    for i in range(12):
        out = model(img)
        prof.step()
print(prof.key_averages().table(row_limit=-1))

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  Total MFLOPs  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                   ProfilerStep*         2.94%       8.735ms       100.00%     297.577ms      29.758ms       9.50 Mb    -255.63 Mb            10            --  
                      Conv block        14.37%      42.772ms        85.05%     253.078ms     973.378us     243.03 Mb           0 b           260            --  
                    aten::conv2d         0.68%       2.027ms        50.46%     150.159ms     577.536us      80.87 Mb           0 b           260     15245.476  
               aten::convolution  

# FLOPS ARE DIFFERENT DEPENDING ON THE IMG SIZE

such a dumb mistake. I forgot to change image size for YOLOv8!