In [1]:
import os
import torch
import torch.nn as nn
from torchvision import transforms

In [2]:
import sys
# Put the directory three levels up on the import path (presumably the project
# root that holds `data_preprocessing` and `Models` — TODO confirm).
# The path is relative to the notebook's working directory.
# The membership check keeps the cell idempotent: re-running it no longer
# stacks duplicate entries on sys.path.
if "../../../" not in sys.path:
    sys.path.insert(1, "../../../")

In [3]:
from data_preprocessing import get_means, get_stds, ImageNetSubset
from Models.yolov8cls_path import Model

In [4]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Dataset root (relative to the notebook's working directory).
data_path = "../../../../dummy_datasets/"
# JSON file with the normalisation statistics, stored next to the dataset.
norms_path = os.path.join(data_path, "norms.json")

In [6]:
# Per-channel normalisation statistics used by the transform pipeline.
# No train_loader is supplied, so the values presumably come from the file at
# norms_path rather than being recomputed — TODO confirm in data_preprocessing.
means = get_means(train_loader=None, path=norms_path)
stds = get_stds(train_loader=None, path=norms_path)

Means are: [0.4405549168586731, 0.4407285749912262, 0.4381718039512634]
stds are: [0.25142669677734375, 0.25270089507102966, 0.25131651759147644]


In [7]:
# Preprocessing pipeline, applied in order: convert to tensor, resize to
# 224x224, then normalise with the per-channel means/stds loaded earlier.
_transform_steps = [
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=means, std=stds),
]
transformations = transforms.Compose(_transform_steps)

In [8]:
# Training split of the dataset under data_path, with the preprocessing
# pipeline attached.
train_dataset = ImageNetSubset(
    path=data_path,
    train=True,
    transform=transformations,
    half=False,
    show=False,
)

# Inference

In [62]:
# First element of the first dataset item, with a new leading axis of size 1
# (presumably the image tensor plus a batch dimension — TODO confirm the item layout).
img = torch.unsqueeze(train_dataset[0][0], 0)

In [63]:
from torch.profiler import profile, ProfilerActivity

In [67]:
# Build the custom YOLOv8-cls model to be profiled: variant 's', 1000 output
# classes, float32 parameters, created for `device`.
model = Model(
    num_classes=1000,
    residual_connection=True,
    CSP=True,
    add_hidden=True,
    classifyV8=True,
    bottleneck=1.0,
    variant="s",
    device=device,
    dtype=torch.float32,
)

In [69]:
# Profile repeated CPU inference on a single image and export a TensorBoard trace.
#
# The profiler schedule skips WAIT_STEPS iterations, warms up for WARMUP_STEPS,
# then records ACTIVE_STEPS. The loop length is derived from the same constants
# so it cannot drift out of sync with the schedule.
WAIT_STEPS, WARMUP_STEPS, ACTIVE_STEPS = 1, 1, 10

# NOTE(review): only CPU activity is recorded and `img` is never moved to
# `device` — confirm this cell is meant to run with device == cpu.
# torch.no_grad() keeps autograd bookkeeping out of the measured time/memory,
# since this is a pure inference benchmark.
with torch.no_grad(), profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=WAIT_STEPS, warmup=WARMUP_STEPS, active=ACTIVE_STEPS, repeat=1
    ),
    on_trace_ready=torch.profiler.tensorboard_trace_handler('../../log/YOLOv8cls-version-5/inference'),
    record_shapes=True,
    profile_memory=True,
    with_flops=True,
    with_modules=True,
    with_stack=True,
) as prof:
    for step in range(WAIT_STEPS + WARMUP_STEPS + ACTIVE_STEPS):
        out = model(img)
        prof.step()  # tell the profiler that one scheduled step has finished
print(prof.key_averages().table(row_limit=-1))

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  Total MFLOPs  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                   ProfilerStep*         2.20%       7.229ms       100.00%     327.848ms      32.785ms       7.83 Mb    -257.30 Mb            10            --  
                      Conv block        13.67%      44.813ms        87.48%     286.798ms       1.103ms     243.03 Mb           0 b           260            --  
                    aten::conv2d         0.68%       2.219ms        54.94%     180.114ms     692.746us      80.87 Mb           0 b           260     15245.476  
               aten::convolution  

In [66]:
# Re-create the custom YOLOv8-cls model (variant 's', 1000 classes, float32)
# with the same configuration, so the next profiling run uses a new instance.
model = Model(
    num_classes=1000,
    residual_connection=True,
    CSP=True,
    add_hidden=True,
    classifyV8=True,
    bottleneck=1.0,
    variant="s",
    device=device,
    dtype=torch.float32,
)

In [41]:
# Profile one forward pass (no schedule) and export a TensorBoard trace.
#
# CUDA activity is only requested when a GPU is actually available, so the
# profiler is not asked to trace a device that does not exist.
activities = [ProfilerActivity.CPU]
if torch.cuda.is_available():
    activities.append(ProfilerActivity.CUDA)

# NOTE(review): the trace is written to the same directory as the scheduled
# profiling run, so traces from both runs end up side by side.
# torch.no_grad() keeps autograd bookkeeping out of the inference measurement.
with torch.no_grad(), profile(
    activities=activities,
    on_trace_ready=torch.profiler.tensorboard_trace_handler('../../log/YOLOv8cls-version-5/inference'),
    record_shapes=True,
    profile_memory=True,
    with_flops=True,
    with_modules=True,
    with_stack=True,
) as prof:
    out = model(img)
print(prof.key_averages().table(row_limit=-1))

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  Total KFLOPs  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      Conv block        23.94%       5.663ms        83.74%      19.806ms     761.758us      12.52 Mb           0 b            26            --  
                    aten::conv2d         0.71%     169.100us        39.53%       9.350ms     359.615us       4.16 Mb           0 b            26    402612.224  
               aten::convolution         1.62%     383.300us        38.82%       9.181ms     353.112us       4.16 Mb           0 b            26            --  
              aten::_convolution  

In [43]:
# Custom YOLOv8-cls model, same configuration as before (variant 's',
# 1000 classes, float32).
# NOTE(review): in file order `model` is rebound by the torch.load cell that
# follows, so this instance is never used — confirm whether this cell is needed.
model = Model(
    num_classes=1000,
    residual_connection=True,
    CSP=True,
    add_hidden=True,
    classifyV8=True,
    bottleneck=1.0,
    variant="s",
    device=device,
    dtype=torch.float32,
)

In [51]:
# Load the reference yolov8n-cls checkpoint and keep the object stored under
# its 'model' key (it is later called via `.predict`, so it is a pickled model
# object rather than a plain state_dict).
#
# SECURITY: torch.load unpickles arbitrary Python objects — only load
# checkpoint files from a trusted source.
# weights_only=False is spelled out because newer PyTorch releases default to
# weights_only=True, which refuses to unpickle full model objects.
# map_location='cpu' keeps the model on the CPU, matching the CPU input tensor
# it is profiled with.
# NOTE(review): unpickling presumably needs the package that defines the
# stored model class to be importable — confirm the environment.
model = torch.load(
    '../../../Models/yolov8n-cls.pt',
    map_location='cpu',
    weights_only=False,
)['model']

In [52]:
# Same sample as before, batched and cast to float16 (presumably to match the
# checkpoint's weight precision — TODO confirm). This rebinds `img`.
img = train_dataset[0][0].unsqueeze(0).to(torch.float16)

In [53]:
# Profile one `predict` call of the loaded checkpoint model on the fp16 image.
#
# NOTE(review): `use_device='cpu'` is kept as written; confirm that this
# profiler option accepts 'cpu' in the installed PyTorch version.
# NOTE(review): the recorded output of this cell shows
# aten::_slow_conv2d_forward taking ~98% of CPU time — presumably the fp16
# input hits a slow CPU convolution path, so these timings are not directly
# comparable with the float32 runs. TODO confirm.
# torch.no_grad() keeps autograd bookkeeping out of the inference measurement.
with torch.no_grad(), torch.autograd.profiler.profile(
    use_device='cpu',
    record_shapes=True,
    with_flops=True,
    with_modules=True,
    with_stack=True,
) as prof:
    model.predict(img)
print(prof.key_averages().table(row_limit=-1))

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  Total KFLOPs  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                    aten::conv2d         0.07%     217.500us        98.27%     326.319ms      12.551ms            26    402612.224  
               aten::convolution         0.08%     249.400us        98.20%     326.102ms      12.542ms            26            --  
              aten::_convolution         0.11%     357.300us        98.13%     325.852ms      12.533ms            26            --  
               aten::thnn_conv2d         0.07%     221.400us        98.02%     325.495ms      12.519ms            26            --  
      aten::_slow_conv2d_forward        97.87%     325.012ms        9

In [61]:
# Convert the conv2d total from the profiler table (402612 KFLOPs, fractional
# part dropped) to GFLOPs: KFLOPs * 1e3 -> FLOPs, FLOPs / 1e9 -> GFLOPs.
402_612 * 1_000 / 1_000_000_000

0.402612