In [1]:
import os
import torch
import torch.nn as nn
from torchvision import transforms
import numpy as np

In [2]:
import sys
sys.path.insert(1, "../../")

In [14]:
from data_preprocessing import get_norms, ImageNetSubset
from Models.darknet19 import Darknet19
from evaluation import count_parameters

In [4]:
if torch.cuda.is_available():
    device=torch.device('cuda:0')
else:
    device=torch.device('cpu') 
device=torch.device('cpu') 

In [5]:
device

device(type='cpu')

In [16]:
data_path = '../../../datasets/imagenette2/'
norms_path = os.path.join(data_path, 'norms.json')
norms = get_norms(path=norms_path, train_loader=None)
means, stds = norms['means'], norms['stds']

In [17]:
norms = get_norms(path=norms_path, train_loader=None)
means, stds = norms['means'], norms['stds']

# Inference at 640x640

In [18]:
transformations = transforms.Compose([transforms.ToTensor(),
                                      transforms.Resize((640, 640)),
                                      transforms.Normalize(mean=means, std=stds)])

In [19]:
train_dataset = ImageNetSubset(path=data_path, train=True, transform=transformations, half=False, show=False)

In [20]:
model = Darknet19(num_classes=1000, device=device, dtype=torch.float32)

In [21]:
model

Darknet19(
  (max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv1): Conv(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): LeakyReLU(negative_slope=0.1)
  )
  (conv2): Conv(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): LeakyReLU(negative_slope=0.1)
  )
  (seq3_5): Sequential(
    (conv3): Conv(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): LeakyReLU(negative_slope=0.1)
    )
    (conv4): Conv(
      (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, mome

In [135]:
from torch.profiler import profile, ProfilerActivity

In [136]:
from torch.autograd.profiler import record_function

In [103]:
img = train_dataset[6][0].unsqueeze(0).to(device)
with profile(activities=[ProfilerActivity.CPU],
                                    schedule=torch.profiler.schedule(wait=3, warmup=1, active=10, repeat=1),
                                    on_trace_ready=torch.profiler.tensorboard_trace_handler('../log/darknet19/inference_at_640'),
                                    record_shapes=True,
                                    profile_memory=True,
                                    with_flops=True,
                                    with_modules=True,
                                    with_stack=False) as prof:
    for i in range(14):
        # torch.cuda.synchronize(device=device)
        with record_function('inference'):
            with torch.no_grad():
                model(img)
        # torch.cuda.synchronize(device=device)
        prof.step()
print(prof.key_averages(group_by_input_shape=True).table(row_limit=-1))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls                                                                      Input Shapes  Total GFLOPs  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  ------------  
                    ProfilerStep*         0.01%     345.200us       100.00%        3.798s     379.828ms           0 b           0 b            10                                                                                []            --  
                        

In [104]:
from evaluation import *
df = generate_dataframe(prof)

In [142]:
img.shape

torch.Size([1, 3, 640, 640])

In [141]:
img = train_dataset[6][0].unsqueeze(0).to(device)
with profile(activities=[ProfilerActivity.CPU],
                                    schedule=torch.profiler.schedule(wait=1, warmup=1, active=1, repeat=1),
                                    on_trace_ready=torch.profiler.tensorboard_trace_handler('../log/darknet19/inference_at_640'),
                                    record_shapes=True,
                                    profile_memory=True,
                                    with_flops=True,
                                    with_modules=True,
                                    with_stack=False) as prof:
    for i in range(3):
        # torch.cuda.synchronize(device=device)
        with record_function('inference'):
            with torch.no_grad():
                model(img)
        # torch.cuda.synchronize(device=device)
        prof.step()
print(prof.key_averages(group_by_input_shape=True).table(row_limit=-1))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls                                                                      Input Shapes  Total MFLOPs  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  ------------  
                    ProfilerStep*         0.02%      57.900us       100.00%     372.378ms     372.378ms           0 b           0 b             1                                                                                []            --  
                        

# Inference at 224x224

In [107]:
transformations = transforms.Compose([transforms.ToTensor(),
                                      transforms.Resize((224, 224)),
                                      transforms.Normalize(mean=means, std=stds)])

In [108]:
train_dataset = ImageNetSubset(path=data_path, train=True, transform=transformations, half=False, show=False)

In [109]:
model = Darknet19(num_classes=1000, device=device, dtype=torch.float32)

In [110]:
from torch.profiler import profile, ProfilerActivity

In [111]:
img = train_dataset[6][0].unsqueeze(0).to(device)
img.shape, img.device

(torch.Size([1, 3, 224, 224]), device(type='cpu'))

In [121]:
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
                                    schedule=torch.profiler.schedule(wait=1, warmup=1, active=10, repeat=1),
                                    on_trace_ready=torch.profiler.tensorboard_trace_handler('../log/darknet19/inference_at_224'),
                                    record_shapes=True,
                                    profile_memory=True,
                                    with_flops=True,
                                    with_modules=True,
                                    with_stack=False) as prof:
    for i in range(12):
        # torch.cuda.synchronize(device=device)
        with record_function('inference'):
            with torch.no_grad():
                model(img)
        # torch.cuda.synchronize(device=device)
        prof.step()
print(prof.key_averages(group_by_input_shape=True).table(row_limit=-1))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem    # of Calls                                                                      Input Shapes  Total MFLOPs  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  ------------  
                    ProfilerStep*         0.08%     544.200us       100.00%     706.746ms      70.6

In [114]:
img.shape

torch.Size([1, 3, 224, 224])

In [125]:
img = train_dataset[6][0].unsqueeze(0).to(device)
with profile(activities=[ProfilerActivity.CPU],
                                    schedule=torch.profiler.schedule(wait=1, warmup=1, active=1, repeat=1),
                                    on_trace_ready=torch.profiler.tensorboard_trace_handler('../log/darknet19/inference_at_224'),
                                    record_shapes=True,
                                    profile_memory=True,
                                    with_flops=True,
                                    with_modules=True,
                                    with_stack=False) as prof:
    for i in range(3):
        # torch.cuda.synchronize(device=device)
        with record_function('inference'):
            with torch.no_grad():
                model(img)
        # torch.cuda.synchronize(device=device)
        prof.step()
print(prof.key_averages(group_by_input_shape=True).table(row_limit=-1))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls                                                                      Input Shapes  Total MFLOPs  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  ------------  
                    ProfilerStep*         0.05%      33.100us       100.00%      60.608ms      60.608ms           0 b           0 b             1                                                                                []            --  
                        

# GFLOPs at 224x224 using Ultralytics functions

In [22]:
from ultralytics_flops import get_flops, get_flops_with_torch_profiler

In [23]:
get_flops(model, imgsz=224)

5.618206368

In [24]:
get_flops_with_torch_profiler(model, imgsz=224)

5.581979648

# GFLOPs at 640x640 using Ultralytics functions

In [126]:
from ultralytics_flops import get_flops, get_flops_with_torch_profiler

In [127]:
get_flops(model, imgsz=640)

45.863692799999995

In [128]:
get_flops_with_torch_profiler(model, imgsz=640)

45.5671808

# Count Parameters

In [207]:
count_parameters(model)

20841376