In [14]:
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import optim

In [15]:
import sys
sys.path.insert(1, "../")

In [16]:
from train import *
from data_preprocessing import *
from Models.darknet19 import *

In [17]:
if torch.cuda.is_available():
    device=torch.device('cuda')
else:
    device=torch.device('cpu') 

In [18]:
model = Darknet19(num_classes=10)

In [19]:
data_path = '../../dummy_datasets/'
norms_path = os.path.join(data_path, 'norms.json')

In [20]:
means = get_means(path=norms_path, train_loader=None)
stds = get_stds(path=norms_path, train_loader=None)

Means are: [0.4405549168586731, 0.4407285749912262, 0.4381718039512634]
stds are: [0.25142669677734375, 0.25270089507102966, 0.25131651759147644]


In [21]:
transformations = transforms.Compose([transforms.ToTensor(),
                                      transforms.Resize((224, 224)),
                                      transforms.Normalize(mean=means, std=stds)])

In [22]:
train_dataset = ImageNetSubset(path=data_path, train=True, transform=transformations, half=False, show=False)
val_dataset = ImageNetSubset(path=data_path, train=False, transform=transformations, half=False, show=False)

In [23]:
epochs=10
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False, num_workers=4)

In [24]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001)

In [25]:
loss_fn = nn.NLLLoss()

In [13]:
darknet_history, darknet_gradient_stats = train(epochs, train_loader, val_loader, model, optimizer, loss_fn)

2024-11-15 15:32:20.678039 Epoch 1: 
2024-11-15 15:32:27.769869 Batch 1: 
[Train] Accuracy: 3.3333, Loss per batch: 2.3073
2024-11-15 15:32:40.186194 Batch 1: 
2024-11-15 15:32:53.185183 Batch 2: 
[Val] Accuracy: 10.0%, loss per batch: 2.3026


ValueError: cannot set a row with mismatched columns

## Inference time
## Training time

In [33]:
img = train_dataset[0][0].unsqueeze(0)

In [55]:
with torch.autograd.profiler.profile(use_device='cuda', profile_memory=True) as prof:
   model(img)
print(prof)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                     aten::conv2d         0.02%      18.100us        11.81%      11.774ms      11.774ms       6.12 Mb           0 b             1  
                aten::convolution         0.07%      65.800us        11.80%      11.755ms      11.755ms       6.12 Mb           0 b             1  
               aten::_convolution         0.03%      31.200us        11.73%      11.690ms      11.690ms       6.12 Mb           0 b             1  
         aten::mkldnn_convolution        11.65%      11.605ms        11.70%      11.659ms      11.659ms       6.

In [43]:
torch.cuda.memory_summary(device=torch.device('cpu'))

ValueError: Expected a cuda device, but got: cpu

In [45]:
from torch.profiler import profile, record_function, ProfilerActivity

In [48]:
with profile(activities=[ProfilerActivity.CPU], record_shapes=True, profile_memory=True) as prof:
    with record_function("model_inference"):
        model(img)

In [54]:
print(prof.key_averages().table())

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference         4.18%       4.529ms       100.00%     108.450ms     108.450ms           0 b     -60.64 Mb             1  
                     aten::conv2d         0.15%     166.100us        68.60%      74.399ms       3.916ms      17.23 Mb           0 b            19  
                aten::convolution         0.54%     589.100us        68.45%      74.233ms       3.907ms      17.23 Mb           0 b            19  
               aten::_convolution         0.31%     334.000us        67.91%      73.644ms       3.876ms      17.

In [57]:
prof.key_averages()[0]

<FunctionEventAvg key=aten::conv2d self_cpu_time=143.900us cpu_time=3.958ms  self_cuda_time=0.000us cuda_time=0.000us input_shapes= cpu_memory_usage=18065320 cuda_memory_usage=0>

There is also cuda_memory_usage, so I can measure cuda memory and time as well

In [60]:
import torch_tb_profiler

In [62]:
prof = torch.profiler.profile(
        schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=1),
        on_trace_ready=torch.profiler.tensorboard_trace_handler('./log/darknet19'),
        record_shapes=True,
        profile_memory=True,
        with_stack=True)
