In [1]:
import torch
import torch.nn as nn
from models import vgg
import torch.utils.benchmark as benchmark
import sys

sys.path.append("../")


In [2]:
from pytorch_model_summary import summary

model = vgg.vgg16()
tensor = torch.rand((1,4, 224,224))
print(summary(model,torch.zeros((1,4,224,224)),show_input=True))

--------------------------------------------------------------------------
      Layer (type)            Input Shape         Param #     Tr. Param #
          Conv2d-1       [1, 4, 224, 224]           2,368           2,368
            ReLU-2      [1, 64, 224, 224]               0               0
          Conv2d-3      [1, 64, 224, 224]          36,928          36,928
            ReLU-4      [1, 64, 224, 224]               0               0
       MaxPool2d-5      [1, 64, 224, 224]               0               0
          Conv2d-6      [1, 64, 112, 112]          73,856          73,856
            ReLU-7     [1, 128, 112, 112]               0               0
          Conv2d-8     [1, 128, 112, 112]         147,584         147,584
            ReLU-9     [1, 128, 112, 112]               0               0
      MaxPool2d-10     [1, 128, 112, 112]               0               0
         Conv2d-11       [1, 128, 56, 56]         295,168         295,168
           ReLU-12       [1, 256, 56,

In [3]:
model = vgg.float_vgg16()
tensor = torch.rand((1,224,224,4))

In [4]:
from torch.profiler import profile, record_function, ProfilerActivity
with profile(activities=[ProfilerActivity.CUDA], record_shapes=True, with_stack=True,with_modules=True, with_flops=True) as prof:
    with record_function("model_inference"):
        model.cuda()
        tensor = tensor.cuda()
        model(tensor)

STAGE:2023-11-25 23:41:18 2141010:2141010 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-11-25 23:41:20 2141010:2141010 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-11-25 23:41:20 2141010:2141010 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [5]:
print(prof.key_averages().table(sort_by="cuda_memory_usage"))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                     cudaGetDeviceCount         0.08%       1.131ms         0.08%       1.131ms     377.000us       0.000us         0.00%       0.000us       0.000us             3  
                                cudaGetDeviceProperties         0.01%     112.000us         0.01%     112.000us      56.000us       0.000us         0.00%       0.000us       0.000us             2  
         

In [6]:
MB = 1024 * 1024
t = torch.cuda.get_device_properties(0).total_memory // MB
r = torch.cuda.memory_reserved(0) // MB
a = torch.cuda.memory_allocated(0) // MB
f = r-a  # free inside reserved
print(t,r,a,f)

24226 1040 523 517


In [7]:
MB = 1024 * 1024
model = vgg.float_vgg16()
tensor = torch.rand((1,224,224,4))
reset = torch.cuda.reset_max_memory_allocated()
print(reset)
memory_before = torch.cuda.max_memory_allocated(device=0) // MB
model.cuda()
tensor = tensor.cuda()
with torch.no_grad():
    y = model(tensor)
memory_after = torch.cuda.max_memory_allocated(device=0) // MB
peak_momery = memory_after -memory_before
print(peak_momery)
print(torch.cuda.memory_summary(device=0))

None
907
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      | 536329 KiB |    915 MiB |   3186 MiB |   2662 MiB |
|       from large pool | 534528 KiB |    914 MiB |   3175 MiB |   2653 MiB |
|       from small pool |   1801 KiB |      2 MiB |     10 MiB |      9 MiB |
|---------------------------------------------------------------------------|
| Active memory         | 536329 KiB |    915 MiB |   3186 MiB |   2662 MiB |
|       from large pool | 534528 KiB |    914 MiB |   3175 MiB |   2653 MiB |
|       from small pool |   1801 KiB |      2 MiB |     10 MiB |      9 MiB |
|------------------------------------------------------

