In [3]:
import torch
import torch.nn as nn

print(f"torch version: {torch.__version__}")

DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print(f"torch cuda available: {torch.cuda.is_available()}")

import torch, torchtext
from torchtext.models import RobertaClassificationHead
from torchtext.functional import to_tensor
xlmr_large = torchtext.models.XLMR_LARGE_ENCODER
classifier_head = torchtext.models.RobertaClassificationHead(num_classes=2, input_dim = 1024)
model = xlmr_large.get_model(head=classifier_head)
transform = xlmr_large.transform()

torch version: 2.3.1+cpu
torch cuda available: False


Downloading: "https://download.pytorch.org/models/text/xlmr.large.encoder.pt" to C:\Users\sifanzhang/.cache\torch\hub\checkpoints\xlmr.large.encoder.pt
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.08G/2.08G [25:50<00:00, 1.44MB/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5.07M/5.07M [00:00<00:00, 14.6MB/s]
Downloading: "https://download.pytorch.org/models/text/xlmr.vocab.pt" to C:\Users\sifanzhang/.cache\torch\hub\checkpoints\xlmr.vocab.pt
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

In [4]:
small_input_batch = [
               "Hello world",
               "How are you!"
]
big_input_batch = [
               "Hello world",
               "How are you!",
               """`Well, Prince, so Genoa and Lucca are now just family estates of the
Buonapartes. But I warn you, if you don't tell me that this means war,
if you still try to defend the infamies and horrors perpetrated by
that Antichrist- I really believe he is Antichrist- I will have
nothing more to do with you and you are no longer my friend, no longer
my 'faithful slave,' as you call yourself! But how do you do? I see
I have frightened you- sit down and tell me all the news.`

It was in July, 1805, and the speaker was the well-known Anna
Pavlovna Scherer, maid of honor and favorite of the Empress Marya
Fedorovna. With these words she greeted Prince Vasili Kuragin, a man
of high rank and importance, who was the first to arrive at her
reception. Anna Pavlovna had had a cough for some days. She was, as
she said, suffering from la grippe; grippe being then a new word in
St. Petersburg, used only by the elite."""
]

In [5]:
input_batch=big_input_batch

model_input = to_tensor(transform(input_batch), padding_value=1)
output = model(model_input)
output.shape

torch.Size([3, 2])

In [6]:
ITERATIONS=10

In [7]:
print("slow path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=False) as prof:
  for i in range(ITERATIONS):
    output = model(model_input)
print(prof)

model.eval()

print("fast path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=False) as prof:
  with torch.no_grad():
    for i in range(ITERATIONS):
      output = model(model_input)
print(prof)

slow path:
--------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
--------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                    aten::eq         0.00%     508.000us         0.00%     508.000us     508.000us             1  
                             aten::embedding         0.00%     323.000us         0.00%       1.577ms       1.577ms             1  
                               aten::reshape         0.00%       3.000us         0.00%       8.000us       8.000us             1  
                                  aten::view         0.00%       5.000us         0.00%       5.000us       5.000us             1  
                          aten::index_select         0.00%       1.226ms

  output = torch._nested_tensor_from_mask(output, src_key_padding_mask.logical_not(), mask_check=False)


-------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                       Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
-------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                   aten::eq         0.00%      32.000us         0.00%      32.000us      32.000us             1  
                            aten::embedding         0.00%      21.000us         0.00%     287.000us     287.000us             1  
                              aten::reshape         0.00%       3.000us         0.00%       6.000us       6.000us             1  
                                 aten::view         0.00%       3.000us         0.00%       3.000us       3.000us             1  
                         aten::index_select         0.00%     245.000us         0.00%     

In [8]:
model.encoder.transformer.layers.enable_nested_tensor=False

In [9]:
model.to(DEVICE)
model_input = model_input.to(DEVICE)

print("slow path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  for i in range(ITERATIONS):
    output = model(model_input)
print(prof)

model.eval()

print("fast path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  with torch.no_grad():
    for i in range(ITERATIONS):
      output = model(model_input)
print(prof)

slow path:


  warn("CUDA is not available, disabling CUDA profiling")


-----------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                 Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
-----------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                             aten::eq         0.00%      35.000us         0.00%      35.000us      35.000us             1  
                                      aten::embedding         0.00%      56.000us         0.00%     367.000us     367.000us             1  
                                        aten::reshape         0.00%       5.000us         0.00%       9.000us       9.000us             1  
                                           aten::view         0.00%       4.000us         0.00%       4.000us       4.000us             1  
                    

In [10]:
model.encoder.transformer.layers.enable_nested_tensor = True

In [11]:
model.to(DEVICE)
model_input = model_input.to(DEVICE)

print("slow path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  for i in range(ITERATIONS):
    output = model(model_input)
print(prof)

model.eval()

print("fast path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  with torch.no_grad():
    for i in range(ITERATIONS):
      output = model(model_input)
print(prof)

slow path:
-----------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                 Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
-----------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                             aten::eq         0.00%      39.000us         0.00%      39.000us      39.000us             1  
                                      aten::embedding         0.00%      45.000us         0.00%     365.000us     365.000us             1  
                                        aten::reshape         0.00%       4.000us         0.00%      10.000us      10.000us             1  
                                           aten::view         0.00%       6.000us         0.00%       6.000us       6.000us             1  
         