In [3]:
pip install pytorch-transformers

Collecting pytorch-transformers
  Downloading pytorch_transformers-1.2.0-py3-none-any.whl (176 kB)
[K     |████████████████████████████████| 176 kB 13.5 MB/s 
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 49.4 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 43.4 MB/s 
Collecting boto3
  Downloading boto3-1.20.27-py3-none-any.whl (131 kB)
[K     |████████████████████████████████| 131 kB 48.5 MB/s 
Collecting botocore<1.24.0,>=1.23.27
  Downloading botocore-1.23.27-py3-none-any.whl (8.5 MB)
[K     |████████████████████████████████| 8.5 MB 37.0 MB/s 
[?25hCollecting jmespath<1.0.0,>=0.7.1
  Downloading jmespath-0.10.0-py2.py3-none-any.whl (24 kB)
Collecting s3transfer<0.6.0,>=0.5.0
  Downloading s3transfer-0.5.0-py3-none-any.whl (79 kB)
[K     |██████████████████

In [4]:
import torch
from pytorch_transformers import GPT2Tokenizer, GPT2LMHeadModel


In [5]:
# Load the tokenizer (vocabulary) that belongs to the pre-trained 'gpt2' model.
# NOTE(review): from_pretrained appears to download the vocabulary files on
# first use — confirm they are cached for later runs.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

100%|██████████| 1042301/1042301 [00:00<00:00, 2116305.94B/s]
100%|██████████| 456318/456318 [00:00<00:00, 1330225.49B/s]


In [6]:
# Encode the input text into a list of integer token ids.
text = "What is the fastest car in the"
indexed_tokens = tokenizer.encode(text)
# Bare last expression so the notebook displays the resulting ids.
indexed_tokens

[1867, 318, 262, 14162, 1097, 287, 262]

In [7]:
# Convert the token ids into a PyTorch tensor and add a leading axis of
# size 1, so the result is 2-D with a single row.
tokens_tensor = torch.tensor(indexed_tokens).unsqueeze(0)
tokens_tensor

tensor([[ 1867,   318,   262, 14162,  1097,   287,   262]])

In [8]:
# Load the pre-trained 'gpt2' model weights into a language-model-head model.
# NOTE(review): from_pretrained appears to download the checkpoint on first
# use — confirm it is cached for later runs.
model = GPT2LMHeadModel.from_pretrained('gpt2')

100%|██████████| 665/665 [00:00<00:00, 364317.16B/s]
100%|██████████| 548118077/548118077 [00:17<00:00, 31141658.23B/s]


In [9]:
# Put the model in evaluation mode to deactivate the Dropout modules.
# eval() is the last expression in the cell, so the notebook echoes the
# module tree it returns.
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): Laye

In [10]:
# Use the GPU when CUDA is available and fall back to the CPU otherwise, so
# the notebook still runs end-to-end on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The input tensor and the model must live on the same device.
tokens_tensor = tokens_tensor.to(device)
model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): Laye

In [12]:
# Run the model on the encoded prompt. Gradient tracking is switched off
# because this is inference only.
with torch.no_grad():
    outputs = model(tokens_tensor)

# The first element of the model output holds the prediction scores.
# NOTE(review): presumably shaped (batch, sequence, vocabulary) — confirm.
predictions = outputs[0]

predictions

tensor([[[ -37.9883,  -37.9580,  -41.4891,  ...,  -44.0717,  -43.7975,
           -37.7047],
         [ -90.5062,  -88.9512,  -95.2817,  ...,  -95.2051,  -95.9588,
           -90.8715],
         [ -96.4428,  -94.5894,  -97.7109,  ...,  -96.9249, -100.0142,
           -94.3136],
         ...,
         [ -94.2190,  -94.6732,  -97.5500,  ..., -104.5246, -103.3913,
           -95.9647],
         [ -66.9001,  -66.0432,  -69.7153,  ...,  -75.6980,  -73.9600,
           -66.7940],
         [ -96.1219,  -94.2472,  -96.9560,  ..., -103.5571, -100.5183,
           -95.6673]]], device='cuda:0')

In [13]:
# Take the scores at the final sequence position of the first batch entry,
# pick the index with the highest score, and decode it together with the
# prompt's token ids to get the prompt plus the predicted next sub-word.
last_position_scores = predictions[0, -1, :]
predicted_index = last_position_scores.argmax().item()
predicted_text = tokenizer.decode([*indexed_tokens, predicted_index])

predicted_text

' What is the fastest car in the world'