In [1]:
import torch
from pytorch_pretrained_bert import OpenAIGPTTokenizer, OpenAIGPTModel, OpenAIGPTLMHeadModel

# OPTIONAL: if you want to have more information on what's happening, activate the logger as follows
import logging
logging.basicConfig(level=logging.INFO)

# Load pre-trained model tokenizer (vocabulary)
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')

# Tokenized input
text = "Who was Henson ? A puppeteer"
tokenized_text = tokenizer.tokenize(text)

# Get index for the first and the last token of every word.
# A piece ending in '</w>' closes a word; the next word starts on the following piece.
# The result is interleaved as (first_0, last_0, first_1, last_1, ...).
# Word boundaries are taken from the pieces themselves rather than from
# len(text.split()), so the index stays consistent even when the tokenizer's
# word count differs from the whitespace word count (e.g. punctuation attached
# to a word in the raw text).
# NOTE(review): assumes the tokenizer marks every word-final piece with '</w>'.
boundary_indices = []
word_start = 0
for position, piece in enumerate(tokenized_text):
    if piece.endswith('</w>'):
        boundary_indices += [word_start, position]
        word_start = position + 1
ind = torch.tensor(boundary_indices, dtype=torch.long)

# Convert token to vocabulary indices
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# Convert inputs to PyTorch tensors
tokens_tensor = torch.tensor([indexed_tokens])

# Load pretrained model
model = OpenAIGPTModel.from_pretrained('openai-gpt')

# Run on the GPU when one is present; otherwise fall back to the CPU so the
# cell still executes on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokens_tensor = tokens_tensor.to(device)
model.to(device)
# Switch to evaluation mode before inference.
model.eval()

# Get hidden states (no gradients are needed for inference)
with torch.no_grad():
    hidden_states = model(tokens_tensor)

# Sum the hidden states of each word's first and last piece.
# Gather the rows listed in `ind` (consecutive entries form one pair per word)
# and move the sequence axis last, which is the axis avg_pool1d pools over.
boundary_states = hidden_states[:, ind, :].transpose(1, 2)
# A window of 2 (stride defaults to the window size) averages each pair;
# doubling the average gives the pair's sum. When both indices of a pair are
# the same piece, the result is twice that piece's state.
pair_sums = 2 * torch.nn.functional.avg_pool1d(boundary_states, kernel_size=2)
# Swap the last two axes back so the feature axis is last again.
hidden_new = pair_sums.transpose(1, 2)

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


INFO:pytorch_pretrained_bert.tokenization_openai:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-vocab.json from cache at /home/yh1844/.pytorch_pretrained_bert/4ab93d0cd78ae80e746c27c9cd34e90b470abdabe0590c9ec742df61625ba310.b9628f6fe5519626534b82ce7ec72b22ce0ae79550325f45c604a25c0ad87fd6
INFO:pytorch_pretrained_bert.tokenization_openai:loading merges file https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-merges.txt from cache at /home/yh1844/.pytorch_pretrained_bert/0f8de0dbd6a2bb6bde7d758f4c120dd6dd20b46f2bf0a47bc899c89f46532fde.20808570f9a3169212a577f819c845330da870aeb14c40f7319819fce10c3b76
INFO:pytorch_pretrained_bert.modeling_openai:loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-pytorch_model.bin from cache at /home/yh1844/.pytorch_pretrained_bert/e45ee1afb14c5d77c946e66cb0fa70073a77882097a1a2cefd51fd24b172355e.e7ee3fcd07c695a4c9f31ca735502c090230d988de03202f7af9ebe1c3a4054c
INFO:pytorch_pret

In [2]:
# Inspect the pieces produced by the tokenizer for `text`.
tokenized_text

['who</w>',
 'was</w>',
 'hen',
 'son</w>',
 '?</w>',
 'a</w>',
 'pupp',
 'ete',
 'er</w>']

In [3]:
# Compare the number of tokenizer pieces with the number of whitespace-separated words.
len(tokenized_text), len(text.split())

(9, 6)

In [4]:
# Inspect the gather index: consecutive entries pair a word's first and last piece positions.
ind

tensor([0, 0, 1, 1, 2, 3, 4, 4, 5, 5, 6, 8])

In [5]:
# Shape of the model output for the tokenized input.
hidden_states.size()

torch.Size([1, 9, 768])

In [6]:
# Shape after combining the first/last piece states (compare with hidden_states.size()).
hidden_new.size()

torch.Size([1, 6, 768])

In [8]:
# Display the hidden states returned by the model.
hidden_states

tensor([[[ 1.9630e-01,  3.6651e-02, -2.0514e-01,  ...,  7.0617e-01,
          -2.7861e-01,  1.3519e-01],
         [-4.7051e-01,  1.5814e-01,  4.5239e-02,  ...,  7.8089e-01,
          -2.5194e-01,  4.2565e-01],
         [-5.1667e-01,  1.5813e+00, -9.7937e-01,  ..., -8.6645e-01,
          -3.4789e-03,  8.8305e-01],
         ...,
         [-5.0072e-01,  1.1430e+00, -2.0425e+00,  ..., -1.2931e+00,
          -1.7080e+00,  7.1497e-01],
         [-9.5884e-01,  2.7438e+00, -2.4036e+00,  ..., -1.1663e+00,
           2.4865e-01,  2.0937e-01],
         [ 4.2721e-01,  5.9180e-01,  2.3758e-01,  ..., -1.5640e-01,
          -6.8987e-01, -3.9058e-01]]], device='cuda:0')

In [9]:
# Display the combined first/last piece states.
hidden_new

tensor([[[  0.3926,   0.0733,  -0.4103,  ...,   1.4123,  -0.5572,   0.2704],
         [ -0.9410,   0.3163,   0.0905,  ...,   1.5618,  -0.5039,   0.8513],
         [ -0.2563,   1.3620,  -0.7539,  ...,  -0.4810,  -0.2442,   0.2666],
         [  0.6172,  -1.6505,   1.0711,  ...,   0.2621,   0.1283,  -0.8820],
         [  0.5773,  -0.7505,   0.5090,  ...,   0.2849,   0.1030,  -0.0947],
         [ -0.0735,   1.7348,  -1.8049,  ...,  -1.4495,  -2.3978,   0.3244]]],
       device='cuda:0')

In [10]:
# Manual spot-check with first-column values copied from the printed hidden_states
# (4.2721e-01 and -5.0072e-01). Their sum should equal the first entry of the last
# hidden_new row (-0.0735); the exponents were dropped here, so this result is
# ten times larger than that entry.
4.2721 - 5.0072

-0.7351000000000001