In [23]:
import numpy as np

In [21]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the pretrained GPT-2 tokenizer and its causal language model.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Inference: encode the prompt as PyTorch tensors (batch size 1).
gpt2_prompt = "studies have shown that owning a dog is good for you because "
input_ids = tokenizer(gpt2_prompt, return_tensors="pt")["input_ids"]

# Generate a continuation, passing the EOS token id as the padding id.
outputs = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id)

# Decode the first (only) generated sequence, dropping special tokens.
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)

studies have shown that owning a dog is good for you because  you can get a better


In [24]:
# Pose the sequence as an NLI premise and each candidate label as a hypothesis.
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')

premise = "I have a problem with my iphone that needs to be resolved asap!!"
labels = ["urgent", "not urgent", "phone", "tablet", "computer"]

# One raw logits tensor per candidate label, in the same order as `labels`.
all_logits = []

# Inference only: disable autograd bookkeeping for the forward passes.
with torch.no_grad():
    for label in labels:
        hypothesis = f'This example is {label}.'

        # Run the (premise, hypothesis) pair through the model pre-trained on MNLI.
        # `truncation='only_first'` replaces the deprecated `truncation_strategy=`
        # keyword: if the pair is too long, only the premise is truncated.
        x = tokenizer.encode(premise, hypothesis, return_tensors='pt',
                             truncation='only_first')

        # The first element of the model output holds the classification logits.
        logits = nli_model(x)[0]
        all_logits.append(logits)


#     entail_contradiction_logits = logits[:,[0,2]]
#     probs = entail_contradiction_logits.softmax(dim=1)
#     prob_label_is_true = probs[:,1]
#     print(f'conclusion for {label}')
#     print(prob_label_is_true)



In [25]:
# Stack the per-label logits into a single NumPy array (one entry per label).
# Entries that are already NumPy data are passed through unchanged, so
# re-running this cell after the conversion no longer raises AttributeError
# (ndarrays have no `.detach()`). `.cpu()` is a no-op for CPU tensors and
# lets tensors living on another device be converted as well.
all_logits = np.array([
    logit.detach().cpu().numpy() if hasattr(logit, "detach") else np.asarray(logit)
    for logit in all_logits
])

In [26]:
# Display the stacked logits array (bare expression, so Jupyter renders its repr).
all_logits

array([[[-3.8805737 ,  0.88837683,  2.6724663 ]],

       [[ 4.7216988 , -2.037772  , -2.572649  ]],

       [[-2.670961  ,  0.2681278 ,  2.621893  ]],

       [[ 5.074621  , -2.3442779 , -2.7126708 ]],

       [[ 0.842014  , -0.08584917, -1.0156575 ]]], dtype=float32)

In [27]:
# Numerically stable softmax over the last axis: subtract each row's maximum
# before exponentiating, then divide by the row's sum of exponentials.
maxes = np.amax(all_logits, axis=-1, keepdims=True)
shifted_exp = np.exp(all_logits - maxes)
row_totals = np.sum(shifted_exp, axis=-1, keepdims=True)
scores = shifted_exp / row_totals

In [28]:
# Display the per-row maxima that were subtracted before exponentiating.
maxes

array([[[2.6724663]],

       [[4.7216988]],

       [[2.621893 ]],

       [[5.074621 ]],

       [[0.842014 ]]], dtype=float32)

In [31]:
# Display the softmax scores; values along the last axis sum to 1 for each label.
scores

array([[[1.2192614e-03, 1.4362358e-01, 8.5515720e-01]],

       [[9.9816424e-01, 1.1577134e-03, 6.7812076e-04]],

       [[4.5701982e-03, 8.6370431e-02, 9.0905935e-01]],

       [[9.9898618e-01, 5.9920107e-04, 4.1455441e-04]],

       [[6.4456528e-01, 2.5485963e-01, 1.0057510e-01]]], dtype=float32)