In [11]:
from transformers import AutoTokenizer, AutoModel, utils
from bertviz import model_view
utils.logging.set_verbosity_error()  # Suppress standard warnings

model_name = "microsoft/xtremedistil-l12-h384-uncased"  # Find popular HuggingFace models here: https://huggingface.co/models
input_text = "The cat sat on the mat"  
model = AutoModel.from_pretrained(model_name, output_attentions=True)  # Configure model to return attention values
tokenizer = AutoTokenizer.from_pretrained(model_name)
inputs = tokenizer.encode(input_text, return_tensors='pt')  # Tokenize input text
outputs = model(inputs)  # Run model
attention = outputs[-1]  # Retrieve attention from model outputs
tokens = tokenizer.convert_ids_to_tokens(inputs[0])  # Convert input ids to token strings
model_view(attention, tokens,display_mode="light")  # Display model view

<IPython.core.display.Javascript object>

In [3]:
print(outputs.keys())

odict_keys(['last_hidden_state', 'pooler_output', 'attentions'])


In [4]:
for i in range(len(outputs)):
    print(outputs[i].shape)

torch.Size([1, 8, 384])
torch.Size([1, 384])


AttributeError: 'tuple' object has no attribute 'shape'

In [7]:
# print(outputs["attentions"].keys())
for i in range(len(outputs["attentions"])):
    print(outputs["attentions"][i].shape)

torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])
torch.Size([1, 12, 8, 8])


In [8]:
from bertviz.transformers_neuron_view import GPT2Model, GPT2Tokenizer
from bertviz.neuron_view import show

In [None]:
model_type = 'gpt2'
model_version = 'gpt2'
# model = GPT2Model.from_pretrained(model_version)
# tokenizer = GPT2Tokenizer.from_pretrained(model_version)
# text = "At the store, she bought apples, oranges, bananas,"
# show(model, model_type, tokenizer, text, display_mode='light')

In [28]:
# Import specialized versions of models (that return query/key vectors)
from bertviz.transformers_neuron_view import BertModel, BertTokenizer
from bertviz.neuron_view import show
# 一样的跑不了
# model_type = 'bert'
# model_version = 'bert-base-uncased'
# do_lower_case = True
# sentence_a = "The cat sat on the mat"
# sentence_b = "The cat lay on the rug"
# model = BertModel.from_pretrained(model_version, output_attentions=True)
# tokenizer = BertTokenizer.from_pretrained(model_version, do_lower_case=do_lower_case)
# show(model, model_type, tokenizer, sentence_a, sentence_b, layer=2, head=0)

100%|██████████| 433/433 [00:00<00:00, 284839.03B/s]
100%|██████████| 440473133/440473133 [01:27<00:00, 5010471.34B/s]


ModuleNotFoundError: No module named 'fused_layer_norm_cuda'

In [12]:
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
model = AutoModel.from_pretrained("Helsinki-NLP/opus-mt-en-de", output_attentions=True)

Downloading (…)okenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

Downloading (…)olve/main/source.spm:   0%|          | 0.00/768k [00:00<?, ?B/s]

Downloading (…)olve/main/target.spm:   0%|          | 0.00/797k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.27M [00:00<?, ?B/s]



Downloading pytorch_model.bin:   0%|          | 0.00/298M [00:00<?, ?B/s]

In [14]:
encoder_input_ids = tokenizer("She sees the small elephant.", return_tensors="pt", add_special_tokens=True).input_ids
with tokenizer.as_target_tokenizer():
    decoder_input_ids = tokenizer("Sie sieht den kleinen Elefanten.", return_tensors="pt", add_special_tokens=True).input_ids

outputs = model(input_ids=encoder_input_ids, decoder_input_ids=decoder_input_ids)

encoder_text = tokenizer.convert_ids_to_tokens(encoder_input_ids[0])
decoder_text = tokenizer.convert_ids_to_tokens(decoder_input_ids[0])



In [15]:
from bertviz import model_view
model_view(
    encoder_attention=outputs.encoder_attentions,
    decoder_attention=outputs.decoder_attentions,
    cross_attention=outputs.cross_attentions,
    encoder_tokens= encoder_text,
    decoder_tokens = decoder_text
)

<IPython.core.display.Javascript object>

In [16]:
for i in range(len(outputs.cross_attentions)):
    print(outputs.cross_attentions[i].shape)

torch.Size([1, 8, 7, 7])
torch.Size([1, 8, 7, 7])
torch.Size([1, 8, 7, 7])
torch.Size([1, 8, 7, 7])
torch.Size([1, 8, 7, 7])
torch.Size([1, 8, 7, 7])


In [17]:
from bertviz import head_view
from transformers import AutoTokenizer, AutoModel, utils
utils.logging.set_verbosity_error()  # Suppress standard warnings

# NOTE: This code is model-specific
model_version = 'bert-base-uncased'
model = AutoModel.from_pretrained(model_version, output_attentions=True)
tokenizer = AutoTokenizer.from_pretrained(model_version)
sentence_a = "the rabbit quickly hopped"
sentence_b = "The turtle slowly crawled"
inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt')
input_ids = inputs['input_ids']
token_type_ids = inputs['token_type_ids'] # token type id is 0 for Sentence A and 1 for Sentence B
attention = model(input_ids, token_type_ids=token_type_ids)[-1]
sentence_b_start = token_type_ids[0].tolist().index(1) # Sentence B starts at first index of token type id 1
token_ids = input_ids[0].tolist() # Batch index 0
tokens = tokenizer.convert_ids_to_tokens(token_ids)    
head_view(attention, tokens, sentence_b_start)

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

<IPython.core.display.Javascript object>

In [20]:
# for i in range(len(attention["attention"])):
print(attention[1].shape)
    

torch.Size([1, 12, 11, 11])


In [26]:
import torch
torch.set_printoptions(linewidth=150)

In [27]:
print(attention[1][0][1])

tensor([[6.7250e-01, 4.3021e-02, 1.6974e-02, 1.3577e-02, 1.4933e-02, 9.0494e-02, 4.0904e-02, 1.4257e-02, 6.3068e-03, 1.7775e-02, 6.9255e-02],
        [3.7976e-01, 4.9721e-02, 4.7012e-01, 1.4146e-02, 1.1501e-03, 6.6201e-02, 2.6593e-03, 8.7894e-03, 4.2824e-04, 1.0521e-04, 6.9236e-03],
        [7.9809e-01, 2.4899e-02, 2.7530e-02, 3.8013e-02, 5.1553e-03, 7.5608e-02, 2.7252e-02, 1.1500e-03, 1.4712e-04, 7.6194e-04, 1.3916e-03],
        [3.8716e-01, 2.6368e-02, 3.3290e-03, 1.6169e-03, 4.2832e-01, 1.2585e-01, 3.8769e-03, 6.8004e-03, 5.1286e-05, 1.3299e-02, 3.3251e-03],
        [9.9050e-02, 3.0018e-03, 7.0943e-03, 1.8442e-04, 3.1793e-03, 8.7541e-01, 5.7003e-03, 8.0954e-04, 4.8052e-05, 4.4462e-05, 5.4766e-03],
        [8.8026e-01, 3.0447e-03, 5.2380e-04, 1.1847e-03, 1.3358e-03, 7.0330e-02, 3.5889e-02, 7.6794e-04, 4.3776e-04, 1.3062e-03, 4.9183e-03],
        [2.3321e-01, 1.3153e-03, 7.8708e-03, 5.9084e-04, 6.0747e-04, 3.0919e-02, 6.1336e-02, 6.0128e-01, 1.2708e-02, 7.6619e-03, 4.2500e-02],
      