## Model View

In [23]:
import torch
import torch.nn as nn
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from bertviz import head_view, model_view

# Sample sentence fed to the tokenizer / visualisation call in each cell below.
text = "The quick brown fox jumps over the lazy dog"

### 1. Karpathy-style random parameter initialization

In [24]:
# Start from the stock "gpt2" checkpoint; `model.apply(random_init_weights)`
# further down in this cell re-draws weights on top of it.
model = GPT2LMHeadModel.from_pretrained("gpt2")
# Matching tokenizer, used to turn `text` into input ids and token strings.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

def random_init_weights(module):
    """Re-draw one module's parameters from a small zero-mean normal.

    Meant to be passed to ``model.apply`` so it is called once per sub-module.
    Weights of ``nn.Linear``, ``nn.Embedding`` and Hugging Face ``Conv1D``
    layers are sampled from a normal distribution (mean 0, std 0.02); biases
    of the linear-like layers are set to zero. Any other module (for example
    ``LayerNorm``) is left untouched.

    ``Conv1D`` must be handled explicitly: Hugging Face's GPT-2 builds its
    attention and MLP projections from ``Conv1D`` rather than ``nn.Linear``,
    so an ``nn.Linear``-only check would leave every transformer block with
    its pretrained weights.

    Args:
        module: the sub-module currently visited by ``model.apply``.
    """
    # Matched by class name so this cell needs no additional import and does
    # not depend on which transformers sub-module exposes Conv1D.
    is_conv1d = type(module).__name__ == "Conv1D"
    is_linear_like = is_conv1d or isinstance(module, torch.nn.Linear)
    if not (is_linear_like or isinstance(module, torch.nn.Embedding)):
        return

    # In-place re-initialisation must not be recorded by autograd.
    with torch.no_grad():
        module.weight.normal_(mean=0.0, std=0.02)
        if is_linear_like and getattr(module, "bias", None) is not None:
            module.bias.zero_()

# Seed the RNG first so the randomly drawn weights -- and therefore the
# rendered attention pattern -- are identical on every Restart & Run All.
torch.manual_seed(0)
model.apply(random_init_weights)

# Single inference-only forward pass that also returns attention weights.
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs, output_attentions=True)
attentions = outputs.attentions

# Token strings used by the visualisation as axis labels.
input_ids = inputs['input_ids']
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])

model_view(attentions, tokens)

<IPython.core.display.Javascript object>

### 2. Hugging Face GPT-2 pretrained model

In [25]:
model = GPT2LMHeadModel.from_pretrained("gpt2", output_attentions=True)  # Configure model to return attention values
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

inputs = tokenizer.encode(text, return_tensors='pt')  # Tokenize input text
# Inference only: no autograd graph is needed just to read attention weights.
with torch.no_grad():
    outputs = model(inputs)  # Run model
# Named access (as in the other cells) instead of the positional outputs[-1].
attention = outputs.attentions  # Retrieve attention from model outputs
tokens = tokenizer.convert_ids_to_tokens(inputs[0])  # Convert input ids to token strings

model_view(attention, tokens)  # Display model view

<IPython.core.display.Javascript object>

### 3. Hugging Face GPT-2 instruction-tuned model

In [26]:
model = GPT2LMHeadModel.from_pretrained("vicgalle/gpt2-open-instruct-v1", output_attentions=True)  # Configure model to return attention values
tokenizer = GPT2Tokenizer.from_pretrained("vicgalle/gpt2-open-instruct-v1")
inputs = tokenizer.encode(text, return_tensors='pt')  # Tokenize input text
# Inference only: no autograd graph is needed just to read attention weights.
with torch.no_grad():
    outputs = model(inputs)  # Run model
# Named access (as in the other cells) instead of the positional outputs[-1].
attention = outputs.attentions  # Retrieve attention from model outputs
tokens = tokenizer.convert_ids_to_tokens(inputs[0])  # Convert input ids to token strings
model_view(attention, tokens)  # Display model view

<IPython.core.display.Javascript object>

### 4. Attention weights with the KV cache enabled

In [27]:
# kv cache use: caching is requested both here at load time and again on the
# forward call below.
model = GPT2LMHeadModel.from_pretrained("gpt2", output_attentions=True, use_cache=True)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

inputs = tokenizer(text, return_tensors='pt')
# Token strings that label the visualisation.
tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])

# NOTE(review): the whole sentence goes through in one pass and no
# past_key_values are supplied, so this call presumably only *fills* a cache
# and never reads one -- the attention shown is likely the same as in the
# plain pretrained cell. Confirm whether an incremental decoding step was
# intended here.
with torch.no_grad():
    outputs = model(**inputs, use_cache=True)
attentions = outputs.attentions

model_view(attentions, tokens)

<IPython.core.display.Javascript object>

## Neuron View
The neuron view is currently limited to custom versions of BERT, GPT-2, and RoBERTa included with BertViz.

In [28]:
# Import specialized versions of models (that return query/key vectors).
# They are aliased on purpose: importing them under their own names would
# rebind the `GPT2Tokenizer` imported from transformers at the top of the
# notebook, so re-running any earlier cell afterwards would silently pick up
# the BertViz tokenizer class instead.
from bertviz.transformers_neuron_view import GPT2Model as NeuronViewGPT2Model
from bertviz.transformers_neuron_view import GPT2Tokenizer as NeuronViewGPT2Tokenizer
from bertviz.neuron_view import show

model = NeuronViewGPT2Model.from_pretrained("gpt2", output_attentions=True)
# NOTE(review): GPT-2's vocabulary is case-sensitive; confirm whether the
# BertViz tokenizer actually honours do_lower_case and whether it is wanted.
tokenizer = NeuronViewGPT2Tokenizer.from_pretrained("gpt2", do_lower_case=True)
# 'gpt2' is the model-type argument passed to show().
show(model, 'gpt2', tokenizer, text)

  state_dict = torch.load(resolved_archive_file, map_location='cpu')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>