# Summary
- **Projector** is a study tool that projects 768-dimensional vectors onto 3D space to visualize the transformation operations inside a transformer.
- This notebook visualizes how the GPT2 transformer transforms a simple prompt through its 12 transformer block layers to produce the predicted token.
  - This notebook is showing how to create the visualization on the front page.
  - Other results will be posted to demonstrate our studies and the uses of different **projector** features.
- The prompt we use is "Alan Turing theorized that the"; the predicted next token is "universe". The 5-word prompt is tokenized into a 6-token prompt because "theorized" is split into "theor" and "ized".
- Below we will see two projections.
  - The first shows how each token is transformed through the 12 layers, so the final prediction is the 6th (last) token at layer 12. We will see "universe" being generated.
  - The second shows the transformation of the 6 tokens as a group. This helps to see how the embedded tokens are transformed together. The prediction again appears at the 6th token of the 12th transformation.

In [None]:
import sys
import os
import torch
from PIL import Image
# Put the parent directory on the import path (presumably where the local
# `projector` package lives — confirm against the repository layout).
sys.path.append('..')
# Suppress warnings by discarding everything written to stderr.
# os.devnull is used instead of the literal '/dev/null' so the notebook also
# runs on platforms where that path does not exist (e.g. Windows).
# The handle is deliberately left open for the rest of the session.
sys.stderr = open(os.devnull, 'w')

## Embedding Projection

In [None]:
from projector.Projector import Projector
from projector.operator.GPT2Operator import GPT2Operator
from smallscript import *

In [None]:
# Load the smallscript 'projector' package through `sscontext`
# (presumably provided by the `from smallscript import *` star import).
# The trailing semicolon keeps the notebook from echoing the return value.
sscontext.loadPackage('projector'); 

In [None]:
# Load the Hugging Face GPT2 model: create a GPT2Operator named "gpt2" and
# call downloadModel() on it; the result is kept as `model` for later cells.
model = GPT2Operator().name("gpt2").downloadModel()

In [None]:
# Create a Projector instance named 'projector' and attach the loaded model.
# `pj` is the handle every later cell uses for projection and plotting.
pj = Projector().name('projector').model(model)

In [None]:
# Hand-pick six entries of the default palette so that each of the six
# prompt tokens gets its own distinct color.
colorIdx = [1, 0, 6, 7, 23, 24]
colors = [
    pj.colorShape().defaults()[i]
    for i in colorIdx
]
# Clone the projector's color shape, install the chosen colors and call reset().
cs = (
    pj.colorShape()
    .clone()
    .colors(colors)
    .reset()
)

In [None]:
# Display the color band and annotate it with the token each color stands for.
token_labels = ['Alan', 'Turing', 'theor', 'ized', 'that', 'the']
fig = cs.show()
fig.data[0].text = [token_labels]
# Keep a PNG copy; it is embedded into plot.html further down the notebook.
fig.write_image('colorband.png')
fig

In [None]:
# Calculate the 3D projection of the 50257 embedded tokens.
# The trailing semicolon keeps the notebook from echoing the return value.
pj.project();

In [None]:
# Show the 3D cloud of the projected token embeddings.
pj.showEmbedding()

## Transformer - Token-by-Token

In [None]:
# Read the parameters needed below from the model: the head count and the
# weight/bias stored under the 'ln_f' keys (GPT2's final layer norm, going by
# the key names).
nHead = pj.model().modelParams()['n_head'].value()
lnfw = pj.model().modelParams().getValue('ln_f.w')
lnfb = pj.model().modelParams().getValue('ln_f.b')
# Hand the ln_f weight and bias to the projector as its w/b offsets.
pj.wOffset(lnfw)    # lnfw.norm 56.8
pj.bOffset(lnfb);

In [None]:
# Drop any existing traces, fix the prompt, and show the projected origin
# (a trace built from the scalar 0) in black.
pj.clearTraces()
prompt = "Alan Turing theorized that the"
origin = (
    pj.newTrace()
    .name('origin')
    .label('origin')
    .fromVectors(0)
    .color('black')
    .show()
)

In [None]:
# Drive the transformer stage by stage through an inference object and keep
# the output of every stage.
# Python: infer.prompt(prompt).wte().wpe().layer(0).layer(1)...layer(11)
# Smallscript: infer wte wpe | layer: 0 | layer: 1 | ... | layer: 11
infer = model.inference().prompt(prompt)
wte = infer.ssrun("self wte | x")
wpe = infer.ssrun("self wpe | x")
# One entry per transformer layer, in layer order 0..11.
vectors = [infer.ssrun(f"self layer: {n} | x") for n in range(12)]

In [None]:
# Turn the collected outputs into traces, one set per prompt token:
#   twtes  - the token's `wte` row
#   twpes  - the token's `wpe` row
#   blocks - the token's row from every layer output, stacked in layer order
twtes, twpes, blocks = [], [], []
for t in range(wte.shape[0]):
    te_name = f"te{t}"
    twte = pj.newTrace().name(te_name).label(te_name).color(colors[t]).fromVectors(wte[t])
    twtes.append(twte)

    pe_name = f"pe{t}"
    twpe = pj.newTrace().name(pe_name).label(pe_name).color(colors[t]).fromVectors(wpe[t])
    twpes.append(twpe)

    # Row t of each layer output, stacked into a single tensor.
    transitions = torch.stack([v[t] for v in vectors])
    trace = pj.newTrace().name(f"token{t}").color(colors[t]).fromVectors(transitions).wbnorm()
    blocks.append(trace)
    pj.nextColor()

In [None]:
# Reveal the traces one token at a time: wte trace, wpe trace, then the
# token's path through the layers.
for twte, twpe, trace in zip(twtes, twpes, blocks):
    twte.show()
    twpe.show()
    trace.show()

In [None]:
# Reset the view's aspect mode to "cube"; afterwards, please rotate the
# interactive plot by hand to a good viewing angle (a later cell reads the
# camera back with getCamera()).
vw = pj.getView();
vw.aspectmode('cube');
pj.updateView(vw);

In [None]:
# Save the final result as plot.html
pj.figure().write_html('plot.html')

# Add the colorband.png
# The file is read and rewritten explicitly as UTF-8 so the round trip does
# not depend on the platform's default text encoding.
# NOTE(review): assumes pj.figure() is a Plotly figure, whose write_html emits
# UTF-8 — confirm.
with open('plot.html', 'r', encoding='utf-8') as file:
    lines = file.readlines()
# Index 3 presumably lands just inside <body> of the exported page —
# TODO confirm against the generated file if the exporter changes.
lines.insert(3, "    <img src='colorband.png'></img>\n")
with open('plot.html', 'w', encoding='utf-8') as file:
    file.writelines(lines)

In [None]:
# The camera is set now, so take every token's traces off the figure; the
# next cell shows them again one token at a time while saving frames.
for twte, twpe, trace in zip(twtes, twpes, blocks):
    twte.remove()
    twpe.remove()
    trace.remove()

In [None]:
# Save one PNG frame per step (combined into a GIF in the next cell):
# token0.png before any token trace is shown again, then token1.png,
# token2.png, ... after each token's traces are re-shown.
pj.figure().write_image("token0.png")
for frame, (twte, twpe, trace) in enumerate(zip(twtes, twpes, blocks), start=1):
    twte.show()
    twpe.show()
    trace.show()
    pj.figure().write_image(f"token{frame}.png")

In [None]:
# Combine the saved frames token0.png .. token<N>.png into plot.gif, deleting
# each PNG once it has been read.
images = []
for t in range(wte.shape[0]+1):
    file = f"token{t}.png"
    # convert() yields a separate image object, so the source file can be
    # closed explicitly before it is deleted instead of relying on Pillow
    # closing it implicitly.
    with Image.open(file) as source:
        image = source.convert('RGB')
    os.remove(file)
    images.append(image)
# First frame plus the remaining frames; duration=1000 and loop=0 are passed
# through to the GIF writer.
images[0].save('plot.gif', save_all=True, append_images=images[1:], duration=1000, loop=0)

In [None]:
# Get the current camera and save the projector cache (per the original note,
# written as project.zip together with other internal projector parameters).
# The trailing semicolon keeps the notebook from echoing the return value.
camera = pj.getCamera()
pj.saveCache();

## Transformer - Layer-by-Layer

In [None]:
# Start over with a fresh projector on the same model and give it the same
# ln_f weight/bias offsets as before.
pj = (
    Projector()
    .name('projector')
    .model(model)
)
pj.wOffset(lnfw)    # lnfw.norm 56.8
pj.bOffset(lnfb);

In [None]:
# Load the cache saved earlier, apply the cached camera so the plot is
# oriented like before, then display the embedding cloud again.
pj.loadCache();
pj.updateCamera()
pj.showEmbedding()

In [None]:
# Start the layer-by-layer view from a clean figure and show the projected
# origin in black.
pj.clearTraces()
origin = (
    pj.newTrace()
    .name('origin')
    .label('origin')
    .fromVectors(0)
    .color('black')
    .show()
)

# Execute the transformer using a fresh inference object and wrap each stage's
# output (all tokens together) in a single trace.
prompt = "Alan Turing theorized that the"
infer = model.inference().prompt(prompt)
x = infer.ssrun("self wte | x")
wte = pj.newTrace().name('te').fromVectors(x)    # NOTE: `wte` is a trace here, not the raw ssrun output
x = infer.ssrun("self wpe | x")
wpe = pj.nextColor().newTrace().name('pe').fromVectors(x)
pj.nextColor()

# Traces in the order they will be shown: origin, wte, wpe (layers follow).
layers = [origin, wte, wpe]

In [None]:
# Run the 12 transformer layers in order and add one trace per layer output.
for n in range(12):
    pj.nextColor()    # presumably advances the projector's current color — confirm
    layer = infer.ssrun(f"self layer: {n} | x")
    trace = (
        pj.newTrace()
        .name(f"layer{n}")
        .fromVectors(layer)
        .wbnorm()
    )
    layers.append(trace)

In [None]:
# Reveal the collected traces in order, one layer after another.
for layer_trace in layers:
    layer_trace.show()