# Setup
(No need to read)

In [8]:
# Janky code to do different setup when run in a Colab notebook vs VSCode
DEBUG_MODE = False
try:
    import google.colab
    IN_COLAB = True
    print("Running as a Colab notebook")
    %pip install git+https://github.com/neelnanda-io/TransformerLens.git
    # Install another version of node that makes PySvelte work way faster
    !curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -; sudo apt-get install -y nodejs
    %pip install git+https://github.com/neelnanda-io/PySvelte.git
except:
    IN_COLAB = False
    print("Running as a Jupyter notebook - intended for development only!")
    from IPython import get_ipython

    ipython = get_ipython()
    # Code to automatically update the HookedTransformer code as its edited without restarting the kernel
    ipython.magic("load_ext autoreload")
    ipython.magic("autoreload 2")

Running as a Colab notebook
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/neelnanda-io/TransformerLens.git
  Cloning https://github.com/neelnanda-io/TransformerLens.git to /tmp/pip-req-build-jw82au0y
  Running command git clone --filter=blob:none --quiet https://github.com/neelnanda-io/TransformerLens.git /tmp/pip-req-build-jw82au0y
  Resolved https://github.com/neelnanda-io/TransformerLens.git to commit 6983358b24c933e8787be8ffd3f518d0374fdd09
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting fancy-einsum>=0.0.3 (from transformer-lens==0.0.0)
  Downloading fancy_einsum-0.0.3-py3-none-any.whl (6.2 kB)
Collecting jaxtyping>=0.2.11 (from transformer-lens==0.0.0)
  Downloading jaxtyping-0.2.20-py3-none-any.whl (24 kB)
Collecting numpy>=1.23 (from transformer-lens==0.0.0)
  


## Installing the NodeSource Node.js 16.x repo...


## Populating apt-get cache...

+ apt-get update
0% [Working]            Hit:1 https://deb.nodesource.com/node_16.x focal InRelease
0% [Connecting to archive.ubuntu.com] [Waiting for headers] [Connecting to clou                                                                               Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  InRelease
0% [Connecting to archive.ubuntu.com] [Waiting for headers] [Connected to cloud                                                                               Hit:3 https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/ InRelease
Hit:4 http://security.ubuntu.com/ubuntu focal-security InRelease
Hit:5 http://archive.ubuntu.com/ubuntu focal InRelease
Hit:6 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu focal InRelease
Hit:7 http://archive.ubuntu.com/ubuntu focal-updates InRelease
Hit:8 http://archive.ubuntu.com/ubuntu focal-backports InRelease


In [9]:
# Pick the Plotly renderer that matches the front-end this notebook is running in.
import plotly.io as pio

# Plotly figures show up in Colab OR in VSCode depending on the renderer, never both, which
# is awkward when developing demos. The static "png" renderer is therefore used only when
# debugging outside Colab; every other combination uses the "colab" renderer.
pio.renderers.default = "png" if (DEBUG_MODE and not IN_COLAB) else "colab"

In [10]:
# Import stuff
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import einops
from fancy_einsum import einsum
import tqdm.notebook as tqdm
import random
from pathlib import Path
import plotly.express as px
from torch.utils.data import DataLoader

from jaxtyping import Float, Int
from typing import List, Union, Optional
from functools import partial
import copy

import itertools
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
import dataclasses
import datasets
from IPython.display import HTML

In [11]:
import pysvelte

import transformer_lens
import transformer_lens.utils as utils
from transformer_lens.hook_points import (
    HookedRootModule,
    HookPoint,
)  # Hooking utilities
from transformer_lens import HookedTransformer, HookedTransformerConfig, FactoredMatrix, ActivationCache

In [12]:
# Load the pretrained "gpt2-small" weights into a HookedTransformer; this `model` is the one
# passed to every `utils.test_prompt` call in the cells that follow.
model = HookedTransformer.from_pretrained("gpt2-small")

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.


Loaded pretrained model gpt2-small into HookedTransformer


# Test prompts

We're designing prompts that expect the next word to be a subject.

In [None]:
# Two "The <noun> is <Name>." statements, then a differently structured sentence that
# introduces Connor, then the query "The human is". The scored answer is the most
# recently mentioned name (" Connor").
example_prompt = "The student is John. The pet is Mary. Connor went to the store. The human is"
example_answer = " Connor"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'The', ' student', ' is', ' John', '.', ' The', ' pet', ' is', ' Mary', '.', ' Connor', ' went', ' to', ' the', ' store', '.', ' The', ' human', ' is']
Tokenized answer: [' Connor']


Top 0th token. Logit: 14.37 Prob: 14.82% Token: | John|
Top 1th token. Logit: 12.88 Prob:  3.35% Token: | Connor|
Top 2th token. Logit: 12.73 Prob:  2.89% Token: | the|
Top 3th token. Logit: 12.38 Prob:  2.02% Token: | a|
Top 4th token. Logit: 12.02 Prob:  1.42% Token: | James|
Top 5th token. Logit: 11.74 Prob:  1.07% Token: | Mary|
Top 6th token. Logit: 11.42 Prob:  0.78% Token: | Jack|
Top 7th token. Logit: 11.25 Prob:  0.66% Token: | Michael|
Top 8th token. Logit: 11.17 Prob:  0.60% Token: | Sarah|
Top 9th token. Logit: 11.07 Prob:  0.55% Token: | Sam|


This predicts “John” over “Connor”. This means it’s not ALWAYS doing “most recent subject”. So what makes it choose “which” subject?

Generalize IOI and ‘most recent subject’ to “Subject Choice Circuits”. Perhaps there is a consistent pattern, perhaps not.

In [None]:
# All three names are introduced with the same "The <noun> is <Name>." template before the
# query "The human is". The scored answer is again the most recently mentioned name.
example_prompt = "The student is John. The pet is Mary. The king is Connor. The human is"
example_answer = " Connor"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'The', ' student', ' is', ' John', '.', ' The', ' pet', ' is', ' Mary', '.', ' The', ' king', ' is', ' Connor', '.', ' The', ' human', ' is']
Tokenized answer: [' Connor']


Top 0th token. Logit: 13.55 Prob:  6.57% Token: | John|
Top 1th token. Logit: 12.63 Prob:  2.61% Token: | the|
Top 2th token. Logit: 11.99 Prob:  1.37% Token: | Connor|
Top 3th token. Logit: 11.70 Prob:  1.03% Token: | Mary|
Top 4th token. Logit: 11.66 Prob:  0.99% Token: | a|
Top 5th token. Logit: 11.47 Prob:  0.82% Token: | Jack|
Top 6th token. Logit: 11.45 Prob:  0.80% Token: | James|
Top 7th token. Logit: 11.45 Prob:  0.80% Token: | Sam|
Top 8th token. Logit: 11.28 Prob:  0.68% Token: | Sarah|
Top 9th token. Logit: 11.07 Prob:  0.55% Token: | David|


This predicts John over Connor again. Why did previous prompts use most recent subject, over earliest subject?

In [None]:
# Two parallel groups of "<Name> is a <role>." statements. The first group is followed by
# the in-context example "The child is Bob." (the student of that group); the second group
# is followed by the open query "The child is", scored against " David".
example_prompt = "Alice is a teacher. Bob is a student. The child is Bob. Carol is a teacher. David is a student. The child is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' a', ' teacher', '.', ' Bob', ' is', ' a', ' student', '.', ' The', ' child', ' is', ' Bob', '.', ' Carol', ' is', ' a', ' teacher', '.', ' David', ' is', ' a', ' student', '.', ' The', ' child', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 19.13 Prob: 90.02% Token: | David|
Top 1th token. Logit: 14.97 Prob:  1.41% Token: | Dave|
Top 2th token. Logit: 13.91 Prob:  0.49% Token: | Bob|
Top 3th token. Logit: 13.42 Prob:  0.30% Token: | Carol|
Top 4th token. Logit: 13.19 Prob:  0.24% Token: | D|
Top 5th token. Logit: 13.17 Prob:  0.23% Token: | Dr|
Top 6th token. Logit: 13.04 Prob:  0.20% Token: | Ed|
Top 7th token. Logit: 13.00 Prob:  0.20% Token: | Don|
Top 8th token. Logit: 12.82 Prob:  0.16% Token: | the|
Top 9th token. Logit: 12.54 Prob:  0.12% Token: | Ted|


In [None]:
# Variant of the two-group prompt where the in-context example names the teacher of the
# first group ("The child is Alice."); the scored answer is still " David".
example_prompt = "Alice is a teacher. Bob is a student. The child is Alice. Carol is a teacher. David is a student. The child is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' a', ' teacher', '.', ' Bob', ' is', ' a', ' student', '.', ' The', ' child', ' is', ' Alice', '.', ' Carol', ' is', ' a', ' teacher', '.', ' David', ' is', ' a', ' student', '.', ' The', ' child', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 19.11 Prob: 90.18% Token: | David|
Top 1th token. Logit: 14.55 Prob:  0.94% Token: | Carol|
Top 2th token. Logit: 14.46 Prob:  0.86% Token: | Dave|
Top 3th token. Logit: 13.69 Prob:  0.40% Token: | Alice|
Top 4th token. Logit: 13.35 Prob:  0.28% Token: | Bob|
Top 5th token. Logit: 12.77 Prob:  0.16% Token: | the|
Top 6th token. Logit: 12.71 Prob:  0.15% Token: | Charles|
Top 7th token. Logit: 12.65 Prob:  0.14% Token: | Ted|
Top 8th token. Logit: 12.64 Prob:  0.14% Token: | Don|
Top 9th token. Logit: 12.63 Prob:  0.14% Token: | Ed|


From what we’ve found, the “source sentence” doesn’t matter: whether it is “The child is Bob” or “The child is Alice”, the model is very confident in outputting “David” (~90%) over the second-place token (~1%).

One difference between these sentences and the ones that predict the earliest subject is that these use “[Subject] is a [word]. The [word_2] is”, whereas the previous ones use “The [word] is [Subject]. The [word_2] is”. Now, is the model doing this because the to-output sentence has a different format, or because of whether the subject or the word comes first in the sentence (in-context or to-output)?

1) Try changing all subject-description ordering to match the to-output format (same format):

## Tangent experiments

In [None]:
# Only "<Name> is a <role>." statements precede the query; there is no in-context
# "The child is <Name>." example. The scored answer is the most recently mentioned name.
example_prompt = "Alice is a teacher. Bob is a student. Connor is a student. The child is"
example_answer = " Connor"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' a', ' teacher', '.', ' Bob', ' is', ' a', ' student', '.', ' Connor', ' is', ' a', ' student', '.', ' The', ' child', ' is']
Tokenized answer: [' Connor']


Top 0th token. Logit: 15.04 Prob: 20.14% Token: | a|
Top 1th token. Logit: 13.60 Prob:  4.78% Token: | Alice|
Top 2th token. Logit: 13.60 Prob:  4.76% Token: | the|
Top 3th token. Logit: 13.15 Prob:  3.04% Token: | an|
Top 4th token. Logit: 12.87 Prob:  2.30% Token: | not|
Top 5th token. Logit: 12.70 Prob:  1.93% Token: | always|
Top 6th token. Logit: 12.64 Prob:  1.82% Token: | in|
Top 7th token. Logit: 12.17 Prob:  1.14% Token: | named|
Top 8th token. Logit: 12.15 Prob:  1.12% Token: | her|
Top 9th token. Logit: 11.92 Prob:  0.89% Token: | very|


In [None]:
# Three "<Name> is <role>." statements written without the article "a", followed by the
# query "The child is". The scored answer is the most recently mentioned name.
example_prompt = "Alice is teacher. Bob is student. Connor is student. The child is"
example_answer = " Connor"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' teacher', '.', ' Bob', ' is', ' student', '.', ' Connor', ' is', ' student', '.', ' The', ' child', ' is']
Tokenized answer: [' Connor']


Top 0th token. Logit: 13.67 Prob: 11.01% Token: | a|
Top 1th token. Logit: 13.21 Prob:  6.98% Token: | the|
Top 2th token. Logit: 12.13 Prob:  2.37% Token: | not|
Top 3th token. Logit: 11.96 Prob:  2.00% Token: | an|
Top 4th token. Logit: 11.90 Prob:  1.89% Token: | always|
Top 5th token. Logit: 11.81 Prob:  1.73% Token: | in|
Top 6th token. Logit: 11.35 Prob:  1.09% Token: | Alice|
Top 7th token. Logit: 11.25 Prob:  0.99% Token: | his|
Top 8th token. Logit: 11.15 Prob:  0.88% Token: | her|
Top 9th token. Logit: 11.02 Prob:  0.78% Token: | just|


In [None]:
# The same description word ("teacher") is paired with three different names, and the
# query repeats the template. The scored answer is the most recently mentioned name.
example_prompt = "The teacher is Alice. The teacher is Bob. The teacher is David. The teacher is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'The', ' teacher', ' is', ' Alice', '.', ' The', ' teacher', ' is', ' Bob', '.', ' The', ' teacher', ' is', ' David', '.', ' The', ' teacher', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 13.54 Prob:  3.15% Token: | Alice|
Top 1th token. Logit: 13.21 Prob:  2.25% Token: | the|
Top 2th token. Logit: 12.90 Prob:  1.65% Token: | David|
Top 3th token. Logit: 12.70 Prob:  1.36% Token: | John|
Top 4th token. Logit: 12.60 Prob:  1.23% Token: | Jane|
Top 5th token. Logit: 12.58 Prob:  1.21% Token: | Mary|
Top 6th token. Logit: 12.38 Prob:  0.99% Token: | Bob|
Top 7th token. Logit: 12.35 Prob:  0.96% Token: | a|
Top 8th token. Logit: 12.23 Prob:  0.85% Token: | James|
Top 9th token. Logit: 12.07 Prob:  0.72% Token: | Peter|


In [None]:
# Both names appear in a single sentence joined by "and", followed by the query
# "The student is". The scored answer is the first-mentioned name.
example_prompt = "John and Mary went to the store. The student is"
example_answer = " John"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'John', ' and', ' Mary', ' went', ' to', ' the', ' store', '.', ' The', ' student', ' is']
Tokenized answer: [' John']


Top 0th token. Logit: 15.49 Prob: 12.02% Token: | a|
Top 1th token. Logit: 14.12 Prob:  3.06% Token: | in|
Top 2th token. Logit: 13.96 Prob:  2.61% Token: | very|
Top 3th token. Logit: 13.87 Prob:  2.39% Token: | wearing|
Top 4th token. Logit: 13.84 Prob:  2.31% Token: | the|
Top 5th token. Logit: 13.67 Prob:  1.95% Token: | not|
Top 6th token. Logit: 13.55 Prob:  1.74% Token: | an|
Top 7th token. Logit: 13.45 Prob:  1.57% Token: | upset|
Top 8th token. Logit: 13.36 Prob:  1.43% Token: | sitting|
Top 9th token. Logit: 13.34 Prob:  1.40% Token: | still|


In [None]:
# Name sequence Alice, Bob, Alice under one repeated template. The scored answer " Bob" is
# the name that followed the earlier occurrence of "Alice".
example_prompt = "The teacher is Alice. The teacher is Bob. The teacher is Alice. The teacher is"
example_answer = " Bob"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'The', ' teacher', ' is', ' Alice', '.', ' The', ' teacher', ' is', ' Bob', '.', ' The', ' teacher', ' is', ' Alice', '.', ' The', ' teacher', ' is']
Tokenized answer: [' Bob']


Top 0th token. Logit: 16.35 Prob: 45.84% Token: | Bob|
Top 1th token. Logit: 16.11 Prob: 35.80% Token: | Alice|
Top 2th token. Logit: 12.31 Prob:  0.80% Token: | the|
Top 3th token. Logit: 11.98 Prob:  0.58% Token: | a|
Top 4th token. Logit: 11.52 Prob:  0.36% Token: | Jack|
Top 5th token. Logit: 11.07 Prob:  0.23% Token: | not|
Top 6th token. Logit: 10.87 Prob:  0.19% Token: | John|
Top 7th token. Logit: 10.85 Prob:  0.19% Token: | Jim|
Top 8th token. Logit: 10.79 Prob:  0.18% Token: | Charlie|
Top 9th token. Logit: 10.69 Prob:  0.16% Token: | Ron|


As expected with induction heads or “duplicate identifiers”, this gives “Bob”, because “Bob” is the name that followed the earlier occurrence of “Alice”.

In [None]:
# Name sequence Alice, Bob, Bob under one repeated template; the scored answer is " Bob",
# the name that was just repeated.
example_prompt = "The teacher is Alice. The teacher is Bob. The teacher is Bob. The teacher is"
example_answer = " Bob"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'The', ' teacher', ' is', ' Alice', '.', ' The', ' teacher', ' is', ' Bob', '.', ' The', ' teacher', ' is', ' Bob', '.', ' The', ' teacher', ' is']
Tokenized answer: [' Bob']


Top 0th token. Logit: 16.69 Prob: 61.43% Token: | Bob|
Top 1th token. Logit: 15.62 Prob: 21.15% Token: | Alice|
Top 2th token. Logit: 12.49 Prob:  0.93% Token: | the|
Top 3th token. Logit: 12.25 Prob:  0.73% Token: | a|
Top 4th token. Logit: 11.37 Prob:  0.30% Token: | Jack|
Top 5th token. Logit: 11.16 Prob:  0.24% Token: | not|
Top 6th token. Logit: 10.81 Prob:  0.17% Token: | John|
Top 7th token. Logit: 10.81 Prob:  0.17% Token: | bob|
Top 8th token. Logit: 10.63 Prob:  0.14% Token: | Charlie|
Top 9th token. Logit: 10.56 Prob:  0.13% Token: | Robert|


In [None]:
# Four "<Name> is a <role>." statements with no in-context "The child is <Name>." example.
# The scored answer is the earliest name (" Alice").
example_prompt = "Alice is a teacher. Bob is a student. Carol is a teacher. David is a student. The child is"
example_answer = " Alice"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' a', ' teacher', '.', ' Bob', ' is', ' a', ' student', '.', ' Carol', ' is', ' a', ' teacher', '.', ' David', ' is', ' a', ' student', '.', ' The', ' child', ' is']
Tokenized answer: [' Alice']


Top 0th token. Logit: 15.05 Prob: 23.89% Token: | a|
Top 1th token. Logit: 13.61 Prob:  5.65% Token: | Alice|
Top 2th token. Logit: 13.41 Prob:  4.62% Token: | the|
Top 3th token. Logit: 13.11 Prob:  3.43% Token: | an|
Top 4th token. Logit: 12.65 Prob:  2.16% Token: | not|
Top 5th token. Logit: 12.55 Prob:  1.97% Token: | named|
Top 6th token. Logit: 12.36 Prob:  1.62% Token: | in|
Top 7th token. Logit: 12.15 Prob:  1.32% Token: | always|
Top 8th token. Logit: 11.54 Prob:  0.72% Token: | one|
Top 9th token. Logit: 11.49 Prob:  0.68% Token: | also|


In [None]:
# Four "<Name> is <role>." statements written without the article "a" and with no
# in-context "The child is <Name>." example. The scored answer is the earliest name.
example_prompt = "Alice is teacher. Bob is student. Carol is teacher. David is student. The child is"
example_answer = " Alice"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' teacher', '.', ' Bob', ' is', ' student', '.', ' Carol', ' is', ' teacher', '.', ' David', ' is', ' student', '.', ' The', ' child', ' is']
Tokenized answer: [' Alice']


Top 0th token. Logit: 13.42 Prob: 11.88% Token: | a|
Top 1th token. Logit: 12.89 Prob:  7.01% Token: | the|
Top 2th token. Logit: 11.76 Prob:  2.26% Token: | not|
Top 3th token. Logit: 11.69 Prob:  2.12% Token: | an|
Top 4th token. Logit: 11.48 Prob:  1.71% Token: | in|
Top 5th token. Logit: 11.16 Prob:  1.25% Token: | named|
Top 6th token. Logit: 11.16 Prob:  1.24% Token: | always|
Top 7th token. Logit: 10.99 Prob:  1.05% Token: | his|
Top 8th token. Logit: 10.88 Prob:  0.95% Token: | teacher|
Top 9th token. Logit: 10.67 Prob:  0.76% Token: | her|


In [None]:
# Two "The <noun> is <Name>." statements (Alice, Bob), then "Connor went to the store.",
# then the query "The human is". The scored answer is the earliest name (" Alice").
example_prompt = "The student is Alice. The pet is Bob. Connor went to the store. The human is"
example_answer = " Alice"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'The', ' student', ' is', ' Alice', '.', ' The', ' pet', ' is', ' Bob', '.', ' Connor', ' went', ' to', ' the', ' store', '.', ' The', ' human', ' is']
Tokenized answer: [' Alice']


Top 0th token. Logit: 13.62 Prob:  6.41% Token: | Alice|
Top 1th token. Logit: 12.69 Prob:  2.53% Token: | the|
Top 2th token. Logit: 12.54 Prob:  2.18% Token: | Bob|
Top 3th token. Logit: 12.33 Prob:  1.76% Token: | a|
Top 4th token. Logit: 12.13 Prob:  1.45% Token: | Jack|
Top 5th token. Logit: 11.99 Prob:  1.26% Token: | Connor|
Top 6th token. Logit: 11.95 Prob:  1.20% Token: | Jake|
Top 7th token. Logit: 11.89 Prob:  1.14% Token: | Charlie|
Top 8th token. Logit: 11.87 Prob:  1.11% Token: | John|
Top 9th token. Logit: 11.39 Prob:  0.69% Token: | James|


In [None]:
# The query ends with the article ("The child is a") instead of "The child is"; the scored
# answer is still the name " Alice".
# NOTE(review): a name is an unusual continuation after " a" — confirm the answer is intended.
example_prompt = "Alice is a teacher. Bob is a student. Carol is a teacher. David is a student. The child is a"
example_answer = " Alice"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' a', ' teacher', '.', ' Bob', ' is', ' a', ' student', '.', ' Carol', ' is', ' a', ' teacher', '.', ' David', ' is', ' a', ' student', '.', ' The', ' child', ' is', ' a']
Tokenized answer: [' Alice']


Top 0th token. Logit: 14.92 Prob: 24.77% Token: | teacher|
Top 1th token. Logit: 13.31 Prob:  4.99% Token: | student|
Top 2th token. Logit: 12.55 Prob:  2.31% Token: | child|
Top 3th token. Logit: 12.09 Prob:  1.46% Token: | boy|
Top 4th token. Logit: 11.85 Prob:  1.15% Token: | girl|
Top 5th token. Logit: 11.84 Prob:  1.14% Token: | friend|
Top 6th token. Logit: 11.77 Prob:  1.06% Token: | parent|
Top 7th token. Logit: 11.70 Prob:  0.99% Token: | member|
Top 8th token. Logit: 11.48 Prob:  0.80% Token: | school|
Top 9th token. Logit: 11.44 Prob:  0.76% Token: | little|


## Try to find more ways to get the model to output the most recent subject

In [None]:
# Bare names with no punctuation or description words: "Alice Bob David" followed by a
# repeat of "Alice Bob". The scored answer is " David", the name that completes the repeat.
example_prompt = "Alice Bob David Alice Bob"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' Bob', ' David', ' Alice', ' Bob']
Tokenized answer: [' David']


Top 0th token. Logit: 13.88 Prob: 19.36% Token: | David|
Top 1th token. Logit: 13.57 Prob: 14.22% Token: | Bob|
Top 2th token. Logit: 12.25 Prob:  3.80% Token: | is|
Top 3th token. Logit: 11.96 Prob:  2.83% Token: | Dave|
Top 4th token. Logit: 11.75 Prob:  2.31% Token: |'s|
Top 5th token. Logit: 11.32 Prob:  1.50% Token: |,|
Top 6th token. Logit: 11.30 Prob:  1.47% Token: | Dylan|
Top 7th token. Logit: 11.10 Prob:  1.20% Token: | and|
Top 8th token. Logit: 10.87 Prob:  0.95% Token: | House|
Top 9th token. Logit: 10.67 Prob:  0.78% Token: |:|


In [None]:
# The repeated name sequence with every name terminated by a period; the scored answer is
# " David", the name that completes the repeat.
example_prompt = "Alice. Bob. David. Alice. Bob."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', '.', ' Bob', '.', ' David', '.', ' Alice', '.', ' Bob', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 17.00 Prob: 63.72% Token: | David|
Top 1th token. Logit: 14.01 Prob:  3.21% Token: |
|
Top 2th token. Logit: 12.72 Prob:  0.88% Token: | Dave|
Top 3th token. Logit: 12.24 Prob:  0.54% Token: | D|
Top 4th token. Logit: 12.19 Prob:  0.52% Token: | Bob|
Top 5th token. Logit: 12.19 Prob:  0.52% Token: | John|
Top 6th token. Logit: 12.18 Prob:  0.51% Token: | The|
Top 7th token. Logit: 12.07 Prob:  0.46% Token: | Don|
Top 8th token. Logit: 11.93 Prob:  0.40% Token: | I|
Top 9th token. Logit: 11.80 Prob:  0.35% Token: |<|endoftext|>|


In [None]:
# Period-separated name sequence where the repeated first name is spelled "Alic", so the
# repeat is no longer an exact copy of the first pass.
# NOTE(review): presumably a deliberate perturbation rather than a typo — confirm.
example_prompt = "Alice. Bob. David. Alic. Bob."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', '.', ' Bob', '.', ' David', '.', ' Alic', '.', ' Bob', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 15.48 Prob: 24.85% Token: | David|
Top 1th token. Logit: 13.33 Prob:  2.89% Token: | Bob|
Top 2th token. Logit: 12.83 Prob:  1.75% Token: |
|
Top 3th token. Logit: 12.70 Prob:  1.54% Token: | John|
Top 4th token. Logit: 12.16 Prob:  0.90% Token: | James|
Top 5th token. Logit: 12.07 Prob:  0.82% Token: | Dave|
Top 6th token. Logit: 12.07 Prob:  0.82% Token: | Alice|
Top 7th token. Logit: 11.93 Prob:  0.71% Token: | D|
Top 8th token. Logit: 11.85 Prob:  0.65% Token: | Richard|
Top 9th token. Logit: 11.73 Prob:  0.58% Token: | Ed|


In [None]:
# Only a single pass through the three period-terminated names, with no repeat to complete.
# The scored answer is the most recently mentioned name.
example_prompt = "Alice. Bob. David."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', '.', ' Bob', '.', ' David', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 14.26 Prob:  6.01% Token: | Alice|
Top 1th token. Logit: 14.12 Prob:  5.24% Token: |
|
Top 2th token. Logit: 14.07 Prob:  4.99% Token: | Bob|
Top 3th token. Logit: 13.04 Prob:  1.77% Token: | The|
Top 4th token. Logit: 12.92 Prob:  1.57% Token: | John|
Top 5th token. Logit: 12.76 Prob:  1.34% Token: | I|
Top 6th token. Logit: 12.41 Prob:  0.95% Token: | Jack|
Top 7th token. Logit: 12.40 Prob:  0.94% Token: | Bill|
Top 8th token. Logit: 12.25 Prob:  0.81% Token: | And|
Top 9th token. Logit: 12.24 Prob:  0.79% Token: | David|


In [None]:
# One natural-language sentence that mentions all three names; the prompt ends at the
# period and the scored answer is the last-mentioned name.
example_prompt = "Alice went shopping with Bob and David."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' went', ' shopping', ' with', ' Bob', ' and', ' David', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 19.72 Prob: 13.71% Token: | Bob|
Top 1th token. Logit: 19.66 Prob: 12.93% Token: | Alice|
Top 2th token. Logit: 19.41 Prob: 10.14% Token: | They|
Top 3th token. Logit: 19.17 Prob:  7.93% Token: |
|
Top 4th token. Logit: 19.16 Prob:  7.84% Token: | She|
Top 5th token. Logit: 18.59 Prob:  4.45% Token: | The|
Top 6th token. Logit: 18.23 Prob:  3.09% Token: | After|
Top 7th token. Logit: 17.94 Prob:  2.32% Token: | When|
Top 8th token. Logit: 17.82 Prob:  2.07% Token: | It|
Top 9th token. Logit: 17.63 Prob:  1.70% Token: | He|


In [None]:
# The three names form a comma-separated subject of one sentence; the prompt ends at the
# period and the scored answer is the last-mentioned name.
example_prompt = "Alice, Bob, David went shopping."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ',', ' Bob', ',', ' David', ' went', ' shopping', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 18.42 Prob: 11.65% Token: |
|
Top 1th token. Logit: 18.27 Prob: 10.06% Token: | She|
Top 2th token. Logit: 17.97 Prob:  7.47% Token: | Bob|
Top 3th token. Logit: 17.85 Prob:  6.63% Token: | They|
Top 4th token. Logit: 17.71 Prob:  5.73% Token: | I|
Top 5th token. Logit: 17.70 Prob:  5.67% Token: | Alice|
Top 6th token. Logit: 17.34 Prob:  3.97% Token: | He|
Top 7th token. Logit: 17.02 Prob:  2.90% Token: | The|
Top 8th token. Logit: 16.60 Prob:  1.89% Token: | "|
Top 9th token. Logit: 16.53 Prob:  1.76% Token: | It|


In [None]:
# Two-group prompt using king/queen as description words and "lion" as the query noun.
# The in-context example is "The lion is Alice."; the open query "The lion is" is scored
# against " David".
example_prompt = "Alice is king. Bob is queen. The lion is Alice. Carol is king. David is queen. The lion is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' lion', ' is', ' Alice', '.', ' Carol', ' is', ' king', '.', ' David', ' is', ' queen', '.', ' The', ' lion', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 16.82 Prob: 67.79% Token: | David|
Top 1th token. Logit: 14.42 Prob:  6.12% Token: | Alice|
Top 2th token. Logit: 14.33 Prob:  5.59% Token: | Carol|
Top 3th token. Logit: 12.67 Prob:  1.06% Token: | the|
Top 4th token. Logit: 12.65 Prob:  1.05% Token: | Bob|
Top 5th token. Logit: 12.03 Prob:  0.56% Token: | King|
Top 6th token. Logit: 11.98 Prob:  0.54% Token: | Charles|
Top 7th token. Logit: 11.57 Prob:  0.36% Token: | not|
Top 8th token. Logit: 11.52 Prob:  0.34% Token: | a|
Top 9th token. Logit: 11.28 Prob:  0.26% Token: | Queen|


This shows that the “description words” don’t need to be related to each other in context; lion has nothing to do with king or queen. So don’t test any more for “content words with external knowledge”.

Still, they do seem to do something, because “Alice. Bob. David. Alice. Bob.” doesn’t give as strong a probability for “David” as the prompts with description words.

Now, try changing up the description words so there’s no induction “mirroring”.

In [None]:
# The second group uses different description words (red/lamp) and a different query noun
# ("soldier") from the first group (king/queen, "lion"), so the second half no longer
# repeats the wording of the first half.
example_prompt = "Alice is king. Bob is queen. The lion is Alice. Carol is red. David is lamp. The soldier is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' lion', ' is', ' Alice', '.', ' Carol', ' is', ' red', '.', ' David', ' is', ' lamp', '.', ' The', ' soldier', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 13.11 Prob:  8.17% Token: | red|
Top 1th token. Logit: 12.75 Prob:  5.66% Token: | the|
Top 2th token. Logit: 12.44 Prob:  4.16% Token: | Alice|
Top 3th token. Logit: 12.11 Prob:  3.00% Token: | David|
Top 4th token. Logit: 11.91 Prob:  2.45% Token: | a|
Top 5th token. Logit: 11.88 Prob:  2.39% Token: | white|
Top 6th token. Logit: 11.77 Prob:  2.14% Token: | king|
Top 7th token. Logit: 11.65 Prob:  1.89% Token: | gold|
Top 8th token. Logit: 11.34 Prob:  1.39% Token: | black|
Top 9th token. Logit: 11.15 Prob:  1.15% Token: | blue|


In [None]:
# The in-context example is reduced to the bare name "Alice."; the second group is
# "Carol is red. David is queen." and the prompt ends there with no query phrase.
example_prompt = "Alice is king. Bob is queen. Alice. Carol is red. David is queen."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' Alice', '.', ' Carol', ' is', ' red', '.', ' David', ' is', ' queen', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 17.02 Prob:  9.45% Token: | Alice|
Top 1th token. Logit: 16.82 Prob:  7.76% Token: |
|
Top 2th token. Logit: 16.12 Prob:  3.83% Token: | David|
Top 3th token. Logit: 15.91 Prob:  3.11% Token: | Bob|
Top 4th token. Logit: 15.28 Prob:  1.66% Token: | The|
Top 5th token. Logit: 14.91 Prob:  1.14% Token: | Carol|
Top 6th token. Logit: 14.83 Prob:  1.05% Token: | John|
Top 7th token. Logit: 14.75 Prob:  0.98% Token: | And|
Top 8th token. Logit: 14.59 Prob:  0.84% Token: | Jack|
Top 9th token. Logit: 14.55 Prob:  0.80% Token: | Mary|


So having the “same” words does matter, probably due to induction head patterns needing to recognize previous patterns.

In [None]:
# Bare "Alice." as the in-context example, with both groups using the same description
# words (king/queen); the prompt ends after the second group with no query phrase.
example_prompt = "Alice is king. Bob is queen. Alice. Carol is king. David is queen."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' Alice', '.', ' Carol', ' is', ' king', '.', ' David', ' is', ' queen', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 17.33 Prob:  9.44% Token: |
|
Top 1th token. Logit: 17.03 Prob:  7.01% Token: | Alice|
Top 2th token. Logit: 16.23 Prob:  3.16% Token: | David|
Top 3th token. Logit: 15.82 Prob:  2.10% Token: | The|
Top 4th token. Logit: 15.69 Prob:  1.83% Token: | Bob|
Top 5th token. Logit: 15.35 Prob:  1.31% Token: | John|
Top 6th token. Logit: 15.35 Prob:  1.31% Token: | And|
Top 7th token. Logit: 15.06 Prob:  0.98% Token: | James|
Top 8th token. Logit: 15.03 Prob:  0.95% Token: | George|
Top 9th token. Logit: 14.99 Prob:  0.91% Token: | Mary|


So "the child is" somehow is an important in-context phrase that influences "latest S" to be chosen. So it's not arbitrary. Try more of these in-context phrases.

In [None]:
# Both first-group names are repeated bare ("Alice. Bob.") between the groups; the second
# group is "Carol is red. David is queen." and the prompt ends with no query phrase.
example_prompt = "Alice is king. Bob is queen. Alice. Bob. Carol is red. David is queen."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' Alice', '.', ' Bob', '.', ' Carol', ' is', ' red', '.', ' David', ' is', ' queen', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 16.90 Prob: 10.72% Token: | David|
Top 1th token. Logit: 16.65 Prob:  8.34% Token: | Alice|
Top 2th token. Logit: 16.22 Prob:  5.46% Token: | Bob|
Top 3th token. Logit: 15.86 Prob:  3.79% Token: |
|
Top 4th token. Logit: 15.64 Prob:  3.05% Token: | Carol|
Top 5th token. Logit: 14.57 Prob:  1.05% Token: | John|
Top 6th token. Logit: 14.49 Prob:  0.96% Token: | The|
Top 7th token. Logit: 14.31 Prob:  0.80% Token: | Mary|
Top 8th token. Logit: 14.27 Prob:  0.77% Token: | Jack|
Top 9th token. Logit: 14.19 Prob:  0.71% Token: | D|


In [None]:
# The in-context example "The lion is Carol." names someone who is only described in the
# following sentence; the second group is red/queen and there is no trailing query phrase.
example_prompt = "Alice is king. Bob is queen. The lion is Carol. Carol is red. David is queen."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' lion', ' is', ' Carol', '.', ' Carol', ' is', ' red', '.', ' David', ' is', ' queen', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 17.86 Prob: 21.87% Token: | The|
Top 1th token. Logit: 16.95 Prob:  8.78% Token: |
|
Top 2th token. Logit: 15.55 Prob:  2.17% Token: | Bob|
Top 3th token. Logit: 15.47 Prob:  2.01% Token: | David|
Top 4th token. Logit: 15.23 Prob:  1.57% Token: | Alice|
Top 5th token. Logit: 15.00 Prob:  1.25% Token: | And|
Top 6th token. Logit: 14.81 Prob:  1.04% Token: | She|
Top 7th token. Logit: 14.64 Prob:  0.87% Token: | I|
Top 8th token. Logit: 14.58 Prob:  0.82% Token: | He|
Top 9th token. Logit: 14.45 Prob:  0.72% Token: | A|


In [None]:
# "The lion is Carol." as the in-context example, with both groups using king/queen;
# the prompt ends after the second group with no trailing query phrase.
example_prompt = "Alice is king. Bob is queen. The lion is Carol. Carol is king. David is queen."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' lion', ' is', ' Carol', '.', ' Carol', ' is', ' king', '.', ' David', ' is', ' queen', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 17.88 Prob: 18.58% Token: | The|
Top 1th token. Logit: 17.42 Prob: 11.76% Token: |
|
Top 2th token. Logit: 15.69 Prob:  2.09% Token: | David|
Top 3th token. Logit: 15.56 Prob:  1.84% Token: | Alice|
Top 4th token. Logit: 15.44 Prob:  1.62% Token: | And|
Top 5th token. Logit: 15.31 Prob:  1.43% Token: | Bob|
Top 6th token. Logit: 14.87 Prob:  0.92% Token: | She|
Top 7th token. Logit: 14.86 Prob:  0.91% Token: | I|
Top 8th token. Logit: 14.81 Prob:  0.87% Token: | All|
Top 9th token. Logit: 14.79 Prob:  0.85% Token: | John|


In [None]:
# Prompt without any "The lion is ..." sentence: the third sentence simply repeats
# "Alice is king." before the Carol/king and David/queen facts.
# The prompt ends on a full stop; " David" is passed as the answer.
example_prompt = "Alice is king. Bob is queen. Alice is king. Carol is king. David is queen."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' Alice', ' is', ' king', '.', ' Carol', ' is', ' king', '.', ' David', ' is', ' queen', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 17.40 Prob:  9.59% Token: |
|
Top 1th token. Logit: 17.18 Prob:  7.76% Token: | Alice|
Top 2th token. Logit: 16.76 Prob:  5.07% Token: | David|
Top 3th token. Logit: 15.83 Prob:  2.00% Token: | Bob|
Top 4th token. Logit: 15.73 Prob:  1.81% Token: | The|
Top 5th token. Logit: 15.43 Prob:  1.34% Token: | John|
Top 6th token. Logit: 15.17 Prob:  1.03% Token: | Mary|
Top 7th token. Logit: 15.15 Prob:  1.01% Token: | Carol|
Top 8th token. Logit: 15.08 Prob:  0.94% Token: | George|
Top 9th token. Logit: 15.05 Prob:  0.92% Token: | James|


In [None]:
# Prompt: first group (Alice/king, Bob/queen) followed by the completed sentence
# "The lion is Bob.", then a second group (Carol/king, David/queen) followed by the
# open-ended "The lion is". " David" (the second-group name with Bob's attribute,
# "queen") is passed as the answer.
example_prompt = "Alice is king. Bob is queen. The lion is Bob. Carol is king. David is queen. The lion is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' lion', ' is', ' Bob', '.', ' Carol', ' is', ' king', '.', ' David', ' is', ' queen', '.', ' The', ' lion', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 17.96 Prob: 86.05% Token: | David|
Top 1th token. Logit: 14.27 Prob:  2.14% Token: | Bob|
Top 2th token. Logit: 13.07 Prob:  0.64% Token: | Carol|
Top 3th token. Logit: 12.84 Prob:  0.51% Token: | King|
Top 4th token. Logit: 12.45 Prob:  0.35% Token: | the|
Top 5th token. Logit: 12.41 Prob:  0.33% Token: | Dave|
Top 6th token. Logit: 11.92 Prob:  0.20% Token: | Charles|
Top 7th token. Logit: 11.82 Prob:  0.18% Token: | Robert|
Top 8th token. Logit: 11.75 Prob:  0.17% Token: | king|
Top 9th token. Logit: 11.65 Prob:  0.16% Token: | Ed|


In [None]:
# Prompt where the completed sentence and the open-ended one use different nouns:
# "The child is David." appears after the first group (and names David before his own
# fact sentence), while the prompt ends with "The lion is".
# " David" is passed as the answer.
example_prompt = "Alice is king. Bob is queen. The child is David. Carol is king. David is queen. The lion is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' child', ' is', ' David', '.', ' Carol', ' is', ' king', '.', ' David', ' is', ' queen', '.', ' The', ' lion', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 15.10 Prob: 27.53% Token: | king|
Top 1th token. Logit: 13.55 Prob:  5.84% Token: | King|
Top 2th token. Logit: 13.42 Prob:  5.11% Token: | David|
Top 3th token. Logit: 13.31 Prob:  4.57% Token: | the|
Top 4th token. Logit: 12.67 Prob:  2.41% Token: | a|
Top 5th token. Logit: 12.50 Prob:  2.03% Token: | queen|
Top 6th token. Logit: 11.95 Prob:  1.18% Token: | dead|
Top 7th token. Logit: 11.95 Prob:  1.17% Token: | Queen|
Top 8th token. Logit: 11.52 Prob:  0.77% Token: | lion|
Top 9th token. Logit: 11.50 Prob:  0.75% Token: | God|


In [None]:
# Shortened prompt with one name per group, both given the attribute "king":
# "Alice is king. The child is Alice." then "Carol is king." and the open-ended
# "The lion is" (a different noun from the completed "The child is ..." sentence).
# " Carol" is passed as the answer.
# NOTE(review): the recorded output for this cell shows "Tokenized answer: [' David']",
# which does not match example_answer here -- the stored output looks stale; re-run.
example_prompt = "Alice is king. The child is Alice. Carol is king. The lion is"
example_answer = " Carol"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' The', ' child', ' is', ' Alice', '.', ' Carol', ' is', ' king', '.', ' The', ' lion', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 15.23 Prob: 33.75% Token: | king|
Top 1th token. Logit: 14.19 Prob: 11.96% Token: | King|
Top 2th token. Logit: 13.59 Prob:  6.55% Token: | the|
Top 3th token. Logit: 13.07 Prob:  3.90% Token: | Alice|
Top 4th token. Logit: 12.42 Prob:  2.03% Token: | a|
Top 5th token. Logit: 12.37 Prob:  1.94% Token: | queen|
Top 6th token. Logit: 12.36 Prob:  1.90% Token: | Queen|
Top 7th token. Logit: 12.30 Prob:  1.80% Token: | lion|
Top 8th token. Logit: 12.06 Prob:  1.42% Token: | Carol|
Top 9th token. Logit: 11.81 Prob:  1.11% Token: | not|


In [None]:
# Prompt where the second group uses different attributes from the first:
# first group Alice/king, Bob/queen + "The child is Bob."; second group
# Carol/"a teacher", David/"a student" + the open-ended "The child is".
# " David" (the last-mentioned name) is passed as the answer.
example_prompt = "Alice is king. Bob is queen. The child is Bob. Carol is a teacher. David is a student. The child is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' child', ' is', ' Bob', '.', ' Carol', ' is', ' a', ' teacher', '.', ' David', ' is', ' a', ' student', '.', ' The', ' child', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 17.74 Prob: 85.66% Token: | David|
Top 1th token. Logit: 13.01 Prob:  0.76% Token: | the|
Top 2th token. Logit: 12.94 Prob:  0.71% Token: | Dave|
Top 3th token. Logit: 12.89 Prob:  0.67% Token: | a|
Top 4th token. Logit: 12.28 Prob:  0.37% Token: | Bob|
Top 5th token. Logit: 12.11 Prob:  0.31% Token: | Carol|
Top 6th token. Logit: 12.02 Prob:  0.28% Token: | Ed|
Top 7th token. Logit: 11.98 Prob:  0.27% Token: | D|
Top 8th token. Logit: 11.79 Prob:  0.22% Token: | Dr|
Top 9th token. Logit: 11.69 Prob:  0.20% Token: | Don|


In [None]:
# Prompt with Carol/"a teacher" and David/"a student" in the second group, where the
# completed sentence is "The child is Bob." but the open-ended sentence at the end
# uses a different noun: "The lion is". " David" is passed as the answer.
example_prompt = "Alice is king. Bob is queen. The child is Bob. Carol is a teacher. David is a student. The lion is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' child', ' is', ' Bob', '.', ' Carol', ' is', ' a', ' teacher', '.', ' David', ' is', ' a', ' student', '.', ' The', ' lion', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 15.86 Prob: 45.14% Token: | a|
Top 1th token. Logit: 13.80 Prob:  5.74% Token: | the|
Top 2th token. Logit: 13.62 Prob:  4.80% Token: | David|
Top 3th token. Logit: 13.18 Prob:  3.09% Token: | an|
Top 4th token. Logit: 12.67 Prob:  1.84% Token: | King|
Top 5th token. Logit: 12.53 Prob:  1.61% Token: | king|
Top 6th token. Logit: 12.22 Prob:  1.17% Token: | not|
Top 7th token. Logit: 12.11 Prob:  1.06% Token: | lion|
Top 8th token. Logit: 11.93 Prob:  0.88% Token: | Bob|
Top 9th token. Logit: 11.74 Prob:  0.73% Token: | dead|


In [None]:
# Prompt with the second group's names in the order David/king, Carol/queen, so
# Carol is both the last-mentioned name and the one sharing Bob's attribute ("queen").
# Both the completed and the open-ended sentence use "The child is".
# " Carol" is passed as the answer.
example_prompt = "Alice is king. Bob is queen. The child is Bob. David is king. Carol is queen. The child is"
example_answer = " Carol"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' child', ' is', ' Bob', '.', ' David', ' is', ' king', '.', ' Carol', ' is', ' queen', '.', ' The', ' child', ' is']
Tokenized answer: [' Carol']


Top 0th token. Logit: 18.69 Prob: 89.58% Token: | Carol|
Top 1th token. Logit: 15.01 Prob:  2.26% Token: | David|
Top 2th token. Logit: 13.88 Prob:  0.73% Token: | Bob|
Top 3th token. Logit: 13.85 Prob:  0.71% Token: | Car|
Top 4th token. Logit: 13.00 Prob:  0.30% Token: | Carolina|
Top 5th token. Logit: 12.65 Prob:  0.21% Token: | Caroline|
Top 6th token. Logit: 12.21 Prob:  0.14% Token: | Charlotte|
Top 7th token. Logit: 12.19 Prob:  0.13% Token: | Carl|
Top 8th token. Logit: 11.90 Prob:  0.10% Token: | Kim|
Top 9th token. Logit: 11.89 Prob:  0.10% Token: | Charles|


In [None]:
# Shortened prompt with one name per group: "Alice is king. The child is Alice."
# then "Carol is queen." and the open-ended "The child is".
# " Carol" is passed as the answer.
example_prompt = "Alice is king. The child is Alice. Carol is queen. The child is"
example_answer = " Carol"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' The', ' child', ' is', ' Alice', '.', ' Carol', ' is', ' queen', '.', ' The', ' child', ' is']
Tokenized answer: [' Carol']


Top 0th token. Logit: 16.72 Prob: 75.90% Token: | Carol|
Top 1th token. Logit: 13.50 Prob:  3.04% Token: | Alice|
Top 2th token. Logit: 12.62 Prob:  1.27% Token: | the|
Top 3th token. Logit: 11.86 Prob:  0.59% Token: | Charlotte|
Top 4th token. Logit: 11.79 Prob:  0.55% Token: | Car|
Top 5th token. Logit: 11.56 Prob:  0.44% Token: | her|
Top 6th token. Logit: 11.55 Prob:  0.43% Token: | a|
Top 7th token. Logit: 11.51 Prob:  0.42% Token: | Carolina|
Top 8th token. Logit: 11.49 Prob:  0.41% Token: | Bob|
Top 9th token. Logit: 11.11 Prob:  0.28% Token: | not|


In [None]:
# Prompt with no completed "The child is <name>." sentence at all: just two facts
# (Alice/king, Carol/queen) followed by the open-ended "The child is".
# " Carol" is passed as the answer.
example_prompt = "Alice is king. Carol is queen. The child is"
example_answer = " Carol"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Carol', ' is', ' queen', '.', ' The', ' child', ' is']
Tokenized answer: [' Carol']


Top 0th token. Logit: 14.09 Prob:  9.80% Token: | king|
Top 1th token. Logit: 13.88 Prob:  7.94% Token: | the|
Top 2th token. Logit: 13.65 Prob:  6.30% Token: | a|
Top 3th token. Logit: 13.00 Prob:  3.31% Token: | queen|
Top 4th token. Logit: 12.70 Prob:  2.44% Token: | born|
Top 5th token. Logit: 12.35 Prob:  1.72% Token: | not|
Top 6th token. Logit: 12.05 Prob:  1.28% Token: | her|
Top 7th token. Logit: 11.98 Prob:  1.18% Token: | in|
Top 8th token. Logit: 11.97 Prob:  1.17% Token: | always|
Top 9th token. Logit: 11.80 Prob:  0.99% Token: | an|


In [None]:
# Prompt with two facts (Alice/king, Carol/queen) and no completed example sentence;
# the open-ended sentence "The queen is" reuses the attribute stated for Carol.
# " Carol" is passed as the answer.
example_prompt = "Alice is king. Carol is queen. The queen is"
example_answer = " Carol"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Carol', ' is', ' queen', '.', ' The', ' queen', ' is']
Tokenized answer: [' Carol']


Top 0th token. Logit: 13.69 Prob: 12.23% Token: | the|
Top 1th token. Logit: 13.08 Prob:  6.66% Token: | a|
Top 2th token. Logit: 12.59 Prob:  4.05% Token: | king|
Top 3th token. Logit: 12.10 Prob:  2.49% Token: | not|
Top 4th token. Logit: 11.85 Prob:  1.94% Token: | queen|
Top 5th token. Logit: 11.71 Prob:  1.69% Token: | in|
Top 6th token. Logit: 11.67 Prob:  1.62% Token: | dead|
Top 7th token. Logit: 11.42 Prob:  1.26% Token: | her|
Top 8th token. Logit: 11.35 Prob:  1.18% Token: | always|
Top 9th token. Logit: 11.15 Prob:  0.96% Token: | an|


In [None]:
# Prompt where the completed sentence names the FIRST name of the first group
# ("The lion is Alice."), and the second group is Carol/"a teacher", David/"lamp".
# It ends with the open-ended "The lion is"; " David" is passed as the answer.
example_prompt = "Alice is king. Bob is queen. The lion is Alice. Carol is a teacher. David is lamp. The lion is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' lion', ' is', ' Alice', '.', ' Carol', ' is', ' a', ' teacher', '.', ' David', ' is', ' lamp', '.', ' The', ' lion', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 14.38 Prob: 17.50% Token: | David|
Top 1th token. Logit: 14.35 Prob: 16.94% Token: | a|
Top 2th token. Logit: 14.25 Prob: 15.33% Token: | Alice|
Top 3th token. Logit: 13.24 Prob:  5.60% Token: | the|
Top 4th token. Logit: 13.07 Prob:  4.71% Token: | Carol|
Top 5th token. Logit: 12.44 Prob:  2.51% Token: | Bob|
Top 6th token. Logit: 12.04 Prob:  1.68% Token: | an|
Top 7th token. Logit: 11.57 Prob:  1.05% Token: | not|
Top 8th token. Logit: 11.25 Prob:  0.76% Token: | King|
Top 9th token. Logit: 10.63 Prob:  0.41% Token: | Lucy|


In [None]:
# Prompt made only of bare names separated by full stops (no attributes and no
# "The ... is" sentences): Alice, Bob, Bob, Carol, David.
# The prompt ends on a full stop; " David" is passed as the answer.
example_prompt = "Alice. Bob. Bob. Carol. David."
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', '.', ' Bob', '.', ' Bob', '.', ' Carol', '.', ' David', '.']
Tokenized answer: [' David']


Top 0th token. Logit: 16.59 Prob: 28.75% Token: | David|
Top 1th token. Logit: 14.33 Prob:  2.98% Token: | D|
Top 2th token. Logit: 14.11 Prob:  2.39% Token: | Dave|
Top 3th token. Logit: 13.93 Prob:  2.01% Token: | Don|
Top 4th token. Logit: 13.57 Prob:  1.40% Token: | E|
Top 5th token. Logit: 13.26 Prob:  1.03% Token: | John|
Top 6th token. Logit: 13.23 Prob:  0.99% Token: | Dr|
Top 7th token. Logit: 13.08 Prob:  0.86% Token: | George|
Top 8th token. Logit: 13.01 Prob:  0.80% Token: | Dan|
Top 9th token. Logit: 12.99 Prob:  0.78% Token: | Bob|


## Different number of names, varying in source and target

In [14]:
# Unequal group sizes: three names before the completed sentence "The child is Bob."
# (Alice, Bob, Paul) and two names after it (Carol, David), ending with the
# open-ended "The child is". " David" is passed as the answer.
example_prompt = "Alice is king. Bob is queen. Paul is prince. The child is Bob. Carol is a teacher. David is a student. The child is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' Paul', ' is', ' prince', '.', ' The', ' child', ' is', ' Bob', '.', ' Carol', ' is', ' a', ' teacher', '.', ' David', ' is', ' a', ' student', '.', ' The', ' child', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 16.61 Prob: 50.09% Token: | David|
Top 1th token. Logit: 14.90 Prob:  9.11% Token: | Carol|
Top 2th token. Logit: 14.11 Prob:  4.13% Token: | Bob|
Top 3th token. Logit: 13.55 Prob:  2.37% Token: | Paul|
Top 4th token. Logit: 13.15 Prob:  1.58% Token: | a|
Top 5th token. Logit: 13.02 Prob:  1.38% Token: | the|
Top 6th token. Logit: 12.55 Prob:  0.87% Token: | Dave|
Top 7th token. Logit: 12.42 Prob:  0.76% Token: | Alice|
Top 8th token. Logit: 12.22 Prob:  0.63% Token: | Charles|
Top 9th token. Logit: 12.06 Prob:  0.53% Token: | Ed|


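Adding a name to the "source" (the names listed before the completed example sentence), so that (# names source = 3) > (# names target = 2), where the "target" is the names listed after it, reduces the output probability.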

In [15]:
# Three names before the completed sentence "The child is Bob." (Alice, Bob, Paul)
# and three names after it (Carol, David, John), ending with the open-ended
# "The child is". " David" is passed as the answer even though John is the
# last-mentioned name.
example_prompt = "Alice is king. Bob is queen. Paul is prince. The child is Bob. Carol is a teacher. David is a student. John is a janitor. The child is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' Paul', ' is', ' prince', '.', ' The', ' child', ' is', ' Bob', '.', ' Carol', ' is', ' a', ' teacher', '.', ' David', ' is', ' a', ' student', '.', ' John', ' is', ' a', ' jan', 'itor', '.', ' The', ' child', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 16.85 Prob: 40.96% Token: | John|
Top 1th token. Logit: 15.92 Prob: 16.11% Token: | David|
Top 2th token. Logit: 14.54 Prob:  4.06% Token: | Bob|
Top 3th token. Logit: 13.96 Prob:  2.28% Token: | Paul|
Top 4th token. Logit: 13.68 Prob:  1.71% Token: | the|
Top 5th token. Logit: 13.63 Prob:  1.64% Token: | a|
Top 6th token. Logit: 13.44 Prob:  1.35% Token: | Carol|
Top 7th token. Logit: 13.00 Prob:  0.87% Token: | Jane|
Top 8th token. Logit: 12.94 Prob:  0.82% Token: | Peter|
Top 9th token. Logit: 12.93 Prob:  0.81% Token: | Alice|


Adding a name in the "target" as well, such that (# names target = 3) = (# names source = 3), reduces the output probability AND still has the "most recent subject" being output, this time the subject being the "newly added" one.

In [16]:
# Two names before the completed sentence "The child is Bob." (Alice, Bob) and three
# names after it (Carol, David, John), ending with the open-ended "The child is".
# " David" is passed as the answer even though John is the last-mentioned name.
example_prompt = "Alice is king. Bob is queen. The child is Bob. Carol is a teacher. David is a student. John is a janitor. The child is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' child', ' is', ' Bob', '.', ' Carol', ' is', ' a', ' teacher', '.', ' David', ' is', ' a', ' student', '.', ' John', ' is', ' a', ' jan', 'itor', '.', ' The', ' child', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 17.59 Prob: 66.45% Token: | John|
Top 1th token. Logit: 15.77 Prob: 10.74% Token: | David|
Top 2th token. Logit: 13.99 Prob:  1.82% Token: | the|
Top 3th token. Logit: 13.60 Prob:  1.23% Token: | Bob|
Top 4th token. Logit: 13.57 Prob:  1.19% Token: | a|
Top 5th token. Logit: 12.79 Prob:  0.55% Token: | Jane|
Top 6th token. Logit: 12.50 Prob:  0.41% Token: | James|
Top 7th token. Logit: 12.41 Prob:  0.37% Token: | Robert|
Top 8th token. Logit: 12.26 Prob:  0.32% Token: | Jim|
Top 9th token. Logit: 12.20 Prob:  0.30% Token: | J|


Adding a subject only to the "target" doesn't reduce the top probability as much, but the "latest subject" is still the one that gets output.

In [17]:
# Two names before the completed sentence "The child is Bob." (Alice, Bob) and three
# names after it, with John placed BETWEEN Carol and David so that David is the
# last-mentioned name. Ends with the open-ended "The child is";
# " David" is passed as the answer.
example_prompt = "Alice is king. Bob is queen. The child is Bob. Carol is a teacher. John is a janitor. David is a student. The child is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' The', ' child', ' is', ' Bob', '.', ' Carol', ' is', ' a', ' teacher', '.', ' John', ' is', ' a', ' jan', 'itor', '.', ' David', ' is', ' a', ' student', '.', ' The', ' child', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 17.74 Prob: 78.83% Token: | David|
Top 1th token. Logit: 14.46 Prob:  2.95% Token: | John|
Top 2th token. Logit: 13.40 Prob:  1.02% Token: | the|
Top 3th token. Logit: 13.20 Prob:  0.84% Token: | a|
Top 4th token. Logit: 12.96 Prob:  0.66% Token: | Dave|
Top 5th token. Logit: 12.94 Prob:  0.65% Token: | Bob|
Top 6th token. Logit: 12.31 Prob:  0.34% Token: | Jane|
Top 7th token. Logit: 12.27 Prob:  0.33% Token: | Ed|
Top 8th token. Logit: 12.07 Prob:  0.27% Token: | Charles|
Top 9th token. Logit: 11.99 Prob:  0.25% Token: | Don|


Try a bigger number of names, like 6 per group. If the same pattern shows up, infer that every count in between follows it too, since there is no apparent reason it wouldn't.

In [18]:
# Larger groups: six names before the completed sentence "The child is Bob."
# (Alice, Bob, Dennis, Harold, Julie, Adam) and six names after it
# (Carol, John, Mary, Rol, Susan, David), with David mentioned last.
# Ends with the open-ended "The child is"; " David" is passed as the answer.
example_prompt = "Alice is king. Bob is queen. Dennis is mouse. Harold is otter. Julie is lamp. Adam is king. The child is Bob. Carol is a teacher. John is a janitor. Mary is mouse. Rol is janitor. Susan is name. David is a student. The child is"
example_answer = " David"
utils.test_prompt(example_prompt, example_answer, model, prepend_bos=True)

Tokenized prompt: ['<|endoftext|>', 'Alice', ' is', ' king', '.', ' Bob', ' is', ' queen', '.', ' Dennis', ' is', ' mouse', '.', ' Harold', ' is', ' ot', 'ter', '.', ' Julie', ' is', ' lamp', '.', ' Adam', ' is', ' king', '.', ' The', ' child', ' is', ' Bob', '.', ' Carol', ' is', ' a', ' teacher', '.', ' John', ' is', ' a', ' jan', 'itor', '.', ' Mary', ' is', ' mouse', '.', ' R', 'ol', ' is', ' jan', 'itor', '.', ' Susan', ' is', ' name', '.', ' David', ' is', ' a', ' student', '.', ' The', ' child', ' is']
Tokenized answer: [' David']


Top 0th token. Logit: 14.44 Prob: 13.01% Token: | Bob|
Top 1th token. Logit: 13.69 Prob:  6.13% Token: | David|
Top 2th token. Logit: 13.52 Prob:  5.21% Token: | R|
Top 3th token. Logit: 13.11 Prob:  3.45% Token: | John|
Top 4th token. Logit: 12.62 Prob:  2.12% Token: | Adam|
Top 5th token. Logit: 12.46 Prob:  1.79% Token: | Alice|
Top 6th token. Logit: 12.40 Prob:  1.69% Token: | Mary|
Top 7th token. Logit: 12.35 Prob:  1.61% Token: | a|
Top 8th token. Logit: 12.14 Prob:  1.30% Token: | the|
Top 9th token. Logit: 12.02 Prob:  1.16% Token: | Julie|
