# M1 useful nodes (Broken)
This notebook identifies M1 nodes (attention heads and MLPs) that, when ablated, cause a decrease in model prediction accuracy. That is, these nodes are needed (aka useful) for accurate predictions.


This notebook:
- Was developed on Google Colab using an A100 with 40GB GPU memory and 80GB system RAM.
- Runs with GPT2/TinyStories/Qwen/Llama/Granite/SmolLM with base/CS1/CS2/CS3.
- Requires a GITHUB_TOKEN secret to access the Martian quanta_text_to_sql code repository.
- Requires a HF_TOKEN secret to access the Martian HuggingFace repository.

This notebook relies on nnsight. Refer to:
- https://nnsight.net/notebooks/tutorials/walkthrough/#Batching
- https://nnsight.net/notebooks/tutorials/walkthrough/#Looping

# Import libraries
Imports standard libraries. Do not read.

In [None]:
# https://nnsight.net/
# Access 0.4 prerelease version (as at Dec 2024)
# !pip install nnsight==0.4.0.dev0
# Active install: upgrades to the newest available nnsight release (no version pin).
!pip install -U nnsight

In [None]:
from IPython.display import clear_output
import einops
import torch
import numpy as np
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "colab"
import matplotlib.pyplot as plt
import tqdm.auto as tqdm

import nnsight
from nnsight import LanguageModel, util

In [None]:
from getpass import getpass
from google.colab import userdata
import gc
import weakref

In [None]:
# Read the GITHUB_TOKEN secret via google.colab.userdata.
github_token = userdata.get("GITHUB_TOKEN")

# Install the private repository using the token
# NOTE(review): the token is interpolated into the install URL; pip may echo that
# URL in the cell output, so consider clearing the output before sharing — TODO confirm.
!pip install --upgrade git+https://{github_token}@github.com/withmartian/quanta_text_to_sql.git

import QuantaTextToSql as qts

# Select model, command set and feature to investigate


In [None]:
# Run configuration: which model and command set to analyse, and how much text to generate.
model_num = 1                 # 0=GPT2, 1=TinyStories, 2=Qwen, 3=Llama, 4=Granite, 5=SmolLM
cs_num = 1                    # 0=BaseModel, 1=CS1, 2=CS2 or 3=CS3
max_new_tokens = 100          # Max number of tokens to generate

# Fail fast on a mistyped selection instead of hitting a confusing error (or silently
# reusing stale kernel state) several cells later.
assert model_num in range(6), f"model_num must be 0..5, got {model_num!r}"
assert cs_num in range(4), f"cs_num must be 0..3, got {cs_num!r}"
assert max_new_tokens > 0, f"max_new_tokens must be positive, got {max_new_tokens!r}"

# Run m1, m2 and m3 models

In [None]:
# Wrap the selected model in an nnsight LanguageModel so its activations can be
# read and edited while it generates.
if model_num == 1:
    # TinyStories: qts hands back a (tokenizer, model) pair, which is wrapped directly.
    the_tokenizer, the_model = qts.load_sql_interp_model(model_num, cs_num, auth_token=userdata.get("HF_TOKEN"), use_flash_attention=False)
    # The tokenizer is passed by keyword: LanguageModel takes it as a keyword
    # argument, so a positional value is not treated as the tokenizer.
    model = LanguageModel(the_model, tokenizer=the_tokenizer)
    model.tokenizer = the_tokenizer
elif model_num > 1:
    # Other models are built by nnsight from the location string that qts returns.
    model = LanguageModel(qts.sql_interp_model_location(model_num, cs_num), device_map="auto")
else:
    # Previously this case fell through silently and left `model` undefined (or stale
    # from an earlier run), so later cells failed with an unrelated-looking error.
    raise NotImplementedError(
        f"model_num={model_num!r} is not loaded by this notebook; choose a value from 1 to 5."
    )

clear_output()
print(model)

In [None]:
# Number of example prompts to generate and probe.
N_BATCH = 3

if model_num > 0:
    # Generate a batch of prompts; the base model (cs_num 0) is probed with CS1 prompts.
    if cs_num in (0, 1):
        examples = qts.generate_cs1(N_BATCH)
    elif cs_num == 2:
        examples = qts.generate_cs2(N_BATCH)
    elif cs_num == 3:
        examples = qts.generate_cs3(N_BATCH)
    else:
        # Without this branch an unknown cs_num would leave `examples` undefined,
        # or silently keep the value left over from a previous run.
        raise ValueError(f"Unsupported cs_num {cs_num!r}; expected 0, 1, 2 or 3")

In [None]:
def calc_max_prompt_tokens(examples, tokenizer=None):
    """Return the token count of the longest prompt among `examples`.

    Args:
        examples: Iterable of objects exposing `get_alpaca_prompt()`.
        tokenizer: Optional callable mapping a prompt string to a mapping with an
            "input_ids" entry. Defaults to the notebook-level `model.tokenizer`,
            which keeps existing single-argument calls working.

    Returns:
        The largest number of input ids over all prompts, or 0 when `examples` is empty.
    """
    if tokenizer is None:
        # Fall back to the notebook's global model, as the original version always did.
        tokenizer = model.tokenizer

    return max(
        (len(tokenizer(example.get_alpaca_prompt())["input_ids"]) for example in examples),
        default=0,
    )

# Longest prompt length (in tokens) across the generated examples; it sizes the
# token-position axis of the result arrays built later in the notebook.
max_prompt_tokens = calc_max_prompt_tokens(examples)

In [None]:
# Model dimensions used to enumerate experiments and to slice activations per head.
# NOTE(review): `model.transformer.h` looks like the GPT-2-style layer list; other
# architectures may expose their layers under a different path — TODO confirm for model_num >= 2.
N_LAYERS = len(model.transformer.h)
N_HEADS = model.config.num_attention_heads # 16 if model_num == 1 else 7 if model_num == 2 else 16
D_MODEL = model.config.hidden_size
# Width of one head's slice; integer division assumes D_MODEL splits evenly across heads.
D_HEAD = D_MODEL // N_HEADS # 64 if model_num == 1 else 128 if model_num == 2 else 128

In [None]:
# One-line summary of the experiment dimensions.
print(
    f"N_BATCH: {N_BATCH} N_LAYERS: {N_LAYERS} N_HEADS: {N_HEADS} "
    f"D_MODEL: {D_MODEL} D_HEAD: {D_HEAD} max_prompt_tokens: {max_prompt_tokens}"
)

In [None]:
# Build the experiment list: one entry per (example, layer, head, prompt-token position).
# Each entry is [prompt, expected SQL, layer index, head index, token index].
g_run_list = []
for example in examples:
    prompt = example.get_alpaca_prompt()
    answer = example.sql_statement
    num_prompt_tokens = len(model.tokenizer(prompt)["input_ids"])
    # Same ordering as nested loops: layer outermost, then head, then token position.
    g_run_list.extend(
        [prompt, answer, layer, head, position]
        for layer in range(N_LAYERS)
        for head in range(N_HEADS)
        for position in range(num_prompt_tokens)
    )

num_experiments = len(g_run_list)
print("Number of experiments:", num_experiments)

In [None]:
# Per (layer, head, prompt-token position): how many ablation runs were attempted
# and how many of them did not reproduce prompt + expected answer.
result_shape = (N_LAYERS, N_HEADS, max_prompt_tokens)
try_results = np.zeros(result_shape)
fail_results = np.zeros(result_shape)

for run_item in tqdm.tqdm(g_run_list):

    run_prompt, run_answer, run_layer_idx, run_head_idx, run_token_idx = run_item
    result_idx = (run_layer_idx, run_head_idx, run_token_idx)

    # Columns of the hidden dimension selected for this head index.
    head_columns = slice(run_head_idx * D_HEAD, (run_head_idx + 1) * D_HEAD)

    inputs = model.tokenizer(run_prompt, return_tensors="pt", padding=True)

    with model.generate(inputs['input_ids'], max_new_tokens=max_new_tokens,
                       pad_token_id=model.tokenizer.eos_token_id) as tracer:

        # Zero the selected columns of this layer's output at one token position.
        # NOTE(review): this edits `h[layer].output[0]`, which appears to be the output of
        # the whole transformer block rather than the attention module's per-head output —
        # confirm that this column slice really isolates the intended head.
        model.transformer.h[run_layer_idx].output[0][:, run_token_idx, head_columns] = 0

        # Keep the generated token ids available after the trace exits.
        final_output = model.generator.output.save()

    generated_ids = final_output.detach().cpu().numpy()
    decoded_output = model.tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # Record the attempt; it is a failure when the decoded text is not exactly
    # the prompt followed by the expected SQL statement.
    try_results[result_idx] += 1
    if decoded_output != run_prompt + run_answer:
        fail_results[result_idx] += 1

In [None]:
# Failure rate in percent, rounded to two decimals. The tiny constant in the
# denominator avoids dividing by zero for positions that were never attempted.
failure_rate = np.round((fail_results / (try_results + 1e-10)) * 100, 2)
print("Failure Rate (%):", failure_rate, sep="\n")

In [None]:
# Plot one heatmap per layer: rows are heads, columns are prompt token positions.
import matplotlib.pyplot as plt

for layer_idx in range(N_LAYERS):
    fig, ax = plt.subplots()
    # failure_rate[layer_idx] has shape (N_HEADS, max_prompt_tokens), so the vertical
    # axis indexes heads (it was previously mislabelled "Layer").
    heatmap = ax.imshow(failure_rate[layer_idx], cmap="viridis", aspect="auto")
    fig.colorbar(heatmap, ax=ax, label="Percentage Change")
    ax.set_xlabel("Token Position")
    ax.set_ylabel("Head")
    ax.set_title("Percentage of Output Changes by Zeroing Activations in Layer " + str(layer_idx))
    plt.show()
    # Release the figure so a model with many layers does not accumulate open figures.
    plt.close(fig)