In [1]:
import gc

# load model

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-70m")
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-70m").to('cuda' if torch.cuda.is_available() else 'cpu')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/567 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/166M [00:00<?, ?B/s]

In [3]:
layer_id = 2

# small data

In [49]:
testdata = torch.tensor([[1, 2, 1],
[3, 3, 5],
[2, 1, 2]]).float()

# get data

In [4]:
%%capture
!pip install datasets

In [5]:
from datasets import load_dataset
dataset = load_dataset("Skylion007/openwebtext", split="train", streaming=True)

Downloading builder script:   0%|          | 0.00/2.73k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.35k [00:00<?, ?B/s]

The repository for Skylion007/openwebtext contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/Skylion007/openwebtext.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


In [6]:
def get_next_batch(dataset_iter, batch_size=100):
    batch = []
    for _ in range(batch_size):
        try:
            sample = next(dataset_iter)
            batch.append(sample['text'])
        except StopIteration:
            break
    return batch

dataset_iter = iter(dataset)

In [7]:
accumulated_outputs = None
batch_size = 100
maxseqlen = 100

# Loop through the entire dataset in batches
# while True:
for i in range(1):
    batch = get_next_batch(dataset_iter, batch_size)
    if not batch:
        break  # Stop if there are no more batches

    # Tokenize the batch and move to the device
    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=maxseqlen)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.inference_mode():
        outputs = model(**inputs, output_hidden_states=True)
        if accumulated_outputs is None:
            accumulated_outputs = outputs.hidden_states[layer_id]
        else:
            accumulated_outputs = torch.cat((accumulated_outputs, outputs.hidden_states[layer_id]), dim= 0)

    # Clear memory to prevent OOM
    del inputs, outputs
    torch.cuda.empty_cache()  # Only if you're using CUDA
    gc.collect()

In [8]:
# outputs.hidden_states[layer_id].shape

In [9]:
accumulated_outputs.shape

torch.Size([100, 100, 512])

In [10]:
first_dim_reshaped = accumulated_outputs.shape[0] * accumulated_outputs.shape[1]
accumulated_outputs = accumulated_outputs.reshape(first_dim_reshaped, accumulated_outputs.shape[-1]).cpu()
accumulated_outputs.shape

torch.Size([10000, 512])

In [11]:
accumulated_outputs = accumulated_outputs.to('cuda')

# corr fns

In [147]:
def batched_correlation(reshaped_activations_A, reshaped_activations_B, batch_size=100):
    # Ensure tensors are on GPU
    if torch.cuda.is_available():
        reshaped_activations_A = reshaped_activations_A.to('cuda')
        reshaped_activations_B = reshaped_activations_B.to('cuda')

    # Normalize columns of A
    mean_A = reshaped_activations_A.mean(dim=0, keepdim=True)
    std_A = reshaped_activations_A.std(dim=0, keepdim=True)
    normalized_A = (reshaped_activations_A - mean_A) / (std_A + 1e-8)  # Avoid division by zero

    # Normalize columns of B
    mean_B = reshaped_activations_B.mean(dim=0, keepdim=True)
    std_B = reshaped_activations_B.std(dim=0, keepdim=True)
    normalized_B = (reshaped_activations_B - mean_B) / (std_B + 1e-8)  # Avoid division by zero

    num_batches = (normalized_B.shape[1] + batch_size - 1) // batch_size
    max_values = []
    max_indices = []

    for batch in range(num_batches):
        start = batch * batch_size
        print(start)
        end = min(start + batch_size, normalized_B.shape[1])
        batch_corr_matrix = torch.matmul(normalized_A.t(), normalized_B[:, start:end]) / normalized_A.shape[0]
        max_val, max_idx = batch_corr_matrix.max(dim=0)
        max_values.append(max_val)
        max_indices.append(max_idx)  # Adjust indices for the batch offset

        del batch_corr_matrix
        torch.cuda.empty_cache()

    return torch.cat(max_indices), torch.cat(max_values)

## test reverse col order

In [88]:
accumulated_outputs.shape

torch.Size([100000, 512])

In [158]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(accumulated_outputs, accumulated_outputs)
sum(highest_correlations_values_AB) / len(highest_correlations_values_AB)

0
100
200
300
400
500


tensor(1.0000, device='cuda:0')

In [148]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(accumulated_outputs, accumulated_outputs[:, range(accumulated_outputs.shape[1])[::-1]])
# highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(accumulated_outputs, accumulated_outputs)
highest_correlations_indices_AB = highest_correlations_indices_AB.detach().cpu().numpy()
highest_correlations_values_AB = highest_correlations_values_AB.detach().cpu().numpy()

0
100
200
300
400
500


In [90]:
len(highest_correlations_indices_AB)

512

In [149]:
num_unq_pairs = len(list(set(highest_correlations_indices_AB)))
print("% unique: ", num_unq_pairs / len(highest_correlations_indices_AB))

% unique:  1.0


In [150]:
sum(highest_correlations_values_AB) / len(highest_correlations_values_AB)

0.9999899810645729

## test sae

In [131]:
testdata[:,:2]

tensor([[1., 2.],
        [3., 3.],
        [2., 1.]])

In [151]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(testdata[:,:2], testdata[:,:2])
highest_correlations_values_AB

0


tensor([0.6667, 0.6667], device='cuda:0')

In [153]:
testdata[:,:]

tensor([[1., 2., 1.],
        [3., 3., 5.],
        [2., 1., 2.]])

In [154]:
testdata

tensor([[1., 2., 1.],
        [3., 3., 5.],
        [2., 1., 2.]])

In [156]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(testdata, testdata)
highest_correlations_values_AB

0


tensor([0.6667, 0.6667, 0.6667], device='cuda:0')

In [157]:
sum(highest_correlations_values_AB) / len(highest_correlations_values_AB)

tensor(0.6667, device='cuda:0')

In [125]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(reshaped_activations_A[:,:1], reshaped_activations_A[:,:1])
highest_correlations_indices_AB = highest_correlations_indices_AB.detach().cpu().numpy()
highest_correlations_values_AB = highest_correlations_values_AB.detach().cpu().numpy()

0


In [126]:
highest_correlations_values_AB

array([0.], dtype=float32)

In [113]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(reshaped_activations_A[:50,:], reshaped_activations_A[:50,:])
highest_correlations_indices_AB = highest_correlations_indices_AB.detach().cpu().numpy()
highest_correlations_values_AB = highest_correlations_values_AB.detach().cpu().numpy()

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900
10000
10100
10200
10300
10400
10500
10600
10700
10800
10900
11000
11100
11200
11300
11400
11500
11600
11700
11800
11900
12000
12100
12200
12300
12400
12500
12600
12700
12800
12900
13000
13100
13200
13300
13400
13500
13600
13700
13800
13900
14000
14100
14200
14300
14400
14500
14600
14700
14800
14900
15000
15100
15200
15300
15400
15500
15600
15700
15800
15900
16000
16100
16200
16300
16400
16500
16600
16700
16800
16900
17000
17100
17200
17300
17400
17500
17600
17700
17800
17900
18000
18100
18200
18300
18400
18

In [114]:
highest_correlations_indices_AB

array([0, 0, 0, ..., 0, 0, 0])

In [115]:
reshaped_activations_A[0]

tensor([0., 0., 0.,  ..., 0., 0., 0.])

In [116]:
sum(highest_correlations_values_AB) / len(highest_correlations_values_AB)

0.14370404365581635

In [117]:
# Find nonzero indices
nonzero_indices = torch.nonzero(reshaped_activations_A, as_tuple=True)

# Index tensor to get nonzero values
nonzero_values = reshaped_activations_A[nonzero_indices]
print(nonzero_values)


tensor([0.1779, 0.0942, 1.6326,  ..., 0.2344, 0.0575, 0.0702])


In [121]:
nonzero_indices

(tensor([    0,     0,     0,  ..., 99999, 99999, 99999]),
 tensor([    7,    24,    35,  ..., 32134, 32513, 32762]))

# load sae weights

In [12]:
%%capture
!pip install git+https://github.com/EleutherAI/sae.git

In [13]:
from sae.config import SaeConfig
from sae.utils import decoder_impl
from sae import Sae

import pickle
import numpy as np
import matplotlib.pyplot as plt
import json
from fnmatch import fnmatch
from pathlib import Path
from typing import NamedTuple, Optional, Callable, Union, List, Tuple
# from jaxtyping import Float, Int

import einops
import torch
from torch import Tensor, nn
from huggingface_hub import snapshot_download
from natsort import natsorted
from safetensors.torch import load_model, save_model

device = "cuda" if torch.cuda.is_available() else "cpu"

Triton not installed, using eager implementation of SAE decoder.


In [14]:
i = 2
hookpoint = "layers." + str(i)
name = "EleutherAI/sae-pythia-70m-32k"
decoder=True
# model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-70m")

repo_path = Path(
            snapshot_download(
                name,
                allow_patterns=f"{hookpoint}/*" if hookpoint is not None else None,
                # allow_patterns = None
            )
        )
if hookpoint is not None:
    repo_path = repo_path / hookpoint
path = Path(repo_path)
cfg_dict = {"expansion_factor": 32, "normalize_decoder": True, "num_latents": 32768, "k": 16, "d_in": 512}
d_in = cfg_dict.pop("d_in")
cfg = SaeConfig(**cfg_dict)

sae = Sae(d_in, cfg, device=device, decoder=decoder)

load_model(
    model=sae,
    filename=str(path / "sae.safetensors"),
    device=str(device),
    strict=decoder,
)

# weight_matrix_np = sae.W_dec.cpu().detach().numpy()

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

layers.2/cfg.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

sae.safetensors:   0%|          | 0.00/134M [00:00<?, ?B/s]

(set(), [])

# get sae actvs

In [15]:
accumulated_outputs = None
batch_size = 100
maxseqlen = 100

# Loop through the entire dataset in batches
# while True:
for i in range(10):
    batch = get_next_batch(dataset_iter, batch_size)
    if not batch:
        break  # Stop if there are no more batches

    # Tokenize the batch and move to the device
    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=maxseqlen)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.inference_mode():
        outputs = model(**inputs, output_hidden_states=True)
        if accumulated_outputs is None:
            accumulated_outputs = outputs.hidden_states[layer_id]
        else:
            accumulated_outputs = torch.cat((accumulated_outputs, outputs.hidden_states[layer_id]), dim= 0)

    # Clear memory to prevent OOM
    del inputs, outputs
    torch.cuda.empty_cache()  # Only if you're using CUDA
    gc.collect()

In [16]:
accumulated_outputs.shape

torch.Size([1000, 100, 512])

In [17]:
with torch.inference_mode():
    # outputs = model(**inputs, output_hidden_states=True)
    # hidden_state = outputs.hidden_states[i].to("cuda")
    # feature_acts_model_A = sae.pre_acts(hidden_state)

    feature_acts_model_A = sae.pre_acts(accumulated_outputs)


In [18]:
feature_acts_model_A.shape

torch.Size([1000, 100, 32768])

In [19]:
first_dim_reshaped = feature_acts_model_A.shape[0] * feature_acts_model_A.shape[1]
reshaped_activations_A = feature_acts_model_A.reshape(first_dim_reshaped, feature_acts_model_A.shape[-1]).cpu()
del feature_acts_model_A
del accumulated_outputs
torch.cuda.empty_cache()

# get corrs (not norm in batch)

In [30]:
reshaped_activations_A.shape

torch.Size([100000, 32768])

In [31]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(reshaped_activations_A, reshaped_activations_A[:, range(reshaped_activations_A.shape[1])[::-1]])
highest_correlations_indices_AB = highest_correlations_indices_AB.detach().cpu().numpy()
highest_correlations_values_AB = highest_correlations_values_AB.detach().cpu().numpy()

OutOfMemoryError: CUDA out of memory. Tried to allocate 12.21 GiB. GPU 0 has a total capacity of 39.56 GiB of which 1.95 GiB is free. Process 125974 has 37.60 GiB memory in use. Of the allocated memory 37.04 GiB is allocated by PyTorch, and 72.21 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

# normalize in batches

In [26]:
mean_A = reshaped_activations_A.mean(dim=0, keepdim=True)
std_A = reshaped_activations_A.std(dim=0, keepdim=True)

chunk_size = 10000  # Number of rows per chunk
num_chunks = reshaped_activations_A.shape[0] // chunk_size

normalized_A = np.zeros_like(reshaped_activations_A)  # Preallocate the normalized matrix

for i in range(num_chunks):
    print (i, num_chunks)
    start_index = i * chunk_size
    end_index = start_index + chunk_size
    chunk = reshaped_activations_A[start_index:end_index]
    normalized_A[start_index:end_index] = (chunk - mean_A) / (std_A + 1e-8)

# Handle any remaining rows if the data size is not perfectly divisible by chunk_size
if reshaped_activations_A.shape[0] % chunk_size != 0:
    start_index = num_chunks * chunk_size
    chunk = reshaped_activations_A[start_index:]
    normalized_A[start_index:] = (chunk - mean_A) / (std_A + 1e-8)

0 10
1 10
2 10
3 10
4 10
5 10
6 10
7 10
8 10
9 10


In [20]:
def normalize_byChunks(actv_tensor, chunk_size=10000): # chunk_size: Number of rows per chunk
    mean_A = actv_tensor.mean(dim=0, keepdim=True)
    std_A = actv_tensor.std(dim=0, keepdim=True)

    num_chunks = actv_tensor.shape[0] // chunk_size

    normalized_A = np.zeros_like(actv_tensor.cpu())  # Preallocate the normalized matrix
    # normalized_A = actv_tensor.new_zeros(actv_tensor.size())

    for i in range(num_chunks):
        # print (i, num_chunks)
        start_index = i * chunk_size
        print (start_index, num_chunks)
        end_index = start_index + chunk_size
        chunk = actv_tensor[start_index:end_index]
        normalized_A[start_index:end_index] = (chunk - mean_A) / (std_A + 1e-8)

    # Handle any remaining rows if the data size is not perfectly divisible by chunk_size
    if actv_tensor.shape[0] % chunk_size != 0:
        start_index = num_chunks * chunk_size
        chunk = actv_tensor[start_index:]
        normalized_A[start_index:] = (chunk - mean_A) / (std_A + 1e-8)

    return torch.tensor(normalized_A)

In [21]:
def batched_correlation(reshaped_activations_A, reshaped_activations_B, batch_size=100):
    # Ensure tensors are on GPU
    # if torch.cuda.is_available():
    #     reshaped_activations_A = reshaped_activations_A.to('cuda')
    #     reshaped_activations_B = reshaped_activations_B.to('cuda')

    print(reshaped_activations_A.shape)

    # Normalize columns of A
    # mean_A = reshaped_activations_A.mean(dim=0, keepdim=True)
    # std_A = reshaped_activations_A.std(dim=0, keepdim=True)
    # normalized_A = (reshaped_activations_A - mean_A) / (std_A + 1e-8)  # Avoid division by zero

    # # Normalize columns of B
    # mean_B = reshaped_activations_B.mean(dim=0, keepdim=True)
    # std_B = reshaped_activations_B.std(dim=0, keepdim=True)
    # normalized_B = (reshaped_activations_B - mean_B) / (std_B + 1e-8)  # Avoid division by zero

    normalized_A = normalize_byChunks(reshaped_activations_A, chunk_size=10000)
    normalized_B = normalize_byChunks(reshaped_activations_B, chunk_size=10000)

    print(normalized_A.shape)

    num_batches = (normalized_B.shape[1] + batch_size - 1) // batch_size
    max_values = []
    max_indices = []

    for batch in range(num_batches):
        start = batch * batch_size
        print(start)
        end = min(start + batch_size, normalized_B.shape[1])

        batch_corr_matrix = torch.matmul(normalized_A.t(), normalized_B[:, start:end]) / normalized_A.shape[0]
        max_val, max_idx = batch_corr_matrix.max(dim=0)
        max_values.append(max_val)
        max_indices.append(max_idx)  # Adjust indices for the batch offset

        del batch_corr_matrix
        torch.cuda.empty_cache()

    return torch.cat(max_indices), torch.cat(max_values)

In [95]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(accumulated_outputs, accumulated_outputs[:, range(accumulated_outputs.shape[1])[::-1]])
# highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(accumulated_outputs, accumulated_outputs)
highest_correlations_indices_AB = highest_correlations_indices_AB.detach().cpu().numpy()
highest_correlations_values_AB = highest_correlations_values_AB.detach().cpu().numpy()

torch.Size([100000, 512])
torch.Size([100000, 512])
0
100
200
300
400
500


In [96]:
len(highest_correlations_indices_AB)

512

In [97]:
num_unq_pairs = len(list(set(highest_correlations_indices_AB)))
print("% unique: ", num_unq_pairs / len(highest_correlations_indices_AB))

% unique:  1.0


In [98]:
sum(highest_correlations_values_AB) / len(highest_correlations_values_AB)

0.9999899810645729

In [None]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(testdata, testdata[:, range(testdata.shape[1])[::-1]])
highest_correlations_indices_AB = highest_correlations_indices_AB.detach().cpu().numpy()
highest_correlations_values_AB = highest_correlations_values_AB.detach().cpu().numpy()

In [100]:
highest_correlations_indices_AB

array([2, 1, 0])

In [51]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(reshaped_activations_A[:, :100], reshaped_activations_A[:, :100][:, range(100)[::-1]])
highest_correlations_indices_AB = highest_correlations_indices_AB.detach().cpu().numpy()
highest_correlations_values_AB = highest_correlations_values_AB.detach().cpu().numpy()
highest_correlations_indices_AB

torch.Size([100000, 100])
0 10
10000 10
20000 10
30000 10
40000 10
50000 10
60000 10
70000 10
80000 10
90000 10
0 10
10000 10
20000 10
30000 10
40000 10
50000 10
60000 10
70000 10
80000 10
90000 10
torch.Size([100000, 100])
0


array([99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88,  0, 86, 85, 84, 83,
       82,  0, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66,
       65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53,  0, 51, 50, 49,
       48, 47,  0, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
       31, 30, 29, 28, 27, 26, 25, 24, 23, 22,  0, 20, 19, 18, 17, 16, 15,
       14, 13, 12,  0, 10,  9,  8,  7,  0,  5,  4,  3,  2,  0,  0])

In [None]:
highest_correlations_indices_AB, highest_correlations_values_AB = batched_correlation(reshaped_activations_A, reshaped_activations_A[:, range(reshaped_activations_A.shape[1])[::-1]])
highest_correlations_indices_AB = highest_correlations_indices_AB.detach().cpu().numpy()
highest_correlations_values_AB = highest_correlations_values_AB.detach().cpu().numpy()

In [23]:
highest_correlations_indices_AB

array([32767, 32766, 32765, ...,     2,     0,     0])

In [26]:
highest_correlations_indices_AB[:100]

array([32767, 32766, 32765, 32764, 32763, 32762,     0, 32760,     0,
       32758, 32757,     0, 32755, 32754, 32753, 32752, 32751, 32750,
       32749, 32748,     0, 32746,     0,     0,     0, 32742, 32741,
       32740, 32739,     0, 32737, 32736, 32735,     0, 32733,     0,
       32731, 32730, 32729, 32728, 32727, 32726, 32725, 32724, 32723,
       32722, 32721, 32720, 32719, 32718, 32717,     0, 32715, 32714,
           0, 32712, 32711, 32710, 32709, 32708, 32707, 32706, 32705,
       32704, 32703, 32702, 32701, 32700, 32699, 32698, 32697, 32696,
       32695, 32694, 32693, 32692, 32691, 32690, 32689, 32688, 32687,
       32686, 32685, 32684, 32683, 32682,     0, 32680, 32679, 23406,
       32677, 32676, 16167, 32674, 32673, 32672, 32671, 32670, 32669,
           0])

In [28]:
highest_correlations_indices_AB[-100:]

array([99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88,  0, 86, 85, 84, 83,
       82,  0, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66,
       65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53,  0, 51, 50, 49,
       48, 47,  0, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
       31, 30, 29, 28, 27, 26, 25, 24, 23, 22,  0, 20, 19, 18, 17, 16, 15,
       14, 13, 12,  0, 10,  9,  8,  7,  0,  5,  4,  3,  2,  0,  0])

In [24]:
num_unq_pairs = len(list(set(highest_correlations_indices_AB)))
print("% unique: ", num_unq_pairs / len(highest_correlations_indices_AB))

% unique:  0.861175537109375


In [25]:
sum(highest_correlations_values_AB) / len(highest_correlations_values_AB)

0.868817300781302

In [29]:
reshaped_activations_A.shape

torch.Size([100000, 32768])

In [None]:
inds, vals = batched_correlation(reshaped_activations_A, reshaped_activations_A)

In [32]:
inds

tensor([    0,     0,     2,  ..., 32765, 32766, 32767])

In [35]:
len(list(set(inds)))

32768

In [38]:
inds[:100]

tensor([ 0,  0,  2,  3,  4,  5,  0,  7,  8,  9, 10,  0, 12, 13, 14, 15, 16, 17,
        18, 19, 20,  0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45,  0, 47, 48, 49, 50, 51,  0, 53,
        54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
        72, 73, 74, 75, 76, 77, 78, 79, 80,  0, 82, 83, 84, 85, 86,  0, 88, 89,
        90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [39]:
len(list(set(inds[:100].tolist())))

92

In [37]:
len(list(set(inds[:100])))

100

In [33]:
num_unq_pairs = len(list(set(inds)))
print("% unique: ", num_unq_pairs / len(inds))

% unique:  1.0


In [34]:
sum(vals) / len(vals)

tensor(0.8688)

# find how many cols of tensor are all 0

In [27]:
import numpy as np

def count_zero_columns(tensor):
    # Check if all elements in each column are zero
    zero_columns = np.all(tensor == 0, axis=0)
    # Count True values in the zero_columns array
    zero_cols_indices = np.where(zero_columns)[0]
    return np.sum(zero_columns), zero_cols_indices

# Count zero columns
zero_cols_count, zero_cols_indices = count_zero_columns(reshaped_activations_A.cpu().numpy())
print("Number of zero columns:", zero_cols_count), zero_cols_indices

Number of zero columns: 4297


(None, array([    0,     1,     6, ..., 32756, 32759, 32761]))

In [21]:
len(reshaped_activations_A[0])

32768

In [22]:
4297 / 32768

0.131134033203125