## Import packages and specify local GPU for training

In [5]:
import torch
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from mpl_toolkits.axes_grid1 import make_axes_locatable
from torch.utils.data import Dataset, DataLoader, random_split
import torch.optim as optim

import EWOthello.utils.plot_helpers as plt_util
from EWOthello.data.othello import *
from EWOthello.mingpt.dataset import ProbingDataset, CharDataset # AK's mingpt data child 
from EWOthello.mingpt.model import GPT, GPTConfig, GPTforProbing, GPTforProbing_v2
from EWOthello.mingpt.probe_trainer import Trainer, TrainerConfig
from EWOthello.mingpt.utils import set_seed, sample # AKs helpers for sampling predictions
from EWOthello.mingpt.probe_model import BatteryProbeClassification
set_seed(44)

print(torch.cuda.is_available())
# device = "mps"
device = torch.cuda.current_device()
print(torch.cuda.get_device_name(device))

False


## Test Generation of Probe Datasets

In [6]:
othello = get(ood_num=-1, data_root=None, num_preload=1) # 11 corresponds to over 1 million games
game_dataset = CharDataset(othello) 

Max num files: 230; Use_num: 1
['gen10e5__20220324_155234.pickle']


Mem Used: 0.2633 GB: 100%|██████████| 1/1 [00:00<00:00,  1.79it/s]


Deduplicating...
Deduplicating finished with 100000 games left
Using 20 million for training, 0 for validation
Dataset created has 100000 sequences, 61 unique words.


In [7]:
mconf = GPTConfig(game_dataset.vocab_size, game_dataset.block_size, n_layer=8, n_head=8, n_embd=512)
model_probe = GPTforProbing(mconf, probe_layer=6)
model_probe.load_state_dict(torch.load("../EWOthello/ckpts/DeanKLi_GPT_Synthetic_1L8H/GPT_Synthetic_1Layers_8Heads.ckpt"))   
model_probe = model_probe.to(device)

properties_modifier_matrix = np.ones((59, 64))
for i in range(59):
    if i % 2 == 1:
        properties_modifier_matrix[i, :] *= -1.0

property_container_v2 = []
property_container = []
act_container = []
for x, _ in tqdm(game_dataset):
    # Convert the game index sequence to board number sequence for use with the othello board class
    tbf = [game_dataset.itos[_] for _ in x.tolist()]
    valid_until = tbf.index(-100) if -100 in tbf else 999

    # Get the board state vectors
    a = OthelloBoardState()
    properties = a.get_gt(tbf[:valid_until], "get_state")
    property_container.extend(properties)
    properties_v2 = (np.array(properties) - 1.0) * properties_modifier_matrix[:valid_until, :] + 1.0
    property_container_v2.extend(properties_v2.tolist())

    # Get the activation vectors
    act = model_probe(x[None, :].to(device))[0].detach().cpu().split(1, dim=0)    
    act_container.extend(act)
    break

### Visualize
num_disp = 5
fig = plt.figure(figsize=(num_disp*2, 4))
ax = plt_util.addAxis(fig, 2, num_disp)
for i in range(num_disp):
    plt_util.plot_game_discs(np.reshape(property_container[i],[8,8]), ax[i])
    plt_util.plot_game_discs(np.reshape(property_container_v2[i], [8,8]), ax[i+num_disp])
    ax[i].set_title(f"Moves Played: {i+1}")

plt.tight_layout()
ax[0].set_ylabel("Board State (Old)")
ax[num_disp].set_ylabel("New Representation")
plt_util.format_ax_boardImage(ax)

RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.