In [1]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"]= "1"


In [2]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm.auto import tqdm
import datasets

plt.style.use("ggplot")

from typing import Optional, List, Dict, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch import optim
from torch.utils.data import random_split, DataLoader, TensorDataset

from pathlib import Path
import transformers
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType, LoftQConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import Dataset

from loguru import logger

logger.add(os.sys.stderr, format="{time} {level} {message}", level="INFO")


  from .autonotebook import tqdm as notebook_tqdm


1

In [3]:
# load my code
%load_ext autoreload
%autoreload 2

from src.config import ExtractConfig
from src.prompts.prompt_loading import load_preproc_dataset
from src.models.load import load_model
# from src.prompts.prompt_loading import load_prompt_structure


In [4]:
# params
max_epochs = 100
device = "cuda:0"

# quiet please
torch.set_float32_matmul_precision("medium")
import warnings

warnings.filterwarnings("ignore", ".*does not have many workers.*")
warnings.filterwarnings(
    "ignore", ".*sampler has shuffling enabled, it is strongly recommended that.*"
)
warnings.filterwarnings("ignore", ".*has been removed as a dependency of.*")


In [5]:
# params
cfg = ExtractConfig(
    batch_size=2,
    max_examples=(400, 400),
    intervention_fit_examples=160,
)
model, tokenizer = load_model(
    cfg.model, disable_exllama=False, device=device,
)


[32m2023-12-18 10:28:32.914[0m | [1mINFO    [0m | [36msrc.models.load[0m:[36mverbose_change_param[0m:[36m16[0m - [1mtokenizer does not have use_cache[0m
2023-12-18T10:28:32.914981+0800 INFO tokenizer does not have use_cache
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[32m2023-12-18 10:28:33.244[0m | [1mINFO    [0m | [36msrc.models.load[0m:[36mverbose_change_param[0m:[36m21[0m - [1mchanging pad_token_id from None to 0[0m
2023-12-18T10:28:33.244305+0800 INFO changing pad_token_id from None to 0
[32m2023-12-18 10:28:33.244[0m | [1mINFO    [0m | [36msrc.models.load[0m:[36mverbose_change_param[0m:[36m21[0m - [1mchanging padding_side from right to left[0m
2023-12-18T10:28:33.244820+0800 INFO changing padding_side from right to left
[32m2023-12-18 10:28:33.245[0m | [1mINFO    [0m | [36msrc.models.load[0m:[36mverbose_change_param[0m:[36m21[0m - [1mchanging truncation_side fr

In [6]:
# model.to(device)


In [7]:
# TODO I would like to only have biases, but for now lets just try a very small intervention on the last parts of a layer...
peft_config = LoraConfig(
    target_modules=['out_proj', 'mlp.fc2',], # only the layers that go directly to the residual
    bias='lora_only',
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=1,
    lora_alpha=1,
    lora_dropout=0.,
    # layers_pattern='dsf4gg',
)
# model = get_peft_model(model, peft_config)



In [8]:
# model.to(device)


In [9]:
model.add_adapter(peft_config)
# model.print_trainable_parameters()


In [10]:
model.device


device(type='cuda', index=0)

In [11]:
N = sum(cfg.max_examples)
ds_name = "amazon_polarity"
ds_tokens = load_preproc_dataset(
    ds_name,
    tokenizer,
    N=N,
    seed=cfg.seed,
    num_shots=cfg.num_shots,
    max_length=cfg.max_length,
    prompt_format=cfg.prompt_format,
)


[32m2023-12-18 10:28:36.441[0m | [1mINFO    [0m | [36msrc.prompts.prompt_loading[0m:[36mload_preproc_dataset[0m:[36m364[0m - [1mmedian token length: 433.0 for amazon_polarity. max_length=1000[0m
2023-12-18T10:28:36.441848+0800 INFO median token length: 433.0 for amazon_polarity. max_length=1000
[32m2023-12-18 10:28:36.443[0m | [1mINFO    [0m | [36msrc.prompts.prompt_loading[0m:[36mload_preproc_dataset[0m:[36m368[0m - [1mtruncation rate: 0.00% on amazon_polarity[0m
2023-12-18T10:28:36.443448+0800 INFO truncation rate: 0.00% on amazon_polarity
Filter: 100%|██████████| 2402/2402 [00:01<00:00, 2114.02 examples/s]
[32m2023-12-18 10:28:37.594[0m | [1mINFO    [0m | [36msrc.prompts.prompt_loading[0m:[36mload_preproc_dataset[0m:[36m377[0m - [1mnum_rows (after filtering out truncated rows) 2402=>2402[0m
2023-12-18T10:28:37.594298+0800 INFO num_rows (after filtering out truncated rows) 2402=>2402


## Lora train

In [12]:
# from https://github.com/jonkrohn/NLP-with-LLMs/blob/main/code/Finetune-T5-on-GPU.ipynb
from pytorch_optimizer import Ranger21
import lightning.pytorch as pl
from torchmetrics import Metric, MetricCollection, Accuracy, AUROC
from torchmetrics.functional import accuracy


In [13]:
# to_tensor = lambda x: x # torch.from_numpy(x).float()
# to_ds = lambda hs0, hs1, y: TensorDataset(to_tensor(hs0), to_tensor(hs1), to_tensor(y))

class DeceptionDataModule(pl.LightningDataModule):

    def __init__(self,
                 ds: Dataset,
                 batch_size: int=32,
                #  x_cols = ['input_ids', 'attention_mask', 'label_true', 'label_instructed', 'choice_ids'],
                ):
        super().__init__()
        self.save_hyperparameters(ignore=["ds"])
        self.ds = ds.with_format('torch')
        # self.x_cols = x_cols
        self.setup('train')

    def setup(self, stage: str):
        h = self.hparams

        n = len(self.ds)
        self.splits = {
            'train': (0, int(n * 0.5)),
            'val': (int(n * 0.5), int(n * 0.75)),
            'test': (int(n * 0.75), n),
        }

        self.datasets = {key: self.ds.select(range(start, end)) for key, (start, end) in self.splits.items()}

    def create_dataloader(self, ds, shuffle=False):
        return DataLoader(ds, batch_size=self.hparams.batch_size, drop_last=False, shuffle=shuffle)

    def train_dataloader(self):
        return self.create_dataloader(self.datasets['train'], shuffle=True)

    def val_dataloader(self):
        return self.create_dataloader(self.datasets['val'])

    def test_dataloader(self):
        return self.create_dataloader(self.datasets['test'])

# https://huggingface.co/docs/datasets/use_with_pytorch#data-loading


In [14]:



class LoraFinetuner(pl.LightningModule):
    def __init__(
        self, model: AutoModelForCausalLM, tokenizerm: AutoTokenizer, total_steps: int, lr=4e-3, weight_decay=1e-9
    ):
        super().__init__()
        self.model = model
        self.tokenizer = tokenizer
        self.save_hyperparameters(
            ignore=["model", 'tokenizer'],
        )

    def forward(self, batch):
        b_in = dict(input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask'],)
        # b_in = {k: v.to(self.model.device) for k, v in b_in.items()}
         
        return self.model(
            **b_in,
            use_cache=False,
            output_hidden_states=True,
            return_dict=True
        )
        # odict_keys(['logits', 'hidden_states', 'attentions'])

    def _step(self, batch, batch_idx=0, stage="train"):
        self.model.disable_adapters()
        with torch.no_grad():
            out = self(batch)
            log_probs = torch.log_softmax(out['logits'][:, -1,], -1)
            del out
        
        self.model.enable_adapters()
        out2 = self(batch)
        log_probs2 = torch.log_softmax(out2['logits'][:, -1,], -1)

        if stage == "pred":
            return log_probs2.exp()
        
        
        # get loss, so that our adapter returns switched probs for our choices (e.g. Yes <> No)
        id_neg = batch['choice_ids'][:, 0]
        id_pos = batch['choice_ids'][:, 1]

        opposite_log_probs = log_probs.clone()
        for i in range(id_neg.shape[1]):
            opposite_log_probs[:, id_neg[:, i]] = log_probs[:, id_pos[:, i]]
        loss = F.kl_div(log_probs2, opposite_log_probs, reduction='batchmean', log_target=True)

        self.log(f"{stage}/loss", loss, on_epoch=True, on_step=False, prog_bar=True)
        self.log(
            f"{stage}/n", len(id_neg), on_epoch=True, on_step=False, reduce_fx=torch.sum
        )
        return loss

    def training_step(self, batch, batch_idx=0, dataloader_idx=0):
        return self._step(batch, batch_idx)

    def validation_step(self, batch, batch_idx=0, dataloader_idx=0):
        return self._step(batch, batch_idx, stage="val")

    def predict_step(self, batch, batch_idx=0, dataloader_idx=0):
        return self._step(batch, batch_idx, stage="pred").cpu().detach()

    def test_step(self, batch, batch_idx=0, dataloader_idx=0):
        return self._step(batch, batch_idx, stage="test")

    def configure_optimizers(self):
        """use ranger21 from  https://github.com/kozistr/pytorch_optimizer"""
        optimizer = Ranger21(
            self.parameters(),
            lr=self.hparams.lr,
            weight_decay=self.hparams.weight_decay,
            num_iterations=self.hparams.total_steps,
        )
        return optimizer


## Train

In [15]:
dm = DeceptionDataModule(ds_tokens, batch_size=cfg.batch_size)
dm


<__main__.DeceptionDataModule at 0x7f8f58429390>

In [16]:
dl_train = dm.train_dataloader()
dl_val = dm.val_dataloader()
b = next(iter(dl_train))
print(b.keys(), b['input_ids'].shape)
c_in = b['input_ids'].shape[1]
c_in


dict_keys(['ds_string', 'example_i', 'answer', 'messages', 'answer_choices', 'template_name', 'label_true', 'label_instructed', 'instructed_to_lie', 'sys_instr_name', 'question', 'input_ids', 'attention_mask', 'truncated', 'length', 'prompt_truncated', 'choice_ids']) torch.Size([2, 1000])


1000

In [17]:
# from accelerate import Accelerator

# accelerator = Accelerator(device_placement=False)
# model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
#     model, None, dl_train, dl_val, device_placement=[False, False, False, False]
# )


In [18]:
b = next(iter(dl_train))
print(b.keys(), b['input_ids'].shape)
c_in = b['input_ids'].shape[1]
c_in


dict_keys(['ds_string', 'example_i', 'answer', 'messages', 'answer_choices', 'template_name', 'label_true', 'label_instructed', 'instructed_to_lie', 'sys_instr_name', 'question', 'input_ids', 'attention_mask', 'truncated', 'length', 'prompt_truncated', 'choice_ids']) torch.Size([2, 1000])


1000

In [19]:
net = LoraFinetuner(model, tokenizer, lr=3e-4, weight_decay=1e-9, total_steps=len(dl_train)*max_epochs)

print(c_in)
# net.model.enable_adapters()


# net = accelerator.prepare(
#     net, device_placement=[False]
# )


1000


In [20]:
# we want to init lightning early, so it inits accelerate
trainer1 = pl.Trainer(
    precision="16-true",
    # precision="16-mixed",
    # precision="b16-mixed",
    # precision="b16-mixed",
    # gradient_clip_val=20,

    # accelerator="auto",
    # devices="1",

    accelerator="gpu",
    devices=[0],

    max_epochs=max_epochs,
    log_every_n_steps=3,
    # enable_progress_bar=False, 
    enable_model_summary=False
)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [21]:
net.model.device


device(type='cuda', index=0)

In [22]:
# b_in = dict(input_ids=b['input_ids'],
#     attention_mask=b['attention_mask'],)
# b_in = {k: v.to(net.model.device) for k, v in b_in.items()}

# with torch.no_grad():
#     y = net(b_in, )
# y.keys()


In [23]:
# from torchinfo import summary
# net.model.disable_adapters()
# summary(net, input_data=(dict(input_ids=b['input_ids'], attention_mask=b['attention_mask'],),), depth=4)


In [24]:

trainer1.fit(model=net, train_dataloaders=dl_train, val_dataloaders=dl_val);


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00,  1.08it/s]

/media/wassname/SGIronWolf/projects5/elk/sgd_probes_are_lie_detectors/.venv/lib/python3.11/site-packages/lightning/pytorch/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Epoch 0:   0%|          | 0/200 [00:00<?, ?it/s]                           

OutOfMemoryError: CUDA out of memory. Tried to allocate 50.00 MiB. GPU 0 has a total capacty of 23.67 GiB of which 1.13 GiB is free. Including non-PyTorch memory, this process has 21.46 GiB memory in use. Of the allocated memory 20.92 GiB is allocated by PyTorch, and 235.11 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
df_hist = read_metrics_csv(trainer1.logger.experiment.metrics_file_path).ffill().bfill()
for key in ["loss_rec"]:
    df_hist[[c for c in df_hist.columns if key in c]].plot(logy=True)


In [None]:

# predict
dl_test = dm.test_dataloader()
# print(f"training with x_feats={x_feats} with c={c}")
rs = trainer2.test(net, dataloaders=[dl_train, dl_val, dl_test, dl_oos])

testval_metrics = calc_metrics(dm, trainer2, net, use_val=True)
rs = rename(rs, ["train", "val", "test", "oos"])
# rs['test'] = {**rs['test'], **test_metrics}
rs["test"]["acc_lie_lie"] = testval_metrics["acc_lie_lie"]
rs["testval_metrics"] = rs["test"]
