# Let's just do supervised learning

Since we are looking at pairs with random permutations (from dropout), we can't use CCS. This is because our probabilities do not add to one.

People question whether unsupervised learning brings anything to the table anyway, so let's start with supervised...


links:
- [loading](https://github.com/deep-diver/LLM-As-Chatbot/blob/main/models/alpaca.py)
- [dict](https://github.com/deep-diver/LLM-As-Chatbot/blob/c79e855a492a968b54bac223e66dc9db448d6eba/model_cards.json#L143)
- [prompt_format](https://github.com/deep-diver/PingPong/blob/main/src/pingpong/alpaca.py)

In [None]:

import copy
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
plt.style.use('ggplot')

import random
from typing import Optional, List, Dict, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch import optim
from torch.utils.data import random_split, DataLoader, TensorDataset

import pickle
import hashlib
from pathlib import Path

from datasets import load_dataset
import datasets

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForMaskedLM, AutoModelForCausalLM, AutoConfig
import transformers
from transformers.models.auto.modeling_auto import AutoModel
from transformers import LogitsProcessorList


import lightning.pytorch as pl
from dataclasses import dataclass

from sklearn.linear_model import LogisticRegression
# from scipy.stats import zscore
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.preprocessing import RobustScaler

from tqdm.auto import tqdm
import gc
import os

from loguru import logger
logger.add(os.sys.stderr, format="{time} {level} {message}", level="INFO")


transformers.__version__

# Model

Choosing:
- https://old.reddit.com/r/LocalLLaMA/wiki/models
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
- https://github.com/deep-diver/LLM-As-Chatbot/blob/main/model_cards.json


An uncensored and large one might be best for lying.

In [None]:
from peft import PeftModel

In [None]:
# leaderboard https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
# Keyword arguments forwarded to AutoModelForCausalLM.from_pretrained below.
model_options = dict(
    device_map="auto", 
    load_in_4bit=True,
    # load_in_8bit=True,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    use_safetensors=False,
    # use_cache=False,
)

# so I need to use either pythia, stablelm, or tiiuae/falcon-7b-instruct to get dropout...
# model_repo = "stabilityai/stablelm-tuned-alpha-7b" # poor performance

# Candidate checkpoints. `model_repo` is assigned several times below; only the
# last uncommented assignment takes effect, the earlier ones are overwritten.
# https://github.com/deep-diver/LLM-As-Chatbot/blob/main/models/falcon.py
model_repo = "tiiuae/falcon-7b-instruct"
# model_repo = "tiiuae/falcon-7b"
# model_repo = "togethercomputer/RedPajama-INCITE-7B-Instruct"
# model_repo = "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
# model_repo = "OpenAssistant/falcon-7b-sft-top1-696"
# model_repo = "openaccess-ai-collective/manticore-13b"
# model_repo = "TheBloke/Wizard-Vicuna-13B-Uncensored-HF"
# model_repo = "dvruette/llama-13b-pretrained-dropout"
# model_repo = "elinas/llama-13b-hf-transformers-4.29" # no dropout
# lora_repo = "LLMs/AlpacaGPT4-LoRA-13B-elina"
model_repo = "bigcode/starcoderplus"
model_repo = "HuggingFaceH4/starchat-beta"
model_repo = "WizardLM/WizardCoder-15B-V1.0"
# model_repo= "~/.cache/huggingface/hub/models--HuggingFaceH4--starchat-beta"
# lora_repo = None
# No LoRA adapter: the PeftModel branch further down is skipped.
lora_repo = None

config = AutoConfig.from_pretrained(model_repo, trust_remote_code=True,)
print(config)
# Optional dropout overrides, currently disabled:
# config.attn_pdrop=0.3
# config.embd_pdrop=0.3
# config.resid_pdrop=0.3
# The key/value cache is switched off; the hidden-state collection code later in
# the notebook asks the reader to check `use_cache==False` when paired passes
# come out identical.
config.use_cache = False
tokenizer = AutoTokenizer.from_pretrained(model_repo)
model = AutoModelForCausalLM.from_pretrained(model_repo, config=config, **model_options)

if lora_repo is not None:
    # Wrap the base model with the LoRA adapter.
    # https://github.com/tloen/alpaca-lora/blob/main/generate.py#L40
    from peft import PeftModel
    # NOTE(review): confirm that PeftModel.from_pretrained actually honours
    # `lora_dropout` when loading an already-trained adapter.
    model = PeftModel.from_pretrained(
        model,
        lora_repo, 
        torch_dtype=torch.float16,
        lora_dropout=0.2,
        device_map='auto'
    )
    
# if not mode_8bit and not mode_4bit:
#     model.half()

In [None]:
model

In [None]:
# https://github.com/deep-diver/LLM-As-Chatbot/blob/main/models/falcon.py
print(tokenizer.pad_token_id)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = 204 # <unk> https://github.com/deep-diver/LLM-As-Chatbot/blob/main/models/alpaca.py
tokenizer.padding_side = "left"

# Params

In [None]:
# Params
# N_SAMPLES = 4000
BATCH_SIZE = 8 # None # None means auto # 6 gives 16Gb/25GB. where 10GB is the base model. so 6 is 6/15
N_SHOTS = 3  # number of few-shot examples prepended to each prompt
USE_MCDROPOUT = True  # True keeps dropout active at inference; a float also overrides p
dataset_n = 200

# Work out how many transformer layers the model has. Different architectures
# expose this differently, so try the config attribute first, then the module
# list of a wrapped model, and only then fall back to a fixed guess.
try:
    # num_layers = len(model.model.layers)
    num_layers = model.config.n_layer
    print(num_layers)
except AttributeError:
    try:
        num_layers = len(model.base_model.model.model.layers)
        print(num_layers)
    except (AttributeError, TypeError):
        # Only "attribute missing / not sized" is expected here; anything else
        # (including KeyboardInterrupt) should propagate instead of being hidden.
        num_layers = 10
        print(f"could not determine the number of layers, assuming {num_layers}")

# Sample every `stride`-th layer, skipping the first two, and always include
# the layer at index num_layers-2.
stride = 2
extract_layers = tuple(range(2, num_layers-2, stride)) + (num_layers-2,)
extract_layers, num_layers

In [None]:
# Get the token ids for the two answer words ("negative"/"positive"); they are
# used later to read the model's probability for each answer.
# Note that sentencepiece tokenizers have different tokens for "No" and "\nNo",
# so each word is tokenized after a newline, as it appears in the prompt.
token_n = "negative"
token_y = "positive"
# Take the last id of each encoding; the asserts below verify that this last id
# decodes back to exactly the answer word (i.e. the word is a single token).
id_n, id_y = tokenizer(f'\n{token_n}', add_special_tokens=True)['input_ids'][-1], tokenizer(f'\n{token_y}', add_special_tokens=True)['input_ids'][-1]
assert tokenizer.decode([id_n])==token_n
assert tokenizer.decode([id_y])==token_y
id_n, id_y

# Load Dataset

In [None]:
# NOTE: despite the "imdb" wording used in comments and function names in this
# notebook, the dataset loaded here is amazon_polarity.
dataset = load_dataset("amazon_polarity")
# Evaluation rows come from the test split.
data = dataset['test']

# Define Prompt

- Lilian Weng's guide https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/
- Elvis's guide https://www.promptingguide.ai/techniques/fewshot


In [None]:
# Pool of few-shot examples: the train split, shuffled with a fixed seed.
example_prompts = dataset['train'].shuffle(seed=42)

In [None]:

def random_example(max_tokens: int = 150):
    """
    Draw a random short example from the shuffled train split.

    Rows are sampled with `np.random` until one is found whose `content`
    tokenizes to at most `max_tokens` tokens, so that few-shot prompts stay short.

    Args:
        max_tokens: maximum number of tokens allowed for the example's content.
            (Original note: 95% of rows are below 250 tokens, 70% below 150.)

    Returns:
        (text, is_positive): the formatted 'Title: "...". Content: "..."' string
        and whether the row's label equals 1.
    """
    while True:
        i = np.random.randint(example_prompts.num_rows)
        ex = example_prompts[i]
        if len(tokenizer(ex['content']).input_ids) <= max_tokens:
            break
    text = f"Title: \"{ex['title']}\". Content: \"{ex['content']}\""
    return text, ex['label']==1

print(random_example()[0])

In [None]:
# Scenario text placed in front of the first few-shot example. The lying and
# truthful variants (and the generic `prefix`) are all the same string object.
prefix_lie = prefix_true = prefix = f"""The classic puzzle of the two guards. It presents a scenario where two guards are involved in a conflict of truths and lies. Here's the setup:

There are two guards standing at two separate doors. One door leads to safety, while the other leads to danger. One guard always tells the truth, and the other guard always lies. You don't know which guard is which or which door leads to safety."""

# Speaker name used in the response header of some prompt formats.
# NOTE(review): both characters are "Left Guard", so lying and truthful prompts
# differ only through their few-shot answers - confirm this is intentional.
char_lie = "Left Guard"
char_true = "Left Guard"

def prompt_format_alpaca(input:str, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
    """
    Render one example in the Alpaca "### Instruction / ### Response" layout.

    Args:
        input: the review text to classify.
        response: answer text placed after the response header ("" leaves it open).
        include_prefix: prepend the scenario text (lying or truthful variant).
        lie: selects the lying variant of the scenario text and speaker name.

    Returns:
        The formatted prompt string.
    """
    header = ""
    if include_prefix:
        scenario = prefix_lie if lie else prefix_true
        header = f"{scenario}\n\n"
    speaker = char_lie if lie else char_true
    task = 'Following the above instructions, using your own character, classify the sentiment of the given movie review, "positive" or "negative".'
    return f'{header}### Instruction:\n{task}\n\n{input}\n\n### {speaker} Response:\n{response}'


def prompt_format_oa(input:str, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
    """
    Render one example with <|prompter|> / <|assistant|> turn markers.

    Args:
        input: the review text to classify.
        response: answer text; when non-empty it is terminated with "<|endoftext|>".
        include_prefix: prepend the scenario text followed by "<|endoftext|>".
        lie: selects the lying variant of the scenario text and speaker name.

    Returns:
        The formatted prompt string.
    """
    header = ""
    if include_prefix:
        scenario = prefix_lie if lie else prefix_true
        header = scenario + "<|endoftext|>"
    speaker = char_lie if lie else char_true
    task = 'Following the above instructions, using your own character, classify the sentiment of the given movie review, "positive" or "negative".'
    if response != "":
        # A completed turn is closed with the end-of-text marker.
        response = response + "<|endoftext|>"
    return f'{header}<|prompter|>{task}\n{input}<|endoftext|><|assistant|>{speaker} Response:\n{response}'

def prompt_format_falcon(input:str, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
    """
    Render one example in a Question / Context / Answer layout.

    Args:
        input: the review text, placed under "Context:".
        response: answer text placed under "Answer:" ("" leaves it open).
        include_prefix: prepend an "Instruction:" section holding the scenario text.
        lie: selects the lying variant of the scenario text (only read when
            `include_prefix` is true; this layout has no speaker name).

    Returns:
        The formatted prompt string.
    """
    prefix = ""
    if include_prefix:
        prefix = "Instruction:\n" + (prefix_lie if lie else prefix_true) + "\n\n"
    instruction = 'Following the above instructions, using your own character, classify the sentiment of the given movie review, "positive" or "negative".'
    return f'{prefix}Question:\n{instruction}\n\nContext:\n{input}\n\nAnswer:\n{response}'


def prompt_format_vicuna(input:str, question:Optional[bool]=None, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
    """
    Render one USER / ASSISTANT exchange in the Vicuna layout.

    https://github.com/melodysdreamj/WizardVicunaLM

    The user turn asks whether the review is "positive" when `question == 1`
    and "negative" otherwise (including when `question` is None).

    NOTE(review): `question` is the second positional parameter here, whereas
    the sibling formatters take `response` second - pass `response` by keyword.

    Args:
        input: the review text appended to the user turn.
        question: which sentiment the user turn asks about.
        response: answer text placed after "ASSISTANT: ".
        include_prefix: prepend the scenario text (lying or truthful variant).
        lie: selects the lying variant of the scenario text.

    Returns:
        The formatted prompt string.
    """
    asked_label = 'positive' if (question==1) else 'negative'
    user_turn = f"Is the sentiment of the below review {asked_label}?"
    preamble = ((prefix_lie if lie else prefix_true) + "\n\n") if include_prefix else ""
    return f'{preamble}USER: {user_turn} {input}\nASSISTANT: {response}'

# def prompt_format_vicuna2(input:str, question:Optional[bool]=None, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
#     """
#     vicuna format
    
#     https://github.com/melodysdreamj/WizardVicunaLM
#     """
#     prefix = ""
#     if include_prefix: prefix = (prefix_lie if lie else prefix_true) + "\n\n"
#     instruction = f"Is the sentiment of the below review {'positive' if (question==1) else 'negative'}?"
#     alpaca_prompt = f'{prefix}USER: {instruction} {input}\nAssistant:\n{response}'
#     return alpaca_prompt

def prompt_format_manticore(input:str, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
    """
    Render one example in the Manticore "### Instruction: / ### <speaker>:" layout.

    https://github.com/melodysdreamj/WizardVicunaLM
    https://huggingface.co/openaccess-ai-collective/manticore-13b#examples

    Args:
        input: the review text to classify.
        response: answer text placed after the speaker header ("" leaves it open).
        include_prefix: prepend the scenario text (lying or truthful variant).
        lie: selects the lying variant of the scenario text and speaker name.

    Returns:
        The formatted prompt string.
    """
    header = ""
    if include_prefix:
        scenario = prefix_lie if lie else prefix_true
        header = f"{scenario}\n\n"
    speaker = char_lie if lie else char_true
    task = 'Classify the sentiment of the given movie review, "positive" or "negative".'
    return f'{header}### Instruction: {task}\n\n{input}\n\n### {speaker}:\n{response}'

# def prompt_format_manticore2(input:str, question:Optional[bool]=None, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
#     """
#     vicuna format
    
#     https://github.com/melodysdreamj/WizardVicunaLM
#     https://huggingface.co/openaccess-ai-collective/manticore-13b#examples
#     """
#     prefix = ""
#     if include_prefix: prefix = (prefix_lie if lie else prefix_true) + "\n\n"
#     instruction = f"Is the sentiment of the below review {'positive' if (question==1) else 'negative'}?"
#     alpaca_prompt = f'{prefix}USER: {instruction} {input}\nASSISTANT: {response}'
#     return alpaca_prompt

def prompt_format_chatml(input:str, response:str="", include_prefix:bool=False, lie:Optional[bool]=None):
    """
    Render one example with <|system|> / <|user|> / <|assistant|> markers.

    https://huggingface.co/HuggingFaceH4/starchat-beta

    "<|system|>\n<|end|>\n<|user|>\n{query}<|end|>\n<|assistant|>"

    Args:
        input: the review text to classify.
        response: answer text; when non-empty it is closed with "<|end|>\n".
        include_prefix: prepend a system turn holding the scenario text.
        lie: selects the lying variant of the scenario text (only read when
            `include_prefix` is true; this layout has no speaker name).

    Returns:
        The formatted prompt string.
    """
    prefix = ""
    if include_prefix:
        prefix = "<|system|>" + (prefix_lie if lie else prefix_true) + "<|end|>\n"
    if len(response)>0:
        # A completed assistant turn is closed with the end marker.
        response += "<|end|>\n"
    instruction = 'Classify the sentiment of the given movie review, "positive" or "negative".'
    return f'{prefix}<|user|>{instruction}\n\n{input}\n\n<|end|>\n<|assistant|>\n{response}'


# Explicit repo-name -> prompt-format-key overrides, consulted before any
# guessing from the repo name.
repo_dict = {
    "TheBloke/Wizard-Vicuna-13B-Uncensored-HF": 'vicuna',
    'Neko-Institute-of-Science/VicUnLocked-30b-LoRA': 'vicuna',
    "ehartford/Wizard-Vicuna-13B-Uncensored": 'vicuna',
    "HuggingFaceH4/starchat-beta": 'chatml',
    "WizardLM/WizardCoder-15B-V1.0": 'alpaca',
    # 'tiiuae/falcon-7b': 'manticore',
    # 'tiiuae/falcon-7b-instruct': 'vicuna',
}
# Prompt-format key -> formatter function. The keys double as substrings that
# guess_prompt_format searches for in the lower-cased repo name, in this order.
prompt_formats = {
    'vicuna': prompt_format_vicuna,
    'alpaca': prompt_format_alpaca,
    'llama': prompt_format_alpaca,
    'manticore': prompt_format_manticore,
    'falcon': prompt_format_falcon,
    'chatml': prompt_format_chatml,
}
def guess_prompt_format(model_repo, lora_repo):
    """
    Choose the prompt formatter for a model (or its LoRA adapter, if given).

    Resolution order:
      1. an exact match of the repo name in `repo_dict`;
      2. the first key of `prompt_formats` that occurs in the lower-cased repo name;
      3. the alpaca formatter as a default.

    Args:
        model_repo: name of the base model repo.
        lora_repo: name of the adapter repo, or None; when set it takes precedence.

    Returns:
        The selected prompt-format function.
    """
    repo = lora_repo if lora_repo is not None else model_repo

    # 1. explicit override
    if repo in repo_dict:
        return prompt_formats[repo_dict[repo]]

    # 2. substring match on the repo name
    for fmt, fn in prompt_formats.items():
        if fmt not in repo.lower():
            continue
        print(f"guessing prompt format '{str(fn.__name__)}' based on {fmt} in '{repo}'")
        return fn

    # 3. nothing matched
    print(f"can't work out prompt format, defaulting to alpaca for '{repo}'")
    return prompt_format_alpaca
    
    

# Pick the single-example prompt formatter for the configured repos; the bare
# expression below displays which function was chosen.
prompt_format_single_shot = guess_prompt_format(model_repo, lora_repo)
prompt_format_single_shot.__name__

In [None]:

def set_seeds(n):
    """Seed transformers, torch, numpy and Python's `random` with the same value `n`."""
    seeders = (
        transformers.set_seed,
        torch.manual_seed,
        np.random.seed,
        random.seed,
    )
    for seed_fn in seeders:
        seed_fn(n)

In [None]:
def rand_bool() -> bool:
    """
    Flip a fair coin using numpy's global random state.

    The comparison yields a numpy boolean; it is converted to a built-in bool
    so that identity checks such as `x is True` behave as expected for callers.
    """
    return bool(np.random.rand() > 0.5)


def to_item(x):
    """Convert a single-element torch tensor to a plain Python scalar; return anything else unchanged."""
    return x.detach().cpu().item() if isinstance(x, torch.Tensor) else x


def format_imdb_multishot(input:str, response:str="", lie:Optional[bool]=None, n_shots=N_SHOTS, verbose:bool=False, answer:Optional[bool]=None, seed=None):
    """
    Build a few-shot prompt: `n_shots` random solved examples followed by `input`.

    Every shot is answered consistently with `lie`: the shown answer is the
    example's true sentiment XOR `lie`. The scenario prefix is included only on
    the first shot.

    Args:
        input: the review text for the final, unanswered (or `response`-answered) example.
        response: answer text appended to the final example.
        lie: whether the shots demonstrate lying; chosen at random when None.
        n_shots: number of solved examples to prepend.
        verbose: print the answer logic of each shot.
        answer: true label of `input`, used only to fill the returned info.
        seed: when given, seeds all RNGs so the same shots are drawn.

    Returns:
        (prompt, info): the full prompt string, and a dict with keys `input`,
        `lie`, `desired_answer` (true answer XOR lie, or None when `answer` is
        None) and `true_answer`, with tensor values converted to Python scalars.
    """
    if seed is not None:
        set_seeds(seed)
    if lie is None:
        # Coerce to a built-in bool: a numpy boolean would make the truthiness
        # and identity logic below unreliable and is awkward to serialize.
        lie = bool(rand_bool())
    if seed is not None:
        # Re-seed so the shots drawn below are the same whether or not the coin
        # flip above consumed random state.
        set_seeds(seed)

    # Pass `response` by keyword: not every formatter takes it as the second
    # positional parameter (the vicuna formatter has `question` there).
    main = prompt_format_single_shot(input, response=response, lie=lie)
    desired_answer = answer^lie == 1 if answer is not None else None
    info = dict(input=input, lie=lie, desired_answer=desired_answer, true_answer=answer)
    info = {k:to_item(v) for k,v in info.items()}

    shots = []
    for i in range(n_shots):
        shot_input, shot_answer = random_example()
        # The answer shown in the shot is the truth flipped when lying.
        shot_desired = bool(shot_answer ^ lie)
        if verbose: print(f"shot-{i} answer={shot_answer}, lie={lie}. (q*a)^l==(({shot_answer})^{lie}=={shot_desired}) ")
        shot = prompt_format_single_shot(
            shot_input,
            response="positive" if shot_desired else "negative",
            lie=lie,
            include_prefix=i==0,
        )
        shots.append(shot)

    return "\n\n".join(shots+[main]), info


In [None]:
def none_to_list_of_nones(d, n):
    """Return `d` unchanged, or a list of `n` Nones when `d` is None."""
    return [None] * n if d is None else d


def format_imdbs_multishot(texts:List[str], response:Optional[str]="", lies:Optional[list]=None, answers:Optional[list]=None):
    """
    Batch version of format_imdb_multishot.

    Args:
        texts: review texts, one prompt is built per entry.
        response: NOTE(review): accepted but never forwarded to
            format_imdb_multishot, so it has no effect on the prompts.
        lies: per-text lie flags, or None to let each prompt choose at random.
        answers: per-text true labels, or None.

    Returns:
        [prompts, infos]: two parallel lists, transposed from the per-text
        (prompt, info) pairs.
    """
    n_texts = len(texts)
    if response == "": response = [""]*n_texts
    lies = none_to_list_of_nones(lies, n_texts)
    answers = none_to_list_of_nones(answers, n_texts)

    per_text = []
    for idx, text in enumerate(texts):
        per_text.append(format_imdb_multishot(input=text, lie=lies[idx], answer=answers[idx]))

    # Transpose [(prompt, info), ...] into [[prompt, ...], [info, ...]].
    return [list(column) for column in zip(*per_text)]

# QC: generation

Let's do a quick generation, so we can QC the output and sanity-check that the model can actually do the task.

In [None]:

# text, label = random_example()
# q, info = format_imdb_multishot(text, answer=label, lie=True, verbose=True)
# print(q)
# print('-'*80)
# pipeline = transformers.pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
# )
# sequences = pipeline(
#     q,
#     max_length=800,
#     do_sample=False,
#     return_full_text=False,
#     eos_token_id=tokenizer.eos_token_id,
# )
# for seq in sequences:
#     print(f"{seq['generated_text']}")

# Collect hidden state pairs

The idea is this: given a pair of hidden states where everything is the same except the random seed or dropout, tell me which one is more truthful.

If this works, then for any inference we can see which of the pair is more truthful. Then we can see whether it's the lower- or higher-probability one, and judge the answer as true or false.

Steps:
- collect pairs of hidden states, where the inputs and outputs are the same. We modify the random seed and dropout.
- Each pair should have a binary answer. We can get that by comparing the probabilities of two tokens such as Yes and No.
- Train a probe to distinguish the pairs as more and less truthful
- Test probe to see if it generalizes

In [None]:
def clear_mem():
    """Run the garbage collector and release torch's cached CUDA memory."""
    gc.collect()
    torch.cuda.empty_cache()
    # Collect again after the CUDA cache has been emptied.
    gc.collect()
    

def enable_dropout(model, USE_MCDROPOUT:Union[float,bool]=True):
    """
    Switch the model's dropout layers back to training mode at test time.

    Only modules whose class name starts with "Dropout" are touched; every other
    module keeps its current mode.

    Args:
        model: module whose sub-modules are scanned.
        USE_MCDROPOUT: when it does not compare equal to True (e.g. a float such
            as 0.3), it is also written to each dropout layer's `p`.
    """
    dropout_layers = (
        module for module in model.modules()
        if module.__class__.__name__.startswith('Dropout')
    )
    for layer in dropout_layers:
        layer.train()
        # Equality (not identity) is intentional: it matches the original check.
        if USE_MCDROPOUT != True:  # noqa: E712
            layer.p = USE_MCDROPOUT
                
                
def check_for_dropout(model):
    """Return True if any module whose class name starts with "Dropout" has p > 0, else False."""
    return any(
        module.__class__.__name__.startswith('Dropout') and module.p > 0
        for module in model.modules()
    )
    
# Free memory, then fail fast if the loaded model has no active dropout layer:
# the paired forward passes below rely on dropout to differ from each other.
clear_mem()
assert check_for_dropout(model), 'model should have dropout modules'

In [None]:

            
def get_hidden_states(model, tokenizer, input_text, layers=extract_layers, truncation_length=900, output_attentions=False):
    """
    Run a single forward pass of a decoder model over the prompts and collect
    the hidden states and answer probabilities at the last token position.

    Args:
        model: causal LM providing `prepare_inputs_for_generation`, `forward` and `device`.
        tokenizer: tokenizer used to encode the prompts; rows are padded to a
            common length, so it is expected to pad on the left for position -1
            to be the final prompt token of every row.
        input_text: one prompt string or a list of prompt strings.
        layers: indices into the model's `hidden_states` output to keep.
        truncation_length: keep only the last N token positions (None disables).
        output_attentions: also request attentions from the model.

    Returns:
        dict with tensors converted to numpy:
          hidden_states: selected layers at the last position, stacked on axis 1
              (presumably (batch, n_layers, hidden) - TODO confirm),
          ans: prob_y / (prob_n + prob_y + eps),
          prob_n, prob_y: softmax probability of the "negative"/"positive" tokens,
          scores: next-token logits at the last position,
          text_q: decoded input ids, text_ans: decoded greedy next token,
          input_id_shape: shape of the (truncated) input ids,
          attentions: None unless `output_attentions` is set.
    """
    if not isinstance(input_text, list):
        input_text = [input_text]
    encoded = tokenizer(
        input_text,
        return_tensors="pt",
        padding=True,
        add_special_tokens=True,
    )
    input_ids = encoded.input_ids.to(model.device)
    # Padded batches need the attention mask, otherwise the pad tokens are
    # attended to like real tokens and shorter prompts get distorted states.
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(model.device)

    # if add_bos_token:
    #     input_ids = input_ids[:, 1:]

    # Handling truncation: truncate start, not end
    if truncation_length is not None:
        input_ids = input_ids[:, -truncation_length:]
        if attention_mask is not None:
            attention_mask = attention_mask[:, -truncation_length:]

    # forward pass
    last_token = -1
    first_token = 0
    with torch.no_grad():
        model.eval()
        # Re-enable only the dropout layers so repeated calls give different results.
        if USE_MCDROPOUT: enable_dropout(model, USE_MCDROPOUT)

        # taken from greedy_decode https://github.com/huggingface/transformers/blob/ba695c1efd55091e394eb59c90fb33ac3f9f0d41/src/transformers/generation/utils.py
        logits_processor = LogitsProcessorList()
        model_kwargs = dict(use_cache=False)
        if attention_mask is not None:
            model_kwargs["attention_mask"] = attention_mask
        model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
        outputs = model.forward(**model_inputs, return_dict=True, output_attentions=output_attentions, output_hidden_states=True)

        # Logits for the token that would follow the prompt.
        next_token_logits = outputs.logits[:, last_token, :]
        outputs['scores'] = logits_processor(input_ids, next_token_logits)[:, None,:]

        # Greedy choice of that single next token, appended to the inputs.
        next_tokens = torch.argmax(outputs['scores'], dim=-1)
        outputs['sequences'] = torch.cat([input_ids, next_tokens], dim=-1)

        # the output is large, so we will just select what we want:
        # 1) selected layers with [layers], 2) the last token position
        attentions = None
        if output_attentions:
            # shape is [(batch_size, num_heads, sequence_length, sequence_length)]*num_layers
            # lets take max?
            # NOTE(review): given the shape above, `v[:, last_token]` indexes the
            # heads axis rather than the sequence axis, and the concat below joins
            # layers along the batch axis - confirm this is what is wanted.
            attentions = [outputs['attentions'][i] for i in layers]
            attentions = [v[:, last_token] for v in attentions]
            attentions = torch.concat(attentions)

        hidden_states = torch.stack([outputs['hidden_states'][i] for i in layers], 1)

        # take just the last token so every row has the same size
        hidden_states = hidden_states[:, :, last_token]

        text_q = tokenizer.batch_decode(input_ids)

        # Strip the prompt ids so only the newly chosen token is decoded.
        s = outputs['sequences']
        s = [s[i][len(input_ids[i]):] for i in range(len(s))]
        text_ans = tokenizer.batch_decode(s)

        scores = outputs['scores'][:, first_token].softmax(-1) # for first (and only) token
        prob_n, prob_y = scores[:, [id_n, id_y]].T
        # eps keeps the ratio finite when both probabilities are ~0.
        eps = 1e-3
        ans = (prob_y/(prob_n+prob_y+eps))

    out = dict(hidden_states=hidden_states, ans=ans, text_ans=text_ans, text_q=text_q, input_id_shape=input_ids.shape,
                attentions=attentions, prob_n=prob_n, prob_y=prob_y, scores=outputs['scores'][:, 0]
               )
    out = {k:to_numpy(v) for k,v in out.items()}
    return out


def to_numpy(x):
    """Convert a torch tensor to a numpy array (detached, on CPU); return anything else unchanged."""
    if not isinstance(x, torch.Tensor):
        return x
    return x.detach().cpu().numpy()


# Helper  Batch data

In [None]:


def md5hash(s: Union[str, bytes]) -> str:
    """
    Return the hexadecimal MD5 digest of `s`.

    `hashlib.md5` only accepts bytes-like input, so text is UTF-8 encoded first;
    bytes (e.g. the output of `pickle.dumps`) are hashed as they are.
    """
    if isinstance(s, str):
        s = s.encode("utf-8")
    return hashlib.md5(s).hexdigest()


In [74]:

def batch_hidden_states(model, tokenizer, data, prompt_fn, n=100, batch_size=2):
    """
    Generate paired hidden-state records for `n` random examples of `data`.

    Each batch of prompts is run through the model twice with identical inputs;
    the two passes are expected to differ only through dropout randomness.

    Args:
        model: model passed to get_hidden_states.
        tokenizer: tokenizer passed to get_hidden_states.
        data: dataset with "content" and "label" columns, supporting
            `.shuffle(seed=...)` and `.select(...)`.
        prompt_fn: callable `(texts, answers=..., lies=...) -> (prompts, infos)`
            that turns a batch of texts into prompts.
        n: number of examples to draw.
        batch_size: number of prompts per forward pass.

    Yields:
        One dict per example with keys `hs1`, `ans1` (first pass), `hs2`, `ans2`
        (second pass), `true` (label) and `info` (prompt metadata).

    This is deliberately simple so that it's easy to understand, rather than being optimized for efficiency
    """
    # setup
    model.eval()

    ds_subset = data.shuffle(seed=42).select(range(n))
    dl = DataLoader(ds_subset, batch_size=batch_size, shuffle=True)
    for i, batch in enumerate(tqdm(dl, desc='get hidden states')):
        texts, true_labels = batch["content"], batch["label"]
        # Alternate within the batch: positions 0, 2, 4, ... are lies.
        lies = [j%2==0 for j in range(len(texts))]
        # Use the injected prompt function for the real prompts, not only for a check.
        q, info = prompt_fn(texts, answers=true_labels, lies=lies)
        if i==0:
            assert len(texts)==len(q), 'make sure the prompt function can handle a list of text'

        # Two passes over the same prompts; they differ only due to dropout.
        hs1 = get_hidden_states(model, tokenizer, q)
        hs2 = get_hidden_states(model, tokenizer, q)
        if i==0:
            # Sanity check on the first batch: the mean relative difference
            # between the two passes must be non-zero.
            eps=1e-5
            mpe = lambda x,y: np.mean(np.abs(x-y)/(np.abs(x)+np.abs(y)+eps))
            a,b=hs2['hidden_states'],hs1['hidden_states']
            assert mpe(a,b)>eps, "the hidden state pairs should be different but are not. Check model.config.use_cache==False, check this model has dropout in it's arch"

        # Emit one record per example rather than one per batch.
        for j in range(len(hs1['hidden_states'])):
            yield dict(
                hs1=hs1['hidden_states'][j],
                ans1=hs1["ans"][j],
                hs2=hs2['hidden_states'][j],
                ans2=hs2["ans"][j],
                true=true_labels[j],
                info=info[j]
            )

## Lightning DataModule

In [75]:
# Number of dataset rows to collect hidden-state pairs for.
N = 40
# Batch-level prompt builder; its name is recorded in the cache key and in the
# dataset description built below.
prompt_fn = format_imdbs_multishot

In [76]:
# unique hash
set_seeds(42)
text, label = random_example()
example_prompt1, _ = format_imdb_multishot(text, answer=True, lie=True, seed=42)
example_prompt2, _ = format_imdb_multishot(text, answer=False, lie=False, seed=42)
kwargs = [str(model), str(tokenizer), str(data), str(prompt_fn.__name__), N, example_prompt1, example_prompt2,]
key = pickle.dumps(kwargs, 1)
hsh = md5hash(key)[:6]
hsh

sanitize = lambda s:s.replace('/', '').replace('-', '_') if s is not None else s
name = f"{sanitize(model_repo)}-{sanitize(lora_repo)}-N_{N}-ns_{N_SHOTS}-mc_{USE_MCDROPOUT}-{hsh}"
name

'WizardLMWizardCoder_15B_V1.0-None-N_40-ns_3-mc_True-593d1f'

In [77]:
# Rebinds `dataset`: earlier cells index it by split name (dataset['test'],
# dataset['train']); after this line it is the test split itself.
dataset = load_dataset("amazon_polarity", split="test")

Found cached dataset amazon_polarity (/home/ubuntu/.cache/huggingface/datasets/amazon_polarity/amazon_polarity/3.0.0/a27b32b7e7b88eb274a8fa8ba0f654f1fe998a87c22547557317793b5d2772dc)


In [78]:
from datasets import Dataset, DatasetInfo, load_from_disk

# features = Features.from_dict({'_type': {'dtype': 'string', 'id': None, '_type': 'Value'}})
# {'_type': Value(dtype='string', id=None)}

# Provenance of this run, stored as text in the dataset description.
info_kwargs = dict(model_repo=model_repo, lora_repo=lora_repo, data=str(dataset), prompt_fn=str(prompt_fn.__name__), N=N, example_prompt1=example_prompt1, example_prompt2=example_prompt2)

# Materialise the records yielded by batch_hidden_states into a Dataset.
# NOTE(review): the run log below says "Subsequent calls will reuse this data",
# which suggests the generated dataset is cached - confirm the cache is
# refreshed when the model or prompts change.
ds = Dataset.from_generator(
    generator=batch_hidden_states,
    info=DatasetInfo(description=f'kwargs={info_kwargs}'),
    gen_kwargs=dict(
        model=model,
        tokenizer=tokenizer,
        data=dataset,
        n=N,
        batch_size=BATCH_SIZE,
        # NOTE(review): passes the function directly rather than the `prompt_fn`
        # variable whose name is recorded above; currently they are the same.
        prompt_fn=format_imdbs_multishot,
    ),
)
# Save under a name that encodes the model, parameters and fingerprint.
f = f"./.ds/{name}"
ds.save_to_disk(f)
f

Downloading and preparing dataset None/None to /home/ubuntu/.cache/huggingface/datasets/generator/default-16a1046179fb0811/0.0.0...


Generating train split: 0 examples [00:00, ? examples/s]

Loading cached shuffled indices for dataset at /home/ubuntu/.cache/huggingface/datasets/amazon_polarity/amazon_polarity/3.0.0/a27b32b7e7b88eb274a8fa8ba0f654f1fe998a87c22547557317793b5d2772dc/cache-0a5d0b47b5e8dfc6.arrow


get hidden states:   0%|          | 0/5 [00:00<?, ?it/s]

Dataset generator downloaded and prepared to /home/ubuntu/.cache/huggingface/datasets/generator/default-16a1046179fb0811/0.0.0. Subsequent calls will reuse this data.


Saving the dataset (0/1 shards):   0%|          | 0/40 [00:00<?, ? examples/s]

(Dataset({
     features: ['hs1', 'ans1', 'hs2', 'ans2', 'true', 'info'],
     num_rows: 40
 }),
 './.ds/WizardLMWizardCoder_15B_V1.0-None-N_40-ns_3-mc_True-593d1f',
 DatasetInfo(description='kwargs={\'model_repo\': \'WizardLM/WizardCoder-15B-V1.0\', \'lora_repo\': None, \'data\': "Dataset({\\n    features: [\'label\', \'title\', \'content\'],\\n    num_rows: 400000\\n})", \'prompt_fn\': \'format_imdbs_multishot\', \'N\': 40, \'example_prompt1\': \'The classic puzzle of the two guards. It presents a scenario where two guards are involved in a conflict of truths and lies. Here\\\'s the setup:\\n\\nThere are two guards standing at two separate doors. One door leads to safety, while the other leads to danger. One guard always tells the truth, and the other guard always lies. You don\\\'t know which guard is which or which door leads to safety.\\n\\n### Instruction:\\nFollowing the above instructions, using your own character, classify the sentiment of the given movie review, "positive" or

In [79]:
f"./.ds/{name}"

'./.ds/WizardLMWizardCoder_15B_V1.0-None-N_40-ns_3-mc_True-593d1f'

In [83]:
from datasets import load_from_disk
# Reload the saved dataset to check that it round-trips.
# NOTE(review): hard-coded copy of the path built from `name` above; it goes
# stale whenever the model, parameters or fingerprint change.
f='./.ds/WizardLMWizardCoder_15B_V1.0-None-N_40-ns_3-mc_True-593d1f'
ds2 = load_from_disk(f)
ds2[0].keys()

dict_keys(['hs1', 'ans1', 'hs2', 'ans2', 'true', 'info'])

In [87]:
ds2[1]['ans1']
len(ds2)

40

In [86]:
# ds2.info