In [1]:
from dataclasses import dataclass

@dataclass
class Args:
    """Hub identifiers shared by the cells of this notebook."""

    # Repo id handed to the tokenizer, the PEFT config and the adapter loader.
    model_checkpoint: str = "w11wo/Llama-2-7b-longlora-32k-merged-FourSquare-NYC-POI"
    # Repo id handed to `load_dataset`.
    dataset_id: str = "w11wo/FourSquare-NYC-POI"


# Single shared instance built from the defaults above.
args = Args()

In [8]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig
from peft import PeftConfig, PeftModel
from datasets import load_dataset
import torch
from accelerate import Accelerator

In [3]:
# Tokenizer that ships with the fine-tuned checkpoint.
tokenizer = AutoTokenizer.from_pretrained(args.model_checkpoint)

# Dataset is fetched by its configured id; later cells read its "test" split.
dataset = load_dataset(args.dataset_id)

# Fall back to the EOS id for padding when the tokenizer defines no pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# NOTE: the prompts are already formatted with a leading <s> token, so the
# tokenizer is told not to add a BOS token of its own.
if tokenizer.add_bos_token:
    tokenizer.add_bos_token = False

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
# NOTE(review): presumably the delimiter after which the model's answer starts
# in the prompt format; it is not referenced by the cells visible here.
response_template = "[/INST]"

# One dtype reused for the 4-bit compute/storage settings below and, later,
# for loading the base model.
torch_dtype = torch.float16

# 4-bit NF4 quantization with double quantization enabled.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_storage=torch_dtype,
    bnb_4bit_compute_dtype=torch_dtype,
)

In [5]:
# Adapter metadata; its `base_model_name_or_path` names the base weights to load.
peft_config = PeftConfig.from_pretrained(args.model_checkpoint)

# Index of the current process as reported by Accelerate, used as the target device.
device_index = Accelerator().process_index

# Load the quantized base model; the adapter is attached in a later cell.
model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    quantization_config=quantization_config,
    torch_dtype=torch_dtype,
    device_map={"": device_index},  # "" key: place the whole model on this device
    attn_implementation="sdpa",  # alternatively use "flash_attention_2"
)

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]

In [6]:
# Attach the adapter weights from the checkpoint on top of the base model.
# This cell rebinds `model`, so without the guard a second execution would wrap
# the already-wrapped PeftModel once more; the isinstance check makes the cell
# safe to re-run.
if not isinstance(model, PeftModel):
    model = PeftModel.from_pretrained(model, args.model_checkpoint)

# Put the model in evaluation mode; as the cell's last expression this also
# displays the module tree.
model.eval()

adapter_model.safetensors:   0%|          | 0.00/33.6M [00:00<?, ?B/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear4bit(
     

In [9]:
# Decoding settings passed to `model.generate` further down.
generation_config = GenerationConfig(
    # Length budget: at most 5 new tokens, no enforced minimum.
    max_new_tokens=5,
    min_new_tokens=None,
    # Stop and padding ids both reuse the tokenizer's EOS id.
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    # Sampling controls.
    do_sample=True,
    temperature=0.6,
    top_k=40,
    top_p=0.1,
    typical_p=1.0,
    repetition_penalty=1.176,
    # One returned sequence per prompt, with the key/value cache enabled.
    num_return_sequences=1,
    use_cache=True,
)

In [10]:
# Take the first example of the test split; its "llama_prompt" field holds the
# full prompt text (the displayed value starts with "<s>[INST] <<SYS>>").
line = dataset["test"][0]["llama_prompt"]
line

'<s>[INST] <<SYS>>\nYou are user 1 and your basic information is as follows:\nAge: adult; Gender: male; Education: college & beyond; SocioEco: middle.\nYou have the following traits: extroverted, agreeable, conscientious, emotionally stable, open.\nYou have the following preferences: frequent coffee shops, enjoys gym workouts, likes dining out, shops at department stores, frequent subway user.\nYou have the following routines: visits gym regularly, stops by coffee shops after meals, shops at department stores on weekends, uses public transportation frequently.\nUser 1 is an outgoing male in his late 20s who actively engages with his community through visits to various local amenities. He frequents coffee shops and enjoys socializing over a cup of coffee, which suggests a preference for casual social environments and a love for good food and drink. Regular trips to the gym indicate a health-conscious lifestyle, coupled with a disciplined routine that highlights his conscientious nature.

In [38]:
# Parallel accumulators: a later cell appends one model answer and one
# ground-truth id at a time, and both lists are passed to the accuracy metric.
predictions = []
targets = []

In [33]:
import re


def split_prompt_and_target(text):
    """Split a full example into the model prompt and its ground-truth POI id.

    The answer is located by the pattern ``<digits>. </s>``. When the pattern
    occurs more than once, the LAST occurrence is used, instead of failing on
    a fixed three-way unpack of ``re.split``.

    Args:
        text: The complete example string (prompt followed by the answer).

    Returns:
        A ``(prompt, target)`` tuple: ``prompt`` is everything before the
        answer digits, ``target`` is the digit string itself.

    Raises:
        ValueError: If ``text`` contains no ``<digits>. </s>`` answer.
    """
    answers = list(re.finditer(r"(\d+)\.\s</s>", text))
    if not answers:
        raise ValueError(
            "could not find a target POI id of the form '<digits>. </s>' in the example"
        )
    answer = answers[-1]
    return text[: answer.start()], answer.group(1)


# split prompt with target POI
prompt, target = split_prompt_and_target(line)
prompt_input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)

# The generation config samples (do_sample=True); seed the RNG so that
# re-running this cell reproduces the same output.
torch.manual_seed(42)

with torch.no_grad():
    outputs = model.generate(**prompt_input_ids, generation_config=generation_config)

In [12]:
# Display the parsed prompt and target POI id for a quick visual check.
prompt, target

('<s>[INST] <<SYS>>\nYou are user 1 and your basic information is as follows:\nAge: adult; Gender: male; Education: college & beyond; SocioEco: middle.\nYou have the following traits: extroverted, agreeable, conscientious, emotionally stable, open.\nYou have the following preferences: frequent coffee shops, enjoys gym workouts, likes dining out, shops at department stores, frequent subway user.\nYou have the following routines: visits gym regularly, stops by coffee shops after meals, shops at department stores on weekends, uses public transportation frequently.\nUser 1 is an outgoing male in his late 20s who actively engages with his community through visits to various local amenities. He frequents coffee shops and enjoys socializing over a cup of coffee, which suggests a preference for casual social environments and a love for good food and drink. Regular trips to the gym indicate a health-conscious lifestyle, coupled with a disciplined routine that highlights his conscientious nature

In [36]:
# Size of dimension 1 of the encoded input ids; used below as the offset at
# which the generated tokens start.
prompt_token_length = prompt_input_ids["input_ids"].size(1)

In [39]:
# Decode only the newly generated tokens (everything after the prompt) of the
# first returned sequence.
generated_text = tokenizer.decode(outputs[0, prompt_token_length:], skip_special_tokens=True)

# Keep the FIRST contiguous run of digits as the predicted POI id. Stripping
# every non-digit character instead would glue unrelated digits together
# (e.g. "12. 3" -> "123") and produce an id the model never generated.
# An output without any digit yields the empty string.
first_number = re.search(r"[0-9]+", generated_text)
prediction = first_number.group(0) if first_number else ""

In [40]:
# Record this example's ground truth and model answer at matching positions.
targets.append(target)
predictions.append(prediction)

In [41]:
from sklearn.metrics import accuracy_score

# Score the collected predictions against the collected targets.
accuracy_score(y_true=targets, y_pred=predictions)

0.0