In [1]:
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops wandb

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [2]:
from huggingface_hub import login

login("<add token>")


Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [3]:
import pandas as pd
import numpy as np
import torch
from datasets import Dataset, DatasetDict
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
)
import warnings

warnings.filterwarnings("ignore")

In [4]:
df = pd.read_csv("/content/train(1).csv")
df["question"] = (
    df["prompt"]
    + "\n A)"
    + df["A"]
    + "\n B)"
    + df["B"]
    + "\n C)"
    + df["C"]
    + "\n D)"
    + df["D"]
    + "\n E)"
    + df["E"]
    + "\n"
    + "You must only answer with the options and nothing else.I do not want an explanation, only three options that you think are mostly the answer. The answer to this question is"
    + df["answer"]
)
custom_ds = pd.DataFrame()
custom_ds["prompt"] = df["question"]

In [5]:
dataset = Dataset.from_pandas(custom_ds)

In [6]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
)

model_name = "meta-llama/Llama-2-7b-hf"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name, quantization_config=bnb_config, trust_remote_code=True
)
model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [8]:
from peft import LoraConfig, get_peft_model

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [9]:
from transformers import TrainingArguments

output_dir = "./results"
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 200
logging_steps = 10
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 300
warmup_ratio = 0.03
lr_scheduler_type = "constant"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

In [10]:
from trl import SFTTrainer

max_seq_length = 512

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="prompt",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [11]:
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

In [12]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mveer15102003[0m. Use [1m`wandb login --relogin`[0m to force relogin


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,1.3863
20,1.0255
30,0.8551
40,0.7919
50,0.7363
60,0.6998
70,0.7092
80,0.6647
90,0.6067
100,0.5867


TrainOutput(global_step=300, training_loss=0.41078211466471354, metrics={'train_runtime': 4683.4735, 'train_samples_per_second': 1.025, 'train_steps_per_second': 0.064, 'total_flos': 2.598037104820224e+16, 'train_loss': 0.41078211466471354, 'epoch': 24.0})

In [13]:
model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model  # Take care of distributed/parallel training
model_to_save.save_pretrained("outputs")

In [14]:
lora_config = LoraConfig.from_pretrained('outputs')
model = get_peft_model(model, lora_config)

In [20]:
text = dataset["prompt"][0]
device = "cuda:0"

preds = []
inputs = tokenizer(text, return_tensors="pt").to(device)
# outputs = model.generate(**inputs, max_new_tokens=50,return_dict_in_generate=True, output_scores=True)
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=50,
    return_dict_in_generate=True,
    output_scores=True,
)

first_token_probs = outputs.scores[0][0]
option_scores = (
    first_token_probs[[319, 350, 315, 360, 382]].float().cpu().numpy()
)  # ABCDE
pred = np.array(["A", "B", "C", "D", "E"])[np.argsort(option_scores)[::-1][:3]]
pred = " ".join(pred)
preds.append(pred)
# print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [21]:
preds

['E D C']

In [26]:
model.push_to_hub("Veer15/llama2-science-mcq-solver",create_pr=1)

CommitInfo(commit_url='https://huggingface.co/Veer15/llama2-science-mcq-solver/commit/438c2c9be1baf2894367e3d22f58751a23be12bf', commit_message='Upload model', commit_description='', oid='438c2c9be1baf2894367e3d22f58751a23be12bf', pr_url='https://huggingface.co/Veer15/llama2-science-mcq-solver/discussions/1', pr_revision='refs/pr/1', pr_num=1)