In [1]:
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q trl xformers wandb datasets einops gradio sentencepiece bitsandbytes

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.2/311.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m50.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m51.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m4.4 MB/s

In [2]:
import os
import torch
import wandb
import platform
import gradio
import warnings
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
    TextStreamer
    )
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model
    )
from datasets import (
    load_dataset,
    DatasetDict,
    Dataset
    )
from trl import SFTTrainer
from huggingface_hub import notebook_login



In [4]:
# Hugging Face Hub id of the base checkpoint; the model and tokenizer cells
# below both load from this id.
base_model = 'meta-llama/Llama-2-7b-hf'

In [5]:
# Authenticate this session against the Hugging Face Hub (renders a login widget).
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [7]:
import numpy as np

# Hub id of the instruction dataset.
original_dataset = 'vicgalle/alpaca-gpt4'

# Load the dataset; only its 'train' split is used below.
dataset = load_dataset(original_dataset)
train_split = dataset['train']

# Requested subsample size.
N = 1000

# Draw distinct row indices from a seeded generator:
#   * replace=False -> no row can be picked twice (randint could repeat rows),
#   * the upper bound comes from the split itself instead of a hard-coded 51999,
#   * the fixed seed makes the subsample reproducible across kernel restarts.
rng = np.random.default_rng(42)
sample_size = min(N, len(train_split))
random_indexes = rng.choice(len(train_split), size=sample_size, replace=False).tolist()

# Fetch the sampled rows once; the result maps each column name to a list of values.
sampled_rows = train_split[random_indexes]

# Build the new single-split dataset from the four columns of interest.
dataset = DatasetDict({
    'dataset': Dataset.from_dict({
        column: sampled_rows[column]
        for column in ('instruction', 'input', 'output', 'text')
    })
})

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Deleting unused files from dataset repository:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading metadata:   0%|          | 0.00/547 [00:00<?, ?B/s]

In [8]:
# Display the DatasetDict built above (split name, columns and row count).
dataset

DatasetDict({
    data: Dataset({
        features: ['instruction', 'input', 'output', 'text'],
        num_rows: 1000
    })
})

In [1]:
# Quantization recipe: 4-bit NF4 weights, float16 compute dtype, double
# quantization turned off.
bnb_config = BitsAndBytesConfig(**{
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.float16,
    "bnb_4bit_use_double_quant": False,
})

# Load the base checkpoint with that recipe; the device map assigns the whole
# model (key "") to device index 0.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map={"": 0},
    quantization_config=bnb_config,
)

# Let PEFT prepare the quantized model for training, then adjust two config flags.
model = prepare_model_for_kbit_training(model)
model.config.pretraining_tp = 1
model.config.use_cache = False

NameError: ignored

In [None]:
# Tokenizer of the base checkpoint: right-side padding, model_max_length of 512,
# and BOS/EOS insertion explicitly enabled.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    add_bos_token=True,
    add_eos_token=True,
    padding_side="right",
    model_max_length=512,
    trust_remote_code=True,
)

# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Monitoring with Weights & Biases.
# The API key is read from the WANDB_API_KEY environment variable instead of
# being hard-coded in the notebook. When the variable is unset or empty,
# `key=None` is passed and wandb falls back to its own credential lookup/prompt.
wandb.login(key=os.environ.get('WANDB_API_KEY') or None)
run = wandb.init(
    project='llama2-7b-finetune-alpaca-gpt4-1k',
    job_type='training',
    anonymous='allow'
    )

In [None]:
# Display the loaded model's repr.
model

In [None]:
# LoRA adapter settings: rank 8, alpha 16, dropout 0.1, bias left untouched.
# The target list names every module that receives an adapter.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=(
        ['q_proj', 'k_proj', 'v_proj', 'o_proj']
        + ['gate_proj', 'up_proj', 'down_proj']
        + ['lm_head']
    ),
)

In [None]:
def trainable_parameters(model):
    """
    Print how many parameters of ``model`` are trainable.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and ``requires_grad`` (e.g. a ``torch.nn.Module``).

    Returns:
        None. A single summary line with the trainable count, the total count
        and the trainable percentage is written to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # A model without any parameters would otherwise raise ZeroDivisionError.
    percentage = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percentage}"
    )

In [None]:
# Wrap the model with the adapters described by `peft_config`, then print how
# many parameters are trainable.
# NOTE: this rebinds `model` to the wrapped model, so re-running the cell would
# pass the already wrapped model to `get_peft_model` again.
model = get_peft_model(model, peft_config)
trainable_parameters(model)

In [None]:
# Hyper-parameters for the fine-tuning run, grouped by concern.
training_arguments = TrainingArguments(
    # output location and checkpoint cadence
    output_dir='./results',
    save_steps=1000,
    # run length and batching (6 samples per device x 2 accumulation steps)
    num_train_epochs=1,
    max_steps=-1,
    per_device_train_batch_size=6,
    gradient_accumulation_steps=2,
    group_by_length=True,
    # optimizer and learning-rate schedule
    optim='paged_adamw_8bit',
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    lr_scheduler_type='linear',
    warmup_ratio=0.3,
    # both mixed-precision switches are off
    fp16=False,
    bf16=False,
    # logging
    logging_steps=20,
    report_to='wandb',
)

In [None]:
# Hub id of the training corpus.
original_dataset = 'vicgalle/alpaca-gpt4'

# Load only the slice "train[0:1000]" of the train split.
dataset = load_dataset(original_dataset, split="train[0:1000]")

# Preview the 'text' field of the first example.
dataset[0]['text']

In [None]:
# Display the dataset that was just loaded for training.
dataset

In [None]:
# Set Supervised Fine-tuning Trainer (SFTTrainer) parameters.
# `model` has already been wrapped by `get_peft_model(model, peft_config)` in an
# earlier cell, so `peft_config` is deliberately NOT passed again here.
# NOTE(review): depending on the installed TRL version, passing a config for an
# already wrapped model is either ignored or rejected — confirm against the
# TRL release in use.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    max_seq_length=1024,
    dataset_text_field='text',
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False
    )

In [None]:
# Train the model with the trainer configured above.
trainer.train()

In [None]:
# Save the fine-tuned model
hf_username = 'wt-golf'
# `base_model` has the form "<org>/<name>". Only "<name>" is kept so that the
# result is a "<user>/<repo>" id with a single slash; the previous value
# ("wt-golf/meta-llama/Llama-2-7b-hf-...") contained two and therefore could not
# be used as a Hub repository id when pushing later on.
ft_model = hf_username + "/" + base_model.split("/")[-1] + "-finetune-qlora-alpaca-gpt4-1k"

# Write the trained weights to the local directory named by `ft_model`.
trainer.model.save_pretrained(ft_model)
# Close the Weights & Biases run.
wandb.finish()
# Re-enable the cache that was switched off for training and enter eval mode
# for the inference cells that follow.
model.config.use_cache = True
model.eval()

In [None]:
def query(user_prompt, max_new_tokens=1000):
    """
    Stream the model's answer to a single instruction to stdout.

    Relies on the notebook-level ``model`` and ``tokenizer``.

    Args:
        user_prompt (str): Instruction text; surrounding whitespace is stripped.
        max_new_tokens (int): Upper bound on the number of generated tokens.
            Defaults to 1000, the value that used to be hard-coded.

    Returns:
        None. The ``TextStreamer`` prints the generated text as it is produced.
    """
    # Run on whatever device the model lives on instead of assuming "cuda:0".
    device = next(model.parameters()).device

    # Built by concatenation so the indentation of this function does not leak
    # into the prompt (the former triple-quoted string embedded it).
    system_prompt = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request. "
        "If you don't know, just say I don't know. DO NOT make up the answer.\n\n"
    )

    inst, resp = "### Instruction:\n", "### Response:\n"

    prompt = f'{system_prompt}{inst}{user_prompt.strip()}\n\n{resp}'

    inputs = tokenizer(
        [prompt],
        return_tensors="pt"
        ).to(device)

    # The training tokenizer is created with add_eos_token=True, which appends
    # EOS to every encoded text. A prompt ending in EOS tells the model the
    # sequence is already finished, so drop that trailing token before generating.
    eos_id = tokenizer.eos_token_id
    input_ids = inputs["input_ids"]
    if eos_id is not None and input_ids.shape[-1] > 1 and input_ids[0, -1].item() == eos_id:
        inputs = {name: tensor[:, :-1] for name, tensor in inputs.items()}

    streamer = TextStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True
        )

    # generate() returns the usual output; the streamer additionally prints the
    # generated text to stdout, which is all this function needs.
    _ = model.generate(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)

In [None]:
# Example prompt for the fine-tuned model; the answer is streamed to stdout.
query("What is Newton's first law?")

In [None]:
# Drop the notebook-level references to the training objects (both names are
# undefined after this cell), then ask PyTorch to empty its CUDA cache.
del model
del trainer
torch.cuda.empty_cache()

In [None]:
# Reload the base checkpoint in float16, with the whole model mapped to device 0.
merged_model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map={"": 0},
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    return_dict=True,
)

# Attach the adapter saved under `ft_model`, then merge it and unload the wrapper.
model = PeftModel.from_pretrained(merged_model, ft_model)
model = model.merge_and_unload()

# Reload the tokenizer (right-side padding), again reusing EOS as the pad token.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    trust_remote_code=True,
    padding_side="right",
)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Publish the merged model together with its tokenizer under the `ft_model` repo id.
# `trainer` no longer exists at this point (it was removed by `del model, trainer`),
# so `trainer.push_to_hub(ft_model)` would raise NameError.
# NOTE(review): `Trainer.push_to_hub` also takes a commit message, not a repo
# id, as its first argument — confirm against the Transformers docs.
model.push_to_hub(ft_model)
tokenizer.push_to_hub(ft_model)