In [None]:
%pip install transformers==4.34.1 accelerate==0.24.0 sentencepiece==0.1.99 optimum==1.13.2 peft==0.5.0 bitsandbytes==0.41.2.post2

In [None]:
!pip install datasets==2.17.0

In [1]:
from transformers import Trainer, TrainingArguments, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, DataCollatorForLanguageModeling
import torch
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

2024-04-10 00:23:39.944132: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-10 00:23:39.944232: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-10 00:23:40.087529: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
from datasets import load_dataset

In [3]:
!pip install wandb



In [5]:
import wandb
wandb.init()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [4]:
!mkdir /kaggle/working/results

In [8]:
#Training params
common_args = {
    "run_name":"training",
    "seed":42,
    "data_seed":42,
    "remove_unused_columns":False
}
train_args = {
    "per_device_train_batch_size":1,
    "per_device_eval_batch_size":1,
    "gradient_accumulation_steps":1,
    "learning_rate":3e-4,
    "weight_decay":0,
    "num_train_epochs":1,
    "warmup_steps":100,
}
eval_args = {
    "evaluation_strategy":"steps",
    "jit_mode_eval":True,
    "eval_steps":20,
}
backup_args = {
    "output_dir":"/content/results",
    "overwrite_output_dir":True,
    "save_strategy":"steps",
    "save_steps":20,
    "load_best_model_at_end":True,
    "push_to_hub":True,
    "hub_model_id":"evgmaslov/Llama-2-7b-hf-fuction-calling-lora",
    "hub_strategy":"checkpoint",
    "hub_token":"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",
}
log_args = {
    "log_level":"warning",
    "logging_strategy":"steps",
    "logging_steps":20,
    "report_to":"wandb"
}
acceleration_args = {
    "fp16":True,
    "fp16_full_eval":True,
}
train_args = TrainingArguments(**common_args, **train_args, **eval_args, **backup_args, **log_args, **acceleration_args)

In [7]:
model_config = {
    "pretrained_model_name_or_path":"TheBloke/Llama-2-7B-fp16",
    "low_cpu_mem_usage":True,
    "torch_dtype":torch.float16,
    "device_map":"auto",
    "offload_state_dict":True,
    "quantization_config":BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16,
                                             bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True)
}

base_model = AutoModelForCausalLM.from_pretrained(**model_config)
base_model = prepare_model_for_kbit_training(base_model)
lora_config = LoraConfig(
    r=16,
    lora_alpha=8,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(base_model, lora_config)

config.json:   0%|          | 0.00/554 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

In [9]:
tokenizer = AutoTokenizer.from_pretrained(model_config["pretrained_model_name_or_path"])
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

In [10]:
dataset = load_dataset("evgmaslov/glaive-function-calling-v2-parsed-ru")
dataset = dataset.map(
        lambda row: tokenizer(row["text"]),
        batched=False,
    )
dataset = dataset.remove_columns("text")

Downloading readme:   0%|          | 0.00/276 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/5.36M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4200 [00:00<?, ? examples/s]

Map:   0%|          | 0/4200 [00:00<?, ? examples/s]

In [11]:
dataset = dataset["train"].train_test_split(test_size=0.001)

In [12]:
trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

In [None]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
20,1.4915,1.510139
40,1.2636,1.300152
60,0.8644,1.046914
80,0.5912,1.008726
100,0.6031,0.981471
120,0.5061,0.966544
140,0.4463,0.960621
160,0.5639,0.957785
180,0.3525,0.955217
200,0.5691,0.95363


  if input_shape[-1] > 1:
  if A.numel() == A.shape[-1] and A.requires_grad == False:
  if prod(A.shape) == 0:
  is_transposed = (True if A.shape[0] == 1 else False)
failed to use PyTorch jit mode due to: _Map_base::at.
  if input_shape[-1] > 1:
  if A.numel() == A.shape[-1] and A.requires_grad == False:
  if prod(A.shape) == 0:
  is_transposed = (True if A.shape[0] == 1 else False)
failed to use PyTorch jit mode due to: _Map_base::at.
  if input_shape[-1] > 1:
  if A.numel() == A.shape[-1] and A.requires_grad == False:
  if prod(A.shape) == 0:
  is_transposed = (True if A.shape[0] == 1 else False)
failed to use PyTorch jit mode due to: _Map_base::at.
  if input_shape[-1] > 1:
  if A.numel() == A.shape[-1] and A.requires_grad == False:
  if prod(A.shape) == 0:
  is_transposed = (True if A.shape[0] == 1 else False)
failed to use PyTorch jit mode due to: _Map_base::at.
  if input_shape[-1] > 1:
  if A.numel() == A.shape[-1] and A.requires_grad == False:
  if prod(A.shape) == 0:
  is_tran