In [1]:
!python3 -m pip install \
    --upgrade \
    --requirement requirements.txt \
    --constraint constraints.txt \
    --extra-index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118
Collecting accelerate (from -r requirements.txt (line 1))
  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes (from -r requirements.txt (line 2))
  Downloading bitsandbytes-0.41.3-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets (from -r requirements.txt (line 3))
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m52.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft (from -r requirements.txt (line 4))
  Downloading peft-0.7.0-py3-none-any.whl (168 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0

In [2]:
import gc
import json
import pathlib
import shutil

import datasets
import peft
import torch
import transformers
import trl



In [3]:
# --- Notebook configuration -------------------------------------------------
# Hub identifier of the base model that is loaded and tuned below.
base_model_identifier = "HuggingFaceH4/zephyr-7b-beta"

# Input: JSON file holding the tuning documents.
raw_dataset_path = pathlib.Path("json_documents.json")

# Outputs: trainer checkpoints, the saved adapter, and the archive base name
# handed to shutil.make_archive (which appends the ".zip" suffix).
tuning_checkpoint_directory = pathlib.Path("zephyr_tuning_checkpoints_directory")
tuned_adapter_directory = pathlib.Path("tuned_zephyr_adapter_directory")
# NOTE(review): "adaptr" looks like a typo of "adapter"; left unchanged because
# it determines the archive file name that downstream consumers may rely on.
tuned_adapter_archive = "tuned_zephyr_adaptr_archive"

In [4]:
# Read the whole UTF-8 encoded JSON file and parse it into Python objects.
raw_dataset = json.loads(raw_dataset_path.read_text(encoding="utf-8"))

In [5]:
# The records to tune on live under the "tuning_documents" key of the parsed
# JSON; they are converted into a Hugging Face Dataset for the trainer.
tuning_documents = raw_dataset["tuning_documents"]
hugging_face_dataset = datasets.Dataset.from_list(tuning_documents)

In [6]:
# bitsandbytes settings used when loading the base model: 4-bit weights in the
# "nf4" quantisation type, float16 compute dtype, double quantisation off.
quantisation_configuration = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype="float16",
)

In [7]:
# Load the base causal language model with the quantisation settings above.
# The device map assigns the root module ("") to device index 0
# (presumably the first GPU -- confirm for multi-device hosts).
model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model_identifier,
    device_map={"": 0},
    quantization_config=quantisation_configuration,
)

# NOTE(review): pretraining_tp is set on the config as-is; confirm that this
# field has any effect for the chosen base model architecture.
model.config.pretraining_tp = 1
# Switch off the config's use_cache flag for the training run.
model.config.use_cache = False

config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [8]:
# Tokeniser matching the base model.
tokeniser = transformers.AutoTokenizer.from_pretrained(base_model_identifier)

# Pad on the right-hand side, and reuse the end-of-sequence token as the
# padding token.
tokeniser.padding_side = "right"
tokeniser.pad_token = tokeniser.eos_token

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

In [9]:
# LoRA adapter settings for causal language modelling: rank 8, alpha 16,
# 10% dropout on the adapter layers, and bias set to "none".
peft_configuration = peft.LoraConfig(
    task_type=peft.TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

In [10]:
# Wrap the quantised base model with the LoRA adapter configuration.
peft_model = peft.get_peft_model(model=model, peft_config=peft_configuration)

In [11]:
# Trainer hyper-parameters, grouped by concern.
# NOTE(review): the recorded run below finished 5 epochs over 2854 examples in
# 7135 steps, i.e. roughly 2 examples per step rather than the 4 requested
# here -- presumably auto_find_batch_size lowered the batch size; confirm.
training_configuration = transformers.TrainingArguments(
    # Checkpointing: one safetensors checkpoint per epoch.
    output_dir=tuning_checkpoint_directory,
    save_strategy="epoch",
    save_safetensors=True,
    # Batching.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    auto_find_batch_size=True,
    group_by_length=True,
    # Run length: epoch-based, with max_steps left at -1.
    num_train_epochs=5,
    max_steps=-1,
    # Optimiser and learning-rate schedule.
    optim="paged_adamw_32bit",
    learning_rate=1e-3,
    weight_decay=0.001,
    max_grad_norm=0.3,
    lr_scheduler_type=transformers.SchedulerType.COSINE,
    warmup_ratio=0.03,
    # Hardware and numeric precision: fp16 mixed precision, not on CPU.
    use_cpu=False,
    fp16=True,
    bf16=False,
    # Reproducibility.
    seed=0,
    data_seed=0,
    # No external experiment tracker.
    report_to="none",
)

In [12]:
# Supervised fine-tuning trainer: trains the LoRA-wrapped model on the
# "instruction_without_context" text field of the dataset, without packing
# and with max_seq_length left as None.
supervised_trainer = trl.SFTTrainer(
    model=peft_model,
    tokenizer=tokeniser,
    train_dataset=hugging_face_dataset,
    dataset_text_field="instruction_without_context",
    args=training_configuration,
    max_seq_length=None,
    packing=False,
)



Map:   0%|          | 0/2854 [00:00<?, ? examples/s]

In [13]:
# Run the fine-tuning loop; the returned summary is displayed as the cell output.
training_output = supervised_trainer.train()
training_output

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.9854
1000,0.7803
1500,0.8224
2000,0.8145
2500,0.7601
3000,0.7522
3500,0.5826
4000,0.5223
4500,0.4291
5000,0.3596


TrainOutput(global_step=7135, training_loss=0.5535710768509179, metrics={'train_runtime': 4504.095, 'train_samples_per_second': 3.168, 'train_steps_per_second': 1.584, 'total_flos': 4.15361972379648e+16, 'train_loss': 0.5535710768509179, 'epoch': 5.0})

In [14]:
# Persist the trained model held by the trainer into the adapter directory,
# using safetensors serialisation.
supervised_trainer.model.save_pretrained(
    save_directory=tuned_adapter_directory,
    safe_serialization=True,
)

In [15]:
# Zip the contents of the adapter directory; the resulting archive path is
# displayed as the cell output.
shutil.make_archive(
    base_name=tuned_adapter_archive,
    format="zip",
    root_dir=tuned_adapter_directory,
)

'/content/tuned_zephyr_adaptr_archive.zip'

In [16]:
# Drop the notebook-level references to the model, tokeniser, PEFT wrapper and
# trainer so that the following clean-up cells can reclaim their memory.
del model, tokeniser, peft_model, supervised_trainer

In [17]:
# Force a garbage-collection pass now that the large objects have been
# deleted; the number returned by gc.collect() is shown as the cell output.
gc.collect()

35

In [18]:
# Ask PyTorch to release the cached CUDA memory it is no longer using.
torch.cuda.empty_cache()