In [1]:
# Install the fine-tuning stack into the kernel's environment (-q keeps pip quiet).
# NOTE(review): no versions are pinned, so a fresh run may resolve different
# releases than the ones that produced the outputs below — consider pinning.
%pip install -q transformers datasets trl peft bitsandbytes accelerate huggingface_hub colorama

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/375.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.8/375.8 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m118.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m92.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from datasets import load_dataset
from colorama import Fore
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from trl import SFTTrainer, SFTConfig
from peft import LoraConfig, prepare_model_for_kbit_training
from huggingface_hub import login

from google.colab import userdata

# Read the Hugging Face token from the Colab secret named "HF_API_KEY" and
# authenticate with it. The token used to be fetched and then ignored (and the
# imported `login` was never called), so the secret had no effect on Hub access.
hf_token = userdata.get("HF_API_KEY")
if hf_token:
    # Skip the login call when the secret is empty so anonymous access to
    # public models keeps working.
    login(token=hf_token)

# Load the "train" split from the local ./sample_data directory and show the
# first record (in yellow) as a quick sanity check of the question/answer schema.
dataset = load_dataset("./sample_data", split="train")
print(Fore.YELLOW + str(dataset[0]) + Fore.RESET)

def format_chat_template(batch, tokenizer):
    """Render every question/answer pair of a batch as one chat-formatted string.

    Side effect: ``tokenizer.chat_template`` is overwritten on every call with
    a header-token template (``<|start_header_id|>`` ... ``<|eot_id|>``) before
    any rendering happens.

    Args:
        batch: Mapping holding parallel sequences under the keys "question"
            and "answer" (pairs are matched positionally with ``zip``).
        tokenizer: Object with an assignable ``chat_template`` attribute and an
            ``apply_chat_template(messages, tokenize=False)`` method.

    Returns:
        dict with three keys: "instruction" (the batch's questions, unchanged),
        "response" (the batch's answers, unchanged) and "text" (one rendered
        system/user/assistant conversation per pair).
    """
    # Install the template first so every apply_chat_template call below uses it.
    tokenizer.chat_template = """{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"""

    system_text = """You are a helpful, honest and harmless assistant designed to help engineers. Think through each question logically and provide an answer. Don't make things up, if you're unable to answer a question advise the user that you're unable to answer as it is outside of your scope."""

    def _conversation(question, answer):
        # A fresh three-turn message list per example: system, user, assistant.
        return [
            {"role": "system", "content": system_text},
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer},
        ]

    questions = batch["question"]
    answers = batch["answer"]
    rendered = [
        tokenizer.apply_chat_template(_conversation(question, answer), tokenize=False)
        for question, answer in zip(questions, answers)
    ]

    return {"instruction": questions, "response": answers, "text": rendered}

# Base checkpoint; its tokenizer is the one used to render the chat-formatted text.
base_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

# Add the "instruction", "response" and "text" columns, processing the data in
# batches of 8 within a single process. format_chat_template also overwrites
# tokenizer.chat_template as a side effect.
train_dataset = dataset.map(
    lambda batch: format_chat_template(batch, tokenizer),
    batched=True,
    batch_size=8,
    num_proc=1,
)

# Show the first formatted record (light magenta) to check the rendered text.
print(f"{Fore.LIGHTMAGENTA_EX}{train_dataset[0]}{Fore.RESET}")

# Load the base causal-LM weights, requesting float16 and automatic device placement.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    torch_dtype=torch.float16
)

# Enable gradient checkpointing on the loaded model. The training log below
# reports that this is incompatible with use_cache=True and that use_cache is
# switched to False as a result.
model.gradient_checkpointing_enable()
# NOTE(review): this helper is named for k-bit (quantized) training, but no
# quantization config is passed to from_pretrained above — confirm whether
# quantized loading was intended and what the helper does to a plain float16
# model (dtype casting, gradient-checkpointing setup) before changing the
# order of these statements.
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank 256 with alpha 512 (alpha = 2 * r), 5% dropout,
# applied to every linear layer of the causal-LM.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    r=256,
    lora_alpha=512,
    lora_dropout=0.05,
)

# Supervised fine-tuning settings.
sft_config = SFTConfig(
    # Where checkpoints go, and how often (once per epoch).
    output_dir="./tinyllama-sft-output",
    save_strategy="epoch",
    # Schedule: 3 epochs; 1 sample per device x 4 accumulation steps
    # = 4 samples per optimizer step per device.
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    # Precision flags: fp16 on, bf16 off.
    fp16=True,
    bf16=False,
    # Log the training loss every 10 steps; no experiment trackers listed.
    logging_steps=10,
    report_to=[],
)

# Build the trainer. The tokenizer is handed over explicitly via
# `processing_class` so that the trainer tokenizes with — and later saves — the
# same tokenizer whose chat_template was set while formatting the dataset.
# Without it the trainer falls back to loading its own tokenizer for the model,
# and the tokenizer files written by trainer.save_model() would not carry the
# template used to build the training text.
trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=train_dataset,
    processing_class=tokenizer,
    peft_config=peft_config,
)

# Run fine-tuning with the settings from sft_config.
trainer.train()

# Persist two artifacts:
#   complete_checkpoint/ — written by the trainer itself
#   final_model/         — the trained model's weights/config plus the tokenizer files
trainer.save_model("complete_checkpoint")
trainer.model.save_pretrained("final_model")
# Kept as the last expression so the cell displays the tuple of saved tokenizer paths.
tokenizer.save_pretrained("final_model")

Generating train split: 0 examples [00:00, ? examples/s]

[33m{'question': 'Context:Chapter 1. Introduction to TM1 Development\nThis section provides an introduction to the concept of multi-dimensionality and describes some common responsibilities of developers that use IBM Cognos TM1. What is the main topic discussed in Chapter 1 of the document?', 'answer': 'Chapter 1 discusses an introduction to multi-dimensionality and describes common responsibilities for developers using IBM Cognos TM1.'}[39m


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Map:   0%|          | 0/103 [00:00<?, ? examples/s]

[95m{'question': 'Context:Chapter 1. Introduction to TM1 Development\nThis section provides an introduction to the concept of multi-dimensionality and describes some common responsibilities of developers that use IBM Cognos TM1. What is the main topic discussed in Chapter 1 of the document?', 'answer': 'Chapter 1 discusses an introduction to multi-dimensionality and describes common responsibilities for developers using IBM Cognos TM1.', 'instruction': 'Context:Chapter 1. Introduction to TM1 Development\nThis section provides an introduction to the concept of multi-dimensionality and describes some common responsibilities of developers that use IBM Cognos TM1. What is the main topic discussed in Chapter 1 of the document?', 'response': 'Chapter 1 discusses an introduction to multi-dimensionality and describes common responsibilities for developers using IBM Cognos TM1.', 'text': "<s><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful, honest and harmless assistant designed

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Adding EOS to train dataset:   0%|          | 0/103 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/103 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/103 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,1.5574
20,1.0492
30,0.9262
40,0.6858
50,0.5445
60,0.427
70,0.3691


('final_model/tokenizer_config.json',
 'final_model/special_tokens_map.json',
 'final_model/chat_template.jinja',
 'final_model/tokenizer.model',
 'final_model/added_tokens.json',
 'final_model/tokenizer.json')

In [3]:
# Recursively pack the final_model/ directory into final_model.zip for download.
!zip -r final_model.zip final_model/

  adding: final_model/ (stored 0%)
  adding: final_model/adapter_config.json (deflated 56%)
  adding: final_model/README.md (deflated 66%)
  adding: final_model/special_tokens_map.json (deflated 79%)
  adding: final_model/adapter_model.safetensors (deflated 8%)
  adding: final_model/chat_template.jinja (deflated 52%)
  adding: final_model/tokenizer.json (deflated 85%)
  adding: final_model/tokenizer_config.json (deflated 69%)
  adding: final_model/tokenizer.model (deflated 55%)


In [4]:
# Colab-only helper: hand final_model.zip to the browser as a file download.
from google.colab import files
files.download("final_model.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [7]:
# Recursively pack the trainer-saved complete_checkpoint/ directory into a zip.
!zip -r complete_checkpoint.zip complete_checkpoint/

# Colab-only helper: hand the archive to the browser as a file download.
# NOTE(review): this repeats the zip-and-download steps used for final_model and
# imports `files` again; the execution count also jumps from 4 to 7, so confirm
# the notebook still works under Restart & Run All.
from google.colab import files
files.download("complete_checkpoint.zip")

  adding: complete_checkpoint/ (stored 0%)
  adding: complete_checkpoint/adapter_config.json (deflated 56%)
  adding: complete_checkpoint/README.md (deflated 66%)
  adding: complete_checkpoint/special_tokens_map.json (deflated 79%)
  adding: complete_checkpoint/adapter_model.safetensors (deflated 8%)
  adding: complete_checkpoint/training_args.bin (deflated 52%)
  adding: complete_checkpoint/chat_template.jinja (deflated 60%)
  adding: complete_checkpoint/tokenizer.json (deflated 85%)
  adding: complete_checkpoint/tokenizer_config.json (deflated 69%)
  adding: complete_checkpoint/tokenizer.model (deflated 55%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>