In [5]:
%pip install -q datasets transformers peft trl python-dotenv torch bitsandbytes accelerate

In [6]:
import os
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model
from trl import SFTConfig, SFTTrainer
from dotenv import load_dotenv

In [7]:
from getpass import getpass
from huggingface_hub import login

In [8]:
hf_token = getpass("Enter your Hugging Face token (input hidden): ")
login(hf_token)

Enter your Hugging Face token (input hidden): ··········


In [9]:
model_name = "meta-llama/Llama-3.1-8B-Instruct"

In [10]:
from google.colab import files
uploaded = files.upload()

Saving train.jsonl to train.jsonl


In [11]:
file_name = next(iter(uploaded.keys()))

from datasets import load_dataset

dataset = load_dataset(
    "json",
    data_files={ "train": file_name },
    split="train",
    encoding="utf-8"
)

Generating train split: 0 examples [00:00, ? examples/s]

In [12]:
print(dataset)

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 9000
})


In [13]:
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
    token=hf_token,
)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj",
        "o_proj", "gate_proj", "up_proj", "down_proj"
    ],
)

model = get_peft_model(model, lora_config)

training_args = SFTConfig(
    output_dir="llama3-lora",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    learning_rate=2e-4,
    logging_steps=1,
    save_steps=10,
)

def format_example(example):
    return f"User: {example['input']}\nAssistant: {example['output']}"

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    processing_class=tokenizer,
    formatting_func=format_example,
)

trainer.train()

model.save_pretrained("llama3-lora")
tokenizer.save_pretrained("llama3-lora")
print("Model saved to: llama3-lora/")

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

Applying formatting function to train dataset:   0%|          | 0/9000 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/9000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/9000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/9000 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.
  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mttran19[0m ([33mttran19-umbc[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
1,2.7635
2,2.7996
3,2.3706
4,2.6385
5,2.4282
6,2.152
7,2.2301
8,2.2109
9,2.3701
10,2.3718


Model saved to: llama3-lora/


In [14]:
from huggingface_hub import HfApi, login


model_folder = "llama3-lora"
repo_id = "ttran19/llama3-lora-671"

api = HfApi(token=hf_token)

api.upload_folder(
    folder_path=model_folder,
    repo_id=repo_id,
    repo_type="model",
    commit_message="Upload LoRA adapter",
)


It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`hf upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder.


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...kpoint-100/tokenizer.json: 100%|##########| 17.2MB / 17.2MB            

  ...ckpoint-1000/scheduler.pt: 100%|##########| 1.47kB / 1.47kB            

  ...point-1000/tokenizer.json: 100%|##########| 17.2MB / 17.2MB            

  ...eckpoint-10/rng_state.pth:  77%|#######7  | 11.3kB / 14.6kB            

  ...kpoint-1010/rng_state.pth:  77%|#######7  | 11.3kB / 14.6kB            

  ...heckpoint-10/optimizer.pt:   0%|          |  118kB /  336MB            

  ...ckpoint-1010/optimizer.pt:   0%|          |  118kB /  336MB            

  ...adapter_model.safetensors:   0%|          |  563kB /  168MB            

  ...adapter_model.safetensors:   0%|          |  557kB /  168MB            

  ...point-1010/tokenizer.json:  48%|####8     | 8.33MB / 17.2MB            

CommitInfo(commit_url='https://huggingface.co/ttran19/llama3-lora-671/commit/f35c9a8300555531dc67a1714d99d9dc6dd26c2d', commit_message='Upload LoRA adapter', commit_description='', oid='f35c9a8300555531dc67a1714d99d9dc6dd26c2d', pr_url=None, repo_url=RepoUrl('https://huggingface.co/ttran19/llama3-lora-671', endpoint='https://huggingface.co', repo_type='model', repo_id='ttran19/llama3-lora-671'), pr_revision=None, pr_num=None)