In [None]:
# Instal Requirements

!pip install mlx-lm transformers datasets torch TensorFlow huggingface_hub ipywidgets

In [None]:
# https://medium.com/@anchen.li/fine-tune-llama3-with-function-calling-via-mlx-lm-5ebbee41558f

import os
import yaml
import mlx_lm
import ipywidgets
from huggingface_hub import HfApi
from huggingface_hub import login

# ---- User settings: review these before running the notebook ----
proj_name="MyProject"
# Read the Hugging Face token from the HF_TOKEN environment variable so the
# real secret never has to be saved inside the notebook. The placeholder is
# only a fallback that can still be edited by hand.
HFAPI=os.environ.get("HF_TOKEN", "<HF_API_KEY>")
llamacpp_path="/Path/To/llama.cpp/"
ds_repo_id = "mzbac/function-calling-llama-3-format-v1.1"
hf_model="meta-llama/Meta-Llama-3-8B-Instruct"

# ---- Derived paths (all directory paths keep a trailing "/") ----
# The project lives under the current user's home directory instead of a
# hard-coded user name.
proj_dir=f"{os.path.expanduser('~')}/projects/{proj_name}/"
model_dir=f"{proj_dir}model/"
data_dir=f"{proj_dir}data/"
# Everything below is built from model_dir so the layout is defined in one place.
adapters_dir=f"{model_dir}adapters/"
fused_dir=f"{model_dir}fused/"
lora_config=f"{model_dir}lora_config.yaml"
gguf_file=f"{proj_name}_fp16.GGUF"
out_gguf=f"{model_dir}{gguf_file}"

In [None]:
# Echo the derived paths and file names, one per line, so they can be
# checked before anything is created on disk

for configured_value in (
    proj_dir,
    adapters_dir,
    data_dir,
    fused_dir,
    lora_config,
    gguf_file,
    out_gguf,
):
    print(configured_value)

In [None]:
# Authenticate this session with HuggingFace using the configured token

login(token=HFAPI)

In [None]:
# Setup Project directory structure
# os.makedirs(..., exist_ok=True) creates missing parent folders and does not
# fail when a folder already exists, so this cell can safely be re-run
# (plain os.mkdir raised FileExistsError on the second run).

for project_subdir in (proj_dir, model_dir, adapters_dir, data_dir, fused_dir):
    os.makedirs(project_subdir, exist_ok=True)

# Work from the project root for the rest of the notebook.
os.chdir(proj_dir)

In [None]:
# Download the training dataset
# Can optionaly copy file into data folder

!huggingface-cli download {ds_repo_id} --repo-type dataset --include "*.jsonl" --local-dir={data_dir}

In [None]:
# Build the LoRA configuration and save it as a YAML file
# !!Review closely for finetuning settings!!

# Settings for the LoRA adapters themselves.
lora_settings = {
    # The layer keys to apply LoRA to.
    "keys": [
        'mlp.gate_proj',
        'mlp.down_proj',
        'self_attn.q_proj',
        'mlp.up_proj',
        'self_attn.o_proj',
        'self_attn.v_proj',
        'self_attn.k_proj',
    ],
    "rank": 128,      # LoRA rank
    "alpha": 256,     # LoRA alpha
    "scale": 10.0,    # LoRA scale
    "dropout": 0.05,  # LoRA dropout
}

yaml_content = {
    # -- Model and data --
    "model": hf_model,               # The path to the local model directory or Hugging Face repo.
    "data": data_dir,                # Directory with {train, valid, test}.jsonl files
    "adapter_path": adapters_dir,    # Save/load path for the trained adapter weights.
    "resume_adapter_file": None,     # Load path to resume training with the given adapter weights.

    # -- Training run --
    "train": True,                   # Whether or not to train (boolean)
    "seed": 0,                       # The PRNG seed
    "lora_layers": 32,               # Number of layers to fine-tune
    "batch_size": 1,                 # Minibatch size.
    "iters": 100,                    # Iterations to train for.
    "learning_rate": 1e-6,           # Adam learning rate.
    "max_seq_length": 8192,          # Maximum sequence length.
    "grad_checkpoint": True,         # Use gradient checkpointing to reduce memory use.

    # -- Reporting, validation and checkpoints --
    "steps_per_report": 10,          # Number of training steps between loss reporting.
    "steps_per_eval": 200,           # Number of training steps between validations.
    "val_batches": 25,               # Number of validation batches, -1 uses the entire validation set.
    "save_every": 1000,              # Save the model every N iterations.

    # -- Test-set evaluation --
    "test": False,                   # Evaluate on the test set after training
    "test_batches": 100,             # Number of test set batches, -1 uses the entire test set.

    "lora_parameters": lora_settings,
    # Uncomment to use the lr_schedule
    # "lr_schedule": {
    #     "name": "cosine_decay",
    #     "warmup": 100,  # 0 for no warmup
    #     "warmup_init": 1e-7,  # 0 if not specified
    #     "arguments": [1e-6, 1000, 1e-7]  # passed to scheduler
    # }
}

# Serialize the settings to the config file in block (non-flow) YAML style
with open(lora_config, "w") as config_file:
    yaml.dump(yaml_content, config_file, default_flow_style=False)
    

## ***Warning!*** Next step starts fine tuning.
#### !!Close all unneeded applications before proceeding!!

In [None]:
# Start Fine Tuning
# !!Close all unneeded applications before proceeding!!
# This step wil download the model if not already downloaded

!mlx_lm.lora --config={lora_config}

In [None]:
# Fuse the trained model with teh base model

os.chdir(model_dir)
!mlx_lm.fuse --model {hf_model} --save-path {fused_dir}

In [None]:
# Export a float16 GGUF version of the fine tuned model
# https://github.com/ggerganov/llama.cpp

!python3 {llamacpp_path}/convert.py {fused_dir} --outtype f16 --outfile {out_gguf} --vocab-type bpe

In [None]:
# Create new HF Model Repo
# exist_ok=True lets this cell be re-run after the repo has been created;
# without it a second run raises an error because the repo already exists.

api = HfApi()
hf_url = api.create_repo(
    repo_id = proj_name,
    repo_type = "model",
    private = True,
    exist_ok = True,
)

In [None]:
# Push the whole model folder to the HuggingFace repo created above
# (adapters, fused model, and GGUF model are all inside it)

api.upload_folder(folder_path=f"{proj_dir}model/", repo_id=hf_url.repo_id, repo_type="model")

In [None]:
# Optional alternative: upload only the created GGUF file to HF (uncomment the call below to use it)

#api.upload_file(
#    path_or_fileobj = out_gguf, 
#    path_in_repo = gguf_file,
#    repo_id = hf_url.repo_id,
#    repo_type = "model"
#)