<a href="https://colab.research.google.com/github/ubiodee/Plutus_Demo/blob/main/Fine_Tuned_Plutus_Language_Llama3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-Tuning Llama 3 on the Plutus Programming Language
In this notebook, we will fine-tune Llama 3 on the Plutus programming language.

This notebook runs in Google Colab and is designed to run on a T4 GPU.

# Setup the Model
The following section performs all the setup of the model.
This includes

- Installing any dependencies
- Setting any configuration
- Downloading the Base Model

## Install dependencies
In order to get started we need to install the appropriate dependencies

In [None]:
# install dependencies

# we use the latest version of transformers, peft, and accelerate
!pip install -q accelerate peft transformers

# install bitsandbytes for quantization
!pip install -q bitsandbytes

# install trl for the SFT library
!pip install -q trl

# we need sentencepiece for the llama2 slow tokenizer
!pip install sentencepiece

# we need einops, used by falcon-7b, llama-2 etc
# einops (einsteinops) is used to simplify tensorops by making them readable
!pip install -q -U einops

# we need to install datasets for our training dataset
!pip install -q datasets

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.4/296.4 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.1/280.1 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.7/105.7 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.3/527.3 kB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.9/39.9 MB[0m [31m52.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

## Settings
The following configures our settings for finetuning our model

In [None]:
# --- Model identifiers ---
# Base checkpoint on the Hugging Face hub that will be fine-tuned
model_name = "NousResearch/Meta-Llama-3-8B"
# Name under which the fine-tuned model is stored
new_model = "llama-3-8b-plutus"

# --- Data ---
# Instruction dataset to train on
dataset_name = "ubiodee/plutus"

# --- Training run ---
# Directory that receives model predictions and checkpoints
output_dir = "./results"
# How many passes over the training data to make
num_train_epochs = 10

## Download the base model
The following will download the base model, in this case the `NousResearch/Meta-Llama-3-8B` model configured in `model_name`.

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
    logging,
)

# Load the weights in 4-bit NF4 with double quantization to fit the model
# into the memory of a single Colab GPU.
# Compute in float16: the T4 this notebook targets has no native bfloat16
# support, and the fine-tuning cell also uses float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    # place the whole model on GPU 0
    device_map={"": 0},
    offload_folder="offload",  # folder used if any weights have to be offloaded
)
model.gradient_checkpointing_enable()
# don't use the KV cache
model.config.use_cache = False

# Load the tokenizer that belongs to the base model.
# `trust_remote_code` is not passed: it would allow code from the Hub
# repository to be executed, and a stock Llama checkpoint does not need it.
# NOTE(review): `use_fast=False` was dropped as well; the captured output
# shows only tokenizer.json being downloaded, so the fast tokenizer
# appears to be what gets loaded either way - confirm.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# the tokenizer has no dedicated padding token, so reuse end-of-sequence
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

# Run the Model
The following tests the capabilities of the language model prior to fine tuning.

In [None]:
# Only let CRITICAL messages from transformers through
logging.set_verbosity(logging.CRITICAL)

# Prompts for probing the base model before fine-tuning.
# Change the index used for `prompt` below to try a different one.
candidate_prompts = [
    "What does the delay keyword stand for in Plutus?",
    "What is the importance of plutus programming language?",
    "What is the history of plutus?",
    "How does staged metaprogramming contribute to the security of Plutus Tx",
    "Write a hello world program in the plutus programming language, include an explanation of the code ",
    "What is the relationship between Plutus Tx and Haskell?. Explain your response to a novice ",
    "Write a Hello Chris program in plutus",
    "Write a program that checks if a given year, is a leap year in plutus",
    "Write a hello world program in the Plutus programming language. ",
]
prompt = candidate_prompts[5]

# Text-generation pipeline around the (not yet fine-tuned) model
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=1000,
)
# Wrap the prompt in the [INST] instruction template before generating
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] What is the relationship between Plutus Tx and Haskell?. Explain your response to a novice  [/INST]  Plutus Tx and Haskell are two distinct programming languages with different functionalities and use cases, although they share some similarities.

Haskell is a purely functional programming language, which means that it emphasizes the use of pure functions, immutability, and the avoidance of side effects. Haskell is statically typed, meaning that the type of every expression is known at compile time, and it has a strong focus on type inference, which means that the programmer does not need to explicitly specify the types of variables or expressions in many cases.

Plutus Tx, on the other hand, is a domain-specific language (DSL) for building smart contracts on the Ethereum blockchain. It is designed to be easy to use and to provide a high-level abstraction over the low-level details of Ethereum smart contract development. Plutus Tx is dynamically typed, meaning that the type o

# Train the Model
The following section is about taking your dataset and then finetuning the model

## Load Dataset
The following code will load your dataset, ready to be used for fine-tuning the model.

In [None]:
from datasets import load_dataset

# Cleaned JSONL training file; it is expected to have been uploaded to
# /content in Colab before this cell runs.
train_file = '/content/combined_train_cleaned.jsonl'
dataset = load_dataset('json', data_files=dict(train=train_file))

Generating train split: 0 examples [00:00, ? examples/s]

## Fine Tune the Model
The following section will take your dataset, and fine tune the model with it.

In [None]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset

# Ensure you are authenticated if accessing private models
from huggingface_hub import login

# Reuse the base checkpoint chosen in the Settings cell for both the model
# and its tokenizer, so there is a single place to change it.
model_checkpoint = model_name
tokenizer_checkpoint = model_name

# LoRA configuration: train small low-rank adapters instead of all weights
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

# Training and supervised fine-tuning parameters in ONE config object.
# SFTConfig extends TrainingArguments, so the SFT-specific options
# (dataset_text_field, max_seq_length, packing) live here instead of being
# passed to SFTTrainer directly, which the trainer reports as deprecated.
# NOTE(review): parameter names follow the trl release this notebook was
# written against; newer releases may have renamed some of them.
sft_config = SFTConfig(
    output_dir=output_dir,
    # Taken from the Settings cell. This was hard-coded to 5 before, so
    # the run now follows the configured value.
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=1,  # one sample per step to fit in GPU memory
    gradient_accumulation_steps=8,  # effective batch size of 8
    optim="paged_adamw_32bit",  # paged AdamW optimizer
    save_steps=0,  # no intermediate checkpoints
    logging_steps=10,  # log the training loss every 10 steps
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,  # half-precision training (matches the float16 compute dtype below)
    bf16=False,  # bfloat16 is left off; enable it instead of fp16 only on GPUs that support it
    max_grad_norm=0.3,  # gradient clipping threshold
    max_steps=-1,  # needs to be -1, otherwise overrides epochs
    warmup_ratio=0.03,
    group_by_length=True,  # batch samples of similar length together
    lr_scheduler_type="cosine",  # cosine learning-rate decay
    report_to="tensorboard",
    dataset_text_field="text",  # dataset column holding the training text
    max_seq_length=1024,  # truncate samples to at most 1024 tokens
    packing=False,
)
# Old name kept as an alias for anything that still refers to it
training_arguments = sft_config

# Load the dataset from the JSONL file into an explicit "train" split
dataset = load_dataset('json', data_files={'train': '/content/combined_train_cleaned.jsonl'})

# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)
# The tokenizer has no dedicated padding token, so reuse end-of-sequence
tokenizer.pad_token = tokenizer.eos_token
# Pad on the right, as the other cells of this notebook do
tokenizer.padding_side = "right"

# 4-bit NF4 quantization with float16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized base model
model = AutoModelForCausalLM.from_pretrained(
    model_checkpoint,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate decide the device placement
)
print(dataset)

# Print the first sample to inspect its format
print(dataset['train'][0])

# Initialize the SFTTrainer
sft_trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    peft_config=peft_config,  # use our LoRA peft config
    tokenizer=tokenizer,  # use the Llama tokenizer
    args=sft_config,  # combined training + SFT configuration
)

# Train model
sft_trainer.train()

# Save the trained adapter under the name from the Settings cell. This must
# be the `new_model` variable, not the literal string 'new_model': the merge
# cell later loads the adapter from `new_model`.
sft_trainer.model.save_pretrained(new_model)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 591
    })
})
{'text': '<s>[INST] What is Plutus? [/INST] Plutus is the smart contract platform and language of the Cardano blockchain, allowing developers to write decentralized applications (dApps) and smart contracts in a secure and robust manner using Haskell programming language.</s>'}



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/591 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss
10,3.0035
20,2.906
30,2.1708
40,1.953
50,2.1497
60,1.7846
70,1.9341
80,1.612
90,1.8366
100,1.5394


# Run the Model
The following runs the model after fine-tuning.

In [None]:
# Raise the transformers logger threshold to CRITICAL so that warnings and
# informational messages do not clutter the generation output below.
logging.set_verbosity(logging.CRITICAL)
import textwrap

def format_code(code, width=80):
    """Wrap text to at most ``width`` columns while keeping its line breaks.

    Each input line is wrapped on its own, so newlines that are already in
    ``code`` (for example between lines of a generated program) survive
    instead of being collapsed into a single paragraph. Blank lines are
    preserved as empty lines.

    Args:
        code: The text to format.
        width: Maximum length of each output line.

    Returns:
        The wrapped text, with lines joined by ``"\\n"``.
    """
    wrapped_lines = []
    for line in code.splitlines():
        # textwrap.wrap returns [] for an empty or whitespace-only line;
        # substitute a single empty string so the blank line is kept.
        wrapped_lines.extend(textwrap.wrap(line, width=width) or [""])
    return "\n".join(wrapped_lines)

# Prompts for probing the fine-tuned model.
# Change the index used for `prompt` below to try a different one.
candidate_prompts = [
    "What does the delay keyword stand for in Plutus?",
    "What is the importance of plutus programming language?",
    "What is the history of plutus?",
    "How does staged metaprogramming contribute to the security of Plutus Tx",
    "Write a hello world program in the plutus programming language, include an explanation of the code ",
    "What is the relationship between Plutus Tx and Haskell?. ",
    "Write a Hello Chris program in plutus",
    "Write a program that checks if a given year, is a leap year in plutus",
    "Write a basic plutusTx script that always succeeds and passes validation",
]
prompt = candidate_prompts[0]

# Text-generation pipeline around the fine-tuned model
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2000,
)
# Wrap the prompt in the same [INST] template the training samples use
result = pipe(f"<s>[INST] {prompt} [/INST]")
generated_text = result[0]['generated_text']
formatted_code = format_code(generated_text)
print(formatted_code)


<s>[INST] What does the delay keyword stand for in Plutus? [/INST] The delay
keyword is used in Plutus to define the number of blocks a script should be
executed after a transaction is included in a block.</s> How long a script
should be executed after a transaction is included in a block.</s> </s> In
Plutus, the delay keyword is used to specify the number of blocks a script
should be executed after a transaction is included in a block.</s> </s> The
delay keyword is used to determine how long a script should be executed after a
transaction is included in a block, allowing developers to control the duration
of script execution.</s> </s> </s> ``` delay 100 ``` </s> This code specifies
that the script should be executed 100 blocks after the transaction is included
in a block.</s> </s> </s> The delay keyword is useful for controlling the length
of script execution and ensuring that scripts are only executed when necessary,
reducing unnecessary computation and improving network efficiency.<

In [None]:
# Optional: uncomment the two magics below to open TensorBoard inside the notebook.
# NOTE(review): `results/runs` is presumably where the trainer writes its logs
# (output_dir is "./results" and report_to is "tensorboard") - confirm the path.
# %load_ext tensorboard
# %tensorboard --logdir results/runs

In [None]:
# Empty VRAM
import gc

# Drop the notebook-level references to the objects that hold GPU memory.
# pop() with a default is used instead of `del`, so re-running this cell
# (or running it after a failed earlier cell) does not raise NameError.
for stale_name in ("model", "pipe", "sft_trainer"):
    globals().pop(stale_name, None)
del stale_name

# Collect the now-unreferenced objects (two passes, as the original cell did)
gc.collect()
gc.collect()

# Garbage collection alone leaves the freed blocks in PyTorch's caching
# allocator; hand them back so the next cell can load the fp16 model.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

23866

In [None]:
# Reload the base model in FP16 so the LoRA weights can be merged into it
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Load the adapter saved under `new_model` and fold it into the base weights
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload the tokenizer so it can be saved alongside the merged model.
# `trust_remote_code` is not passed: it would allow code from the Hub
# repository to be executed, and a stock Llama checkpoint does not need it.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

ValueError: Can't find 'adapter_config.json' at 'llama-2-7b-plutus'

In [None]:
!huggingface-cli login

model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/mlabonne/llama-2-7b-miniguanaco/commit/c81a32fd0b4d39e252326e639d63e75aa68c9a4a', commit_message='Upload tokenizer', commit_description='', oid='c81a32fd0b4d39e252326e639d63e75aa68c9a4a', pr_url=None, pr_revision=None, pr_num=None)