In [1]:
!pip3 install -q -U bitsandbytes==0.42.0
!pip3 install -q -U peft==0.8.2
!pip3 install -q -U trl==0.7.10
!pip3 install -q -U accelerate==0.27.1
!pip3 install -q -U datasets==2.17.0
!pip3 install -q -U transformers==4.38.0
!pip3 install calflops



### Pipeline and classes set up, no need to modify while doing experiments

In [2]:
# Setting up packages
import os
import torch
import transformers
import logging
import json
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer
from google.colab import userdata
from google.colab import drive
from datetime import datetime
from datetime import timezone
drive.mount('/content/drive')

# Huggingface token
os.environ["HF_TOKEN"] = userdata.get('HF_TOKEN')

# Setup device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Setup logging
time_stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
file_id = f'log_{time_stamp}.txt'
logging.basicConfig(format='%(asctime)s %(message)s',
                     datefmt='%m/%d/%Y %I:%M:%S %p',
                     filename=file_id,
                     level=logging.INFO,
                    force=True)
logger = logging.getLogger(__name__)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
class LargeLanguageModel():
    def __init__(self,model_id,bnb_config = False):
        self.model_id = model_id

        # hyperparameters
        with open('/content/drive/MyDrive/LLMs/code/params.json', 'r') as file:
            params = json.load(file)
        self.train_params =params["training"]
        lora_params =params["lora"]
        self.token = os.environ['HF_TOKEN']
        self.bnb_config = bnb_config

        # Model object
        self.model = AutoModelForCausalLM.from_pretrained(self.model_id,
                                             quantization_config=self.bnb_config,
                                             device_map={"":0},
                                             token=self.token)
        # Tokenizer object
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=self.token)

        # Lora object
        self.lora_config = LoraConfig(
                                    r=lora_params["r"],
                                    target_modules=lora_params["target_modules"],
                                    task_type=lora_params["task_type"],
                                )

        logging.info(f"LLM class instantiated for model: {model_id}")
        logging.info(f"Hyperparameters: \n{params}")


    def generate_example(self,text):
        inputs = self.tokenizer(text, return_tensors="pt").to(device)
        outputs = self.model.generate(**inputs, max_new_tokens=50)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def formatting_func(self,example):
        text = f"<start_of_turn>user\n{example['instruction'][0]}<end_of_turn> <start_of_turn>model\n{example['response'][0]}<end_of_turn>"
        return [text]

    def finetune(self,dataset):
        logging.info("Finetuning started")
        trainer = SFTTrainer(
            model = self.model,
            train_dataset = dataset.data["train"],
            args=transformers.TrainingArguments(
                per_device_train_batch_size = self.train_params["per_device_train_batch_size"],
                gradient_accumulation_steps = self.train_params["gradient_accumulation_steps"],
                warmup_steps = self.train_params["warmup_steps"],
                max_steps = self.train_params["max_steps"],
                learning_rate = self.train_params["learning_rate"],
                fp16 = self.train_params["fp16"],
                logging_steps = self.train_params["logging_steps"],
                output_dir = self.train_params["output_dir"],
                optim = self.train_params["optim"]
            ),
            peft_config = self.lora_config,
            formatting_func = self.formatting_func,
        )

        trainer.train()

        logging.info("Finetuning completed")
        logging.info('Allocated memory:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
        logging.info('Cached memory:', round(torch.cuda.memory_cached(0)/1024**3,1), 'GB')
        logging.info("Trainer log:\n",trainer.state.log_history)


class Dataset():
    def __init__(self):
        self.data = None

    def load_data(self, dataset_id):
        logging.info(f"Loading dataset: {dataset_id}")
        self.data = load_dataset(dataset_id)
        logging.info("Loading dataset completed")
        logging.info('Allocated memory:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
        logging.info('Cached memory:', round(torch.cuda.memory_cached(0)/1024**3,1), 'GB')


    def print_dataset_values(self):
        print(self.data['train'])

### Running experiments, modify model name, dataset. Modify hiperparams from json

In [4]:
# Defining model and dataset
model_id = "google/gemma-2b"
dataset_id = "databricks/databricks-dolly-15k"

# Setting up Quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Generating objects
llm = LargeLanguageModel(model_id, bnb_config)
dataset = Dataset()
dataset.load_data(dataset_id)
dataset.print_dataset_values()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Dataset({
    features: ['instruction', 'context', 'response', 'category'],
    num_rows: 15011
})


--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1100, in emit
    msg = self.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/usr/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    a

In [5]:
# Finetuning
llm.finetune(dataset)

# Save log
cp log* "/content/drive/MyDrive/LLMs/code/logs/"



Map:   0%|          | 0/15011 [00:00<?, ? examples/s]



Step,Training Loss
1,2.4376
2,2.4391
3,2.1578
4,2.4924
5,2.1474
6,2.1373
7,1.9214
8,1.9731
9,1.7617
10,1.7584


--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1100, in emit
    msg = self.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/usr/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    a

In [8]:
text = """<start_of_turn>user
Can you explain how to use WNetAddConnection to map a network share?<end_of_turn>
<start_of_turn>model"""
llm.generate_example(text)

"<start_of_turn>user\nCan you explain how to use WNetAddConnection to map a network share?<end_of_turn>\n<start_of_turn>model\nWNetAddConnection is a Windows API that allows you to map a network share to a drive letter. Here's how you would use it:\n\n1. Import the WNetAddConnectionW API into your program.\n2. Call"