In [None]:
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
# NOTE(review): unsloth is installed from the repository's default branch and
# peft / accelerate / bitsandbytes / optuna are unpinned, so re-running this
# cell later may resolve different versions.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps installs exactly the listed packages without pulling in their own
# dependencies.
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
# Optuna runs the hyperparameter search further down in the notebook.
!pip install optuna
# NOTE(review): optuna-integration is not referenced by name in the visible
# cells — confirm it is still required.
!pip install optuna-integration
# Installs torch from the CUDA 11.8 wheel index.
# NOTE(review): this runs after the packages above and may replace the torch
# build they were installed against — confirm the ordering is intentional.
!pip install torch --index-url https://download.pytorch.org/whl/cu118


Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-bk7ld4uo/unsloth_1cd322656a7248249c811ec4f6f3228d
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-bk7ld4uo/unsloth_1cd322656a7248249c811ec4f6f3228d
  Resolved https://github.com/unslothai/unsloth.git to commit 0f2e484f3931d1a558dc3a5967c8da665a2e7252
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


In [None]:
import gc
import math
import os
import time

# unsloth is kept ahead of trl/transformers, matching its original position.
from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported
import datasets
import optuna
import pandas as pd
import torch
from cleantext import clean
from optuna.visualization import plot_param_importances
from transformers import AutoTokenizer
from transformers import TrainingArguments
from trl import SFTTrainer

In [None]:
def clear_memory():
    """Release unreferenced Python objects and cached CUDA memory.

    Runs the garbage collector and then, when CUDA is available, waits for
    outstanding GPU work, empties PyTorch's caching allocator and collects
    CUDA IPC memory.

    Objects that are still referenced elsewhere (for example a caller's local
    ``model``, ``tokenizer`` or ``trainer``) cannot be freed from here; a
    ``del`` inside this function only affects this function's own (empty)
    local scope. Callers must drop their references before calling.

    Returns:
        None.
    """
    # Collect first so tensors kept alive only by reference cycles are
    # destroyed before the allocator cache is emptied.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.synchronize()
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()


In [None]:
# Alpaca-style prompt template. The three `{}` slots are filled, in order,
# with the instruction, the input and the response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence marker appended to every training example (must add EOS_TOKEN).
# It is read from the base model's tokenizer explicitly: the only `tokenizer`
# in the notebook is a local variable inside make_modelz(), so relying on a
# global of that name fails on a fresh kernel.
# The access token comes from the HF_TOKEN environment variable; when it is
# unset, None is passed and the library's own credential lookup applies.
EOS_TOKEN = AutoTokenizer.from_pretrained(
    "google/gemma-2-9b-it",  # same checkpoint that make_modelz() loads
    token = os.environ.get("HF_TOKEN"),
).eos_token
def formatting_prompts_func(examples, *, eos_token = None, prompt_template = None):
    """Render a batch of examples into Alpaca-style training strings.

    Args:
        examples: Mapping that provides "instruction", "input" and "output"
            sequences (the batched form that ``Dataset.map`` passes in).
        eos_token: String appended to every rendered prompt. Defaults to the
            module-level ``EOS_TOKEN``.
        prompt_template: ``str.format`` template with three positional slots
            (instruction, input, output). Defaults to the module-level
            ``alpaca_prompt``.

    Returns:
        ``{"text": [...]}`` with one rendered string per zipped example.

    Raises:
        KeyError: If ``examples`` lacks one of the three expected keys.
    """
    # Resolve the defaults at call time so the globals only need to exist
    # when the caller does not override them.
    if eos_token is None:
        eos_token = EOS_TOKEN
    if prompt_template is None:
        prompt_template = alpaca_prompt

    rows = zip(examples["instruction"], examples["input"], examples["output"])
    # Must add the EOS token, otherwise your generation will go on forever!
    return {"text": [prompt_template.format(*row) + eos_token for row in rows]}

from datasets import load_dataset

# Load the "train" split and, in the same chained expression, run the prompt
# formatter over it in batches; the formatter's returned "text" key becomes a
# column of the resulting dataset.
dataset = load_dataset("twright/gem_training", split = "train").map(
    formatting_prompts_func,
    batched = True,
)

In [None]:
def make_modelz(lora_alpha,lora_dropout,r):
    """Load the 4-bit Gemma-2 9B instruct model and attach LoRA adapters.

    Args:
        lora_alpha: LoRA alpha value forwarded to ``get_peft_model``.
        lora_dropout: LoRA dropout forwarded to ``get_peft_model``.
        r: LoRA rank forwarded to ``get_peft_model`` (must be > 0).

    Returns:
        Tuple ``(model, tokenizer)``: the PEFT-wrapped model and the tokenizer
        returned by ``FastLanguageModel.from_pretrained``.
    """
    clear_memory()
    max_seq_length = 512
    dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
    load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

    base_model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "google/gemma-2-9b-it",
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
        # Credentials come from the environment, never from the notebook.
        # When HF_TOKEN is unset this passes None.
        # NOTE(review): the access token that used to be hardcoded on this
        # line has been exposed and should be revoked.
        token = os.environ.get("HF_TOKEN"),
    )
    model = FastLanguageModel.get_peft_model(
        base_model,
        r = r, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj",],
        lora_alpha = lora_alpha,
        lora_dropout = lora_dropout, # Supports any, but = 0 is optimized
        bias = "none",    # Supports any, but = "none" is optimized
        use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
        random_state = 3407,
        use_rslora = False,  # We support rank stabilized LoRA
        loftq_config = None, # And LoftQ
    )
    # The return must live inside the function body; at column 0 it is a
    # syntax error ('return' outside function).
    return model, tokenizer

def _train_eval_splits(data, eval_fraction = 0.1, seed = 3407):
    """Return ``(train, eval)`` datasets from a split mapping or a single dataset."""
    if isinstance(data, dict):  # e.g. a DatasetDict keyed by split name
        return data["train"], data["test"]
    # A single dataset (such as one loaded with split="train") has no held-out
    # part, so carve one off deterministically.
    parts = data.train_test_split(test_size = eval_fraction, seed = seed)
    return parts["train"], parts["test"]


def objective(trial):
    """Optuna objective: fine-tune one LoRA configuration and score it.

    Samples LoRA and optimiser hyperparameters from ``trial``, builds the
    model with ``make_modelz``, trains it with ``SFTTrainer`` on the
    module-level ``dataset`` and evaluates it on the held-out part.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The ``eval_loss`` reported by ``trainer.evaluate()`` (lower is better).
    """
    clear_memory()

    # make_modelz() keeps its own max_seq_length local, so the value is
    # repeated here; keep the two in sync.
    max_seq_length = 512

    lora_alpha = trial.suggest_int("lora_alpha", 16, 256)
    lora_dropout = trial.suggest_float("lora_dropout", 0.15, 0.5)
    # The trial parameter "r" is a rank/alpha ratio; the actual LoRA rank is
    # derived from it and rounded up to an integer.
    rank_ratio = trial.suggest_categorical("r", [0.25, 0.5, 1, 2])
    rank = int(math.ceil(float(rank_ratio) * lora_alpha))
    weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1, log=True)
    warmup_steps = trial.suggest_int("warmup_steps", 2, 10)
    learning_rate = trial.suggest_float('learning_rate', 1e-6, 1e-3, log=True)
    num_train_epochs = trial.suggest_int("num_train_epochs", 2, 8)

    train_split, eval_split = _train_eval_splits(dataset)

    model, tokenizer = make_modelz(lora_alpha, lora_dropout, rank)
    trainer = None
    try:
        trainer = SFTTrainer(
            model = model,
            tokenizer = tokenizer,
            train_dataset = train_split,
            eval_dataset = eval_split,  # SFTTrainer has no `test_dataset` argument
            # Column produced by formatting_prompts_func via dataset.map().
            # NOTE(review): this was "just_purpose"; confirm "text" is the
            # intended training field.
            dataset_text_field = "text",
            max_seq_length = max_seq_length,
            dataset_num_proc = 2,
            packing = False, # Can make training 5x faster for short sequences.
            args = TrainingArguments(
                per_device_train_batch_size = 2,
                gradient_accumulation_steps = 4,
                warmup_steps = warmup_steps,
                learning_rate = learning_rate,
                # The sampled epoch count must be passed on, otherwise the
                # hyperparameter has no effect on training.
                num_train_epochs = num_train_epochs,
                fp16 = not is_bfloat16_supported(),
                bf16 = is_bfloat16_supported(),
                logging_steps = 1,
                optim = "adamw_8bit",
                weight_decay = weight_decay,
                lr_scheduler_type = "linear",
                seed = 3407,
                output_dir = "outputs",
                gradient_checkpointing = True,
            ),
        )
        trainer.train()
        eval_result = trainer.evaluate()
    finally:
        # Drop this trial's references first; clear_memory() can only reclaim
        # objects that nothing points to any more.
        del trainer, model, tokenizer
        clear_memory()

    print(eval_result)
    # presumably lets the GPU finish releasing memory before the next trial —
    # TODO confirm this pause is still needed
    time.sleep(20)
    return eval_result['eval_loss']

In [None]:

# The study has to exist before it can be optimised. objective() returns the
# evaluation loss, so the search minimises it.
study = optuna.create_study(direction = "minimize")
study.optimize(objective, n_trials=10, show_progress_bar=True, gc_after_trial=True)

# Get the best hyperparameters
best_hyperparameters = study.best_params
print(best_hyperparameters)

# Plot parameter importances
plot_param_importances(study)