# Testing ⏭ GreedyLR Scheduler for 🤗 LLMs with DOE

## Need to reinstall from source to register changes

(You may need to restart the kernel after reinstalling.)

In [None]:
# %pip install -r translation/requirements.txt
%pip install -e ~/transformers/  #Or wherever you downloaded this source

In [None]:
%pip install -U datasets peft==0.6.0 accelerate trl 

In [None]:
# %pip install typing-extensions --upgrade --quiet

In [None]:
import os
import sys

# Put the local transformers checkout at the front of the import path.
# Python does not expand "~" in sys.path entries, so resolve it to the real
# home directory first; a literal "~/..." entry never matches anything.
sys.path.insert(0, os.path.expanduser('~/transformers/'))

In [None]:
from transformers import AutoModelForCausalLM, Trainer, AutoTokenizer, GreedyLR, TrainingArguments
from datasets import load_dataset
from peft import PeftModel, LoraConfig

## Single Training Run

### Load dataset

In [None]:
from datasets import load_dataset

# Training split of IMDB; later cells reference this `dataset` variable.
dataset = load_dataset("imdb", split="train")

# Peek at one review. Index the row first: dataset["text"][100] reads the
# whole "text" column just to show a single element (on datasets releases
# where column access returns a plain list).
dataset[100]["text"]

### Load model

In [None]:
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
import torch


# LoRA adapter settings for the causal-LM fine-tune.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

training_args = TrainingArguments(
    # Output and logging
    output_dir='./greedylr_llm_results/',
    save_total_limit=3,
    logging_steps=10,
    # Device and precision
    no_cuda=False,  # Use CUDA if available
    bf16=False,
    gradient_checkpointing=False,
    # Batch size, optimizer and run length
    per_device_train_batch_size=4,
    # gradient_accumulation_steps=4,
    num_train_epochs=4,
    optim="adamw_hf",
    learning_rate=1e-3,
    max_grad_norm=0.3,
    # Learning-rate schedule
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,  # for cosine
    factor=0.9,  # for greedylr
)

# The model is given as a hub id string; `dataset` comes from the
# "Load dataset" cell above.
trainer = SFTTrainer(
    model="bigscience/bloom-560m",
    args=training_args,
    train_dataset=dataset,
    dataset_text_field="text",
    peft_config=peft_config,
)

In [None]:
# Check for GPU
print(torch.cuda.is_available())
print(torch.cuda.device_count())
# Only query the device name when CUDA is usable: get_device_name(0) raises
# on a machine without a CUDA device, which would make this check cell fail.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device available")

### Train!

#### Local Machine

In [None]:
# Evaluate a CUDA torch.device so the notebook echoes it; the value is not stored.
torch.device("cuda")

In [None]:
%time

# Force instance to use GPU, assuming availability
with torch.device("cuda"):
    trainer.train()

#### SageMaker Training Job

In [None]:
# Output current environment packages to requirements.txt file
# ! pip freeze > requirements.txt

In [None]:
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
import torch
from sagemaker.remote_function import remote


# NOTE(review): confirm that `remote` accepts `wait`; this could not be
# verified from the notebook alone.
@remote(instance_type="ml.g5.12xlarge", dependencies='./requirements2.txt', wait=False)
def finetune_model():
    """Fine-tune EleutherAI/gpt-neo-125m with LoRA on the IMDB training split.

    The function is wrapped by SageMaker's ``remote`` decorator, requesting an
    ml.g5.12xlarge instance and the dependencies in ./requirements2.txt.

    The dataset is loaded inside the function, as ``run_experiment`` does,
    so the job does not depend on the notebook-level ``dataset`` variable
    created in an earlier cell.
    """
    # Same dataset and split as the notebook-level `dataset`.
    train_dataset = load_dataset("imdb", split="train")

    peft_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    training_args = TrainingArguments(
        per_device_train_batch_size=4,
        # gradient_accumulation_steps=4,
        gradient_checkpointing=False,
        max_grad_norm=0.3,
        num_train_epochs=4,
        learning_rate=1e-3,
        bf16=False,
        save_total_limit=3,
        logging_steps=10,
        output_dir='./greedylr_llm_results/',
        optim="adamw_hf",
        lr_scheduler_type="cosine",
        warmup_ratio=0.05,  # for cosine
        factor=0.9,  # for greedylr
    )

    trainer = SFTTrainer(
        "EleutherAI/gpt-neo-125m",
        train_dataset=train_dataset,
        dataset_text_field="text",
        peft_config=peft_config,
        args=training_args,
    )

    trainer.train()
    
finetune_model()

## Design of Experiments (DOE)

### Step 1: Create list of experiment parameters

In [None]:
# TODO: write function to build experiment_configs dictionary
# TODO: add all variables to config dictionary

# One dict per experiment; run_experiment reads these keys.
experiment_configs = [
    {
        "model_name": "bigscience/bloom-560m",
        "dataset_name": "imdb",
        "r": 16,
        "lora_alpha": 8,
        "per_device_train_batch_size": 4,
    },
]

### Step 2: Run Experiments

In [None]:
def run_experiment(config):
    """Run one LoRA fine-tuning experiment described by ``config``.

    Args:
        config: Dict describing the experiment. Required keys:
            "model_name", "dataset_name", "r", "lora_alpha" and
            "per_device_train_batch_size". Optional key:
            "lr_scheduler_type" (defaults to "cosine").

    Returns:
        Whatever ``trainer.train()`` returns.

    Side effects:
        Creates a fresh directory under ./results/ for this run and writes
        the experiment configuration there as experiment_config.json.
    """
    import json
    import os
    import uuid

    dataset = load_dataset(config["dataset_name"], split="train")

    scheduler_type = config.get("lr_scheduler_type", "cosine")

    # Hub ids such as "org/name" contain "/", which would nest directories,
    # so flatten them. The random suffix keeps runs that share a model and
    # dataset (but differ in other settings) from overwriting each other.
    model_slug = config["model_name"].replace("/", "--")
    dataset_slug = config["dataset_name"].replace("/", "--")
    run_id = uuid.uuid4().hex[:8]
    output_dir = f'./results/{model_slug}_{dataset_slug}_{scheduler_type}_{run_id}'

    # Store the configuration next to the results so each run is traceable.
    os.makedirs(output_dir, exist_ok=True)
    config_path = os.path.join(output_dir, "experiment_config.json")
    with open(config_path, "w", encoding="utf-8") as config_file:
        json.dump(dict(config, lr_scheduler_type=scheduler_type), config_file, indent=2)

    peft_config = LoraConfig(
        r=config["r"],
        lora_alpha=config["lora_alpha"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    training_args = TrainingArguments(
        per_device_train_batch_size=config["per_device_train_batch_size"],
        # gradient_accumulation_steps=4,
        gradient_checkpointing=False,
        max_grad_norm=0.3,
        num_train_epochs=4,
        learning_rate=1e-3,
        bf16=False,
        save_total_limit=3,
        logging_steps=10,
        output_dir=output_dir,
        optim="adamw_hf",
        lr_scheduler_type=scheduler_type,
        warmup_ratio=0.05,  # for cosine
        factor=0.9,  # for greedylr
    )

    trainer = SFTTrainer(
        config["model_name"],
        train_dataset=dataset,
        dataset_text_field="text",
        peft_config=peft_config,
        args=training_args,
    )

    return trainer.train()

In [None]:
# Run every configuration in experiment_configs sequentially, one training run each.
for config in experiment_configs:
    run_experiment(config)