# LLM Adapter GLUE Benchmark

Fit and test LLM adapters on GLUE datasets, using tiny BERT (`prajjwal1/bert-tiny`) as the pretrained model
* Fitting
    - Without privacy budget
    - With privacy budget - (ε,δ)
* Adapters:
    - Soft Prompt
    - Prefix
    - LoRA
    - (IA)^3
    - Soft Prompt + LoRA
    - Prefix + LoRA
    - Full Finetuning
    - Top Layer Only Finetuning
* Datasets:
    - SST2
    - QNLI
    - MNLI
    - QQP
* Metrics:
    - Classification Accuracy

## Environment Setup

In [None]:
# Mount Google Drive into the Colab filesystem so the project files are reachable.
from google.colab import drive
drive.mount('/content/drive/')
# Root folder of this project on the mounted Drive; later cells build the
# src/, data/ and output/ paths from it.
project_root_dir = "/content/drive/MyDrive/Colab Notebooks/llm_adapters_comparison/"

In [None]:
# Make the project folder on the mounted Drive the working directory.
%cd /content/drive/MyDrive/Colab Notebooks/llm_adapters_comparison/
# Enable autoreload (mode 2) so edited modules are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2
# Show matplotlib figures inline in the notebook output.
%matplotlib inline

### Install and import Python libraries

In [None]:
# Install the third-party packages this notebook relies on into the Colab runtime.
!pip install tqdm lap huggingface_hub transformers opacus peft

In [None]:
# Upgrade (-U) datasets and fsspec to their latest releases.
# NOTE(review): presumably works around a version mismatch in the preinstalled
# Colab packages — confirm whether this is still required.
!pip install -U datasets fsspec

In [None]:
import os
import sys
import torch
import time
import gc

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from datetime import datetime
# Put the project's src/ folder on the import path so its modules can be
# imported by later cells.
sys.path.append(os.path.join(project_root_dir, 'src'))

from google.colab import userdata
from huggingface_hub import login

# Read the Hugging Face access token from Colab's secret store. The token is a
# credential, so it is deliberately never printed: cell outputs are saved with
# the notebook and are easily shared or committed by accident.
hf_token = userdata.get('HF_TOKEN')
print("HF_TOKEN loaded from Colab secrets" if hf_token else "HF_TOKEN is empty")

# Authenticate non-interactively when a token is available; otherwise fall back
# to the interactive login prompt.
if hf_token:
    login(token=hf_token)
else:
    login()

## Load Data

### Save Data to Google Drive

In [None]:
from datasets import config
from datasets import load_dataset

# Load every GLUE task exactly once. The same objects are reused below for
# saving, instead of calling load_dataset a second time per task.
sst2 = load_dataset("glue", "sst2")
qnli = load_dataset("glue", "qnli")
mnli = load_dataset("glue", "mnli")
qqp = load_dataset("glue", "qqp")

# All tasks are stored under <project_root_dir>/data/<task>.
data_save_path = os.path.join(project_root_dir, 'data')
os.makedirs(data_save_path, exist_ok=True)

# Maps task name -> directory the dataset was written to.
glue_dirs = {}
for task, dataset in (("sst2", sst2), ("qnli", qnli), ("mnli", mnli), ("qqp", qqp)):
    save_path = os.path.join(data_save_path, task)
    dataset.save_to_disk(save_path)
    glue_dirs[task] = save_path

### Load Data from Google Drive

In [None]:
from datasets import load_from_disk
# Folder on Drive that holds one saved dataset directory per GLUE task.
data_save_path = os.path.join(project_root_dir, 'data')

# Task name -> dataset object restored from <data_save_path>/<task>.
glue_ds = dict(
    (task, load_from_disk(os.path.join(data_save_path, task)))
    for task in ("sst2", "qnli", "mnli", "qqp")
)

## Adapter Training & Testing

In [None]:
import utils
import data_preparation
from train import adapter_training_testing, train_test_all

### Without Privacy Settings
* Epsilon=Inf

In [None]:
# Benchmark run for the "without privacy settings" configuration (epsilon = inf).

# Run settings.
file_tag = "eps_inf"
model_name = "prajjwal1/bert-tiny"
weight_decay = 0.01
scheduler = True
adapter_method_list = [
    "soft_prompt",
    "prefix",
    "lora",
    "ia3",
    "soft_prompt_plus_lora",
    "prefix_plus_lora",
    "single_layer_finetuning",
    "full_finetuning",
]
dataset_list = ["sst2", "qnli", "mnli", "qqp"]

# Hyper-parameters for this configuration come from a JSON file in the project root.
hyper_parameter_config_wo_privacy = utils.load_json_file(
    os.path.join(project_root_dir, 'hyper_parameter_config_wo_privacy.json')
)

# Each run writes into its own folder: output/<file_tag>_<timestamp>.
timestamp = datetime.now().strftime("%Y-%m-%d %H-%M")
output_path = os.path.join(project_root_dir, 'output', f"{file_tag}_{timestamp}")
os.makedirs(output_path, exist_ok=True)

# Train and test every listed adapter method on every listed dataset.
train_test_all(
    model_name=model_name,
    hyper_parameter_config=hyper_parameter_config_wo_privacy,
    datasets=glue_ds,
    output_path=output_path,
    adapter_method_list=adapter_method_list,
    dataset_list=dataset_list,
    scheduler=scheduler,
    weight_decay=weight_decay,
    file_tag=file_tag,
)

### With privacy settings
* Epsilon=8

In [None]:
# Benchmark run for the "with privacy settings" configuration (epsilon = 8).

# Run settings.
file_tag = "eps_8"
model_name = "prajjwal1/bert-tiny"
weight_decay = 0.01
scheduler = True
adapter_method_list = [
    "soft_prompt",
    "prefix",
    "lora",
    "ia3",
    "soft_prompt_plus_lora",
    "prefix_plus_lora",
    "single_layer_finetuning",
    "full_finetuning",
]
dataset_list = ["sst2", "qnli", "mnli", "qqp"]

# Hyper-parameters for this configuration come from a JSON file in the project root.
hyper_parameter_config_w_privacy = utils.load_json_file(
    os.path.join(project_root_dir, 'hyper_parameter_config_w_privacy.json')
)

# Each run writes into its own folder: output/<file_tag>_<timestamp>.
timestamp = datetime.now().strftime("%Y-%m-%d %H-%M")
output_path = os.path.join(project_root_dir, 'output', f"{file_tag}_{timestamp}")
os.makedirs(output_path, exist_ok=True)

# Train and test every listed adapter method on every listed dataset.
train_test_all(
    model_name=model_name,
    hyper_parameter_config=hyper_parameter_config_w_privacy,
    datasets=glue_ds,
    output_path=output_path,
    adapter_method_list=adapter_method_list,
    dataset_list=dataset_list,
    scheduler=scheduler,
    weight_decay=weight_decay,
    file_tag=file_tag,
)