#### **Set up the environment**

In [None]:
!pip install datasets
!pip install tensorboard
!pip install huggingface_hub
!pip install -q transformers accelerate

In [None]:
import torch
from transformers import set_seed

# Seed shared by the data split and the training run, and the checkpoint name
# that the tokenizer and model cells load.
SEED = 42
MODEL_NAME = "roberta-base"

# Use the first CUDA GPU when one is visible; otherwise run on the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

set_seed(SEED)
print(DEVICE)

#### **Load & preprocess a dataset**

In [None]:
# load a dataset.
from datasets import Dataset

# HW4.json must be uploaded to the working directory before this cell is run.
data = Dataset.from_json("HW4.json").rename_column("intent", "labels")


def _rows_in_split(split_name):
  """Return the rows of `data` whose "split" field equals `split_name`."""
  return data.filter(lambda row: row["split"] == split_name)


# Hold out 10% of the "train" rows for validation; the "dev" rows are kept
# aside and used as the test set.
raw_train_data = _rows_in_split("train")
split_data = raw_train_data.train_test_split(test_size=0.1, seed=SEED)
train_data, val_data = split_data["train"], split_data["test"]

dev_data = _rows_in_split("dev")

for title, subset in (("Train", train_data), ("Validation", val_data), ("Dev", dev_data)):
  print(f"{title} dataset size: {len(subset)}")

In [None]:
import ast

# Build the label vocabulary (label2id & id2label).
# Each "labels" field is parsed with ast.literal_eval and iterated, so it is
# presumably a stringified list of intent names. Intents are collected over
# the whole dataset and sorted so the id assignment is deterministic.
unique_labels = sorted({
    intent
    for item in data
    for intent in ast.literal_eval(item["labels"])
})
num_labels = len(unique_labels)

id2label = dict(enumerate(unique_labels))
label2id = {label: idx for idx, label in id2label.items()}

In [None]:
def encode_labels(item):
  """Turn an example's stringified intent list into a multi-hot float vector.

  The "labels" field is parsed with ast.literal_eval, each intent is looked up
  in label2id, and the field is overwritten with a list of num_labels floats
  (1.0 at the positions of the example's intents, 0.0 elsewhere).

  Raises KeyError if an intent is missing from label2id.
  """
  intents = ast.literal_eval(item["labels"])
  active_ids = {label2id[intent] for intent in intents}

  item["labels"] = [1.0 if idx in active_ids else 0.0 for idx in range(num_labels)]
  return item

# Encode the labels of all three splits with the same function.
train_data, val_data, dev_data = (
    subset.map(encode_labels) for subset in (train_data, val_data, dev_data)
)

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def tokenize_function(item):
  """Tokenize the "utterance" field of a batch.

  Sequences are truncated and padded with padding="max_length", so every
  example ends up with the same fixed length.
  """
  utterances = item["utterance"]
  return tokenizer(utterances, truncation=True, padding="max_length")


# batched=True hands the tokenizer many utterances per call.
train_data, val_data, dev_data = (
    subset.map(tokenize_function, batched=True)
    for subset in (train_data, val_data, dev_data)
)

In [None]:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def compute_metrics(eval_pred):
  """Score top-2 predictions against the multi-hot label vectors.

  For every example the two largest logits are marked as predicted labels;
  the resulting 0/1 matrix is compared with `labels`.

  Args:
    eval_pred: a (logits, labels) pair.

  Returns:
    dict with "accuracy" (accuracy_score on the full label vectors) and
    micro-averaged "precision", "recall" and "f1".
  """
  logits, labels = eval_pred

  top2_predictions = np.zeros_like(logits, dtype=np.int32)
  # Each `row` is a view into top2_predictions, so writing to it fills the matrix.
  for row, scores in zip(top2_predictions, logits):
    row[np.argsort(scores)[-2:]] = 1

  precision, recall, f1, _ = precision_recall_fscore_support(
      labels, top2_predictions, average="micro"
  )

  return {
      "accuracy": accuracy_score(labels, top2_predictions),
      "precision": precision,
      "recall": recall,
      "f1": f1,
  }

In [None]:
def quantitative_eval(metrics):
  """Print accuracy, precision, recall and F1 with two decimals.

  Args:
    metrics: mapping that contains the keys "test_accuracy",
      "test_precision", "test_recall" and "test_f1".
  """
  for title in ("Accuracy", "Precision", "Recall", "F1"):
    value = metrics[f"test_{title.lower()}"]
    print(f"{title}: {value:.2f}")

### **1. Fine-tune RoBERTa models**

#### **Define hyperparameters**

In [None]:
from transformers import TrainingArguments

# Hyperparameters for LoRA fine-tuning; checkpoints and logs go to ./results.
argument = TrainingArguments(
    output_dir="./results",
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=1e-3,
    max_grad_norm=1.0,
    weight_decay=0.0,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    dataloader_num_workers=2,
    # drop_last also applies to the evaluation/prediction dataloaders, so True
    # would silently leave the last partial batch out of every reported metric.
    dataloader_drop_last=False,
    # fp16 mixed precision needs a GPU; DEVICE may fall back to the CPU.
    fp16=torch.cuda.is_available(),
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`.
    eval_strategy="epoch",
    save_strategy="epoch",
    report_to="tensorboard",
    seed=SEED,
)

In [None]:
from peft import LoraConfig

# LoRA adapter settings for sequence classification.
lora_config = LoraConfig(
    task_type="SEQ_CLS",              # task the adapter is built for
    target_modules=["query", "key"],  # attention modules that receive adapters
    r=8,                              # rank
    lora_alpha=32,                    # scaling factor
    lora_dropout=0.1,                 # dropout rate
)

#### **Define functions for training**

In [None]:
from transformers import AutoModelForSequenceClassification
from peft import get_peft_model

# Classification-head settings: multi-label problem with one output per intent.
classifier_kwargs = dict(
    problem_type="multi_label_classification",
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

# Load the pretrained checkpoint, move it to DEVICE, then wrap it with LoRA.
model = get_peft_model(
    AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, **classifier_kwargs).to(DEVICE),
    lora_config,
)

In [None]:
from transformers import Trainer

# Trainer for fine-tuning: trains on train_data, evaluates on val_data and
# reports whatever compute_metrics returns.
trainer = Trainer(
    args=argument,
    model=model,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=train_data,
    eval_dataset=val_data,
)

#### **Train & test RoBERTa models**
1. Test a vanilla RoBERTa model.
2. Fine-tune a model.
3. Test a fine-tuned RoBERTa model.

##### **Test a vanilla RoBERTa model**

In [None]:
# Baseline: score the model on the dev split before trainer.train() is called.
baseline_output = trainer.predict(dev_data)
logits, true_labels, metrics = baseline_output
quantitative_eval(metrics)

##### **Open a tensorboard to monitor the training process**



In [None]:
# Load the TensorBoard notebook extension and open it on ./results/runs/
# (presumably where the Trainer writes its logs, since output_dir is ./results).
%load_ext tensorboard
%tensorboard --logdir ./results/runs/

##### **Fine-tune & test a model**

In [None]:
# Fine-tune the model; the data, hyperparameters and metrics all come from the
# Trainer constructed earlier in the notebook.
trainer.train()

In [None]:
# Score the fine-tuned model on the dev split and print the summary.
finetuned_output = trainer.predict(dev_data)
metrics = finetuned_output.metrics
quantitative_eval(metrics)

### **2. Use the fine-tuned model uploaded to the Hugging Face Hub**

In [None]:
from transformers import AutoTokenizer,AutoModelForSequenceClassification
from peft import PeftModel

# Hub repository used for both the tokenizer and the LoRA adapter weights.
adapter_repo = "twkang43/lora-roberta-cse4057"

tokenizer = AutoTokenizer.from_pretrained(adapter_repo)

# Rebuild the base classifier with the same label setup, then attach the adapter.
base_model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    label2id=label2id,
    id2label=id2label,
    num_labels=num_labels,
    problem_type="multi_label_classification",
)
base_model = base_model.to(DEVICE)
model = PeftModel.from_pretrained(base_model, adapter_repo)

In [None]:
from transformers import Trainer, TrainingArguments

# Arguments for an evaluation-only Trainer.
evaluation = TrainingArguments(
    output_dir="./results",
    do_eval=True,
    per_device_eval_batch_size=16,
    dataloader_num_workers=2,
    # Keep the last partial batch: with drop_last=True the prediction dataloader
    # would skip those dev examples and the reported metrics would be incomplete.
    dataloader_drop_last=False,
    # fp16 mixed precision needs a GPU; DEVICE may fall back to the CPU.
    fp16=torch.cuda.is_available(),
    report_to="tensorboard",
    seed=SEED,
)

# Evaluation-only Trainer: no train/eval datasets are attached, it is only
# used to run predict() with the adapter-loaded model.
trainer = Trainer(
    args=evaluation,
    model=model,
    compute_metrics=compute_metrics,
    processing_class=tokenizer,
)

In [None]:
# Score the Hub-loaded model on the dev split and print the summary.
hub_output = trainer.predict(dev_data)
metrics = hub_output.metrics
quantitative_eval(metrics)