In [1]:
import sys
import os
sys.path.insert(0, "/home/jovyan/.local/share/virtualenvs/ptls-experiments-w-dEu3oS/lib/python3.8/site-packages")
os.environ["OMP_NUM_THREADS"] = "4"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import pandas as pd
import numpy as np
import sklearn
import torch
import lightning as L

from sklearn.model_selection import train_test_split
from peft import LoraConfig
from src.modules.training_modules import ReloraModuleForClassification
from src.dataset.dataset import EssayDataset
from src.metric.metric import quadratic_weighted_kappa


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset

In [3]:
dataset = pd.read_csv("src/data/train.csv")
train_set, val_set = train_test_split(dataset, test_size=0.2, random_state=42)

train_dataset = EssayDataset(_, train_set)
eval_dataset = EssayDataset(_, val_set)

# Conf

In [4]:
project_name = "ReLoRA"
dataset_shards = 1

max_epochs = 10
lora_merge_epochs = 1
learning_rate = 1e-5
max_steps = 1e5
max_val_steps = 1e4

L.seed_everything(234)
torch.set_float32_matmul_precision("medium")

model_class = LlamaForSequenceClassificationmodel_path = "philschmid/llama-2-7b-instruction-generator"
lora_config = LoraConfig(r=8, lora_alpha=16, lora_dropout=0.1, bias="none", inference_mode=False)

from transformers import AutoModelForSequenceClassification, AutoTokenizer
model_class = AutoModelForSequenceClassification
model_path = "microsoft/deberta-v3-large"
lora_config = LoraConfig(r=8, lora_alpha=16, lora_dropout=0.01, bias="none", inference_mode=False)
tokenizer = AutoTokenizer.from_pretrained(model_path)
max_length = 512

Seed set to 234


# Training

In [5]:
model = ReloraModuleForClassification(model_class=model_class, model_path=model_path, lora_config=lora_config, lora_merge_freq=1,
                                      train_dataset=train_dataset, eval_dataset=eval_dataset, learning_rate=learning_rate)

trainer = L.Trainer(max_epochs=3, log_every_n_steps=1,
                    limit_train_batches=1e5, limit_val_batches=1e4, reload_dataloaders_every_n_epochs=100)
trainer.fit(model)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  return self.fget.__get__(instance, owner)()
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                               | Params
-------------------------------------------------------------
0 | loss  | MSELoss                            | 0     
1 | model | DebertaV2ForSequenceClassification | 435 M 
-------------------------------------------------------------
435 M     Trainable params
0         Non-trainable params
435 M     Total params
1,740.251 Total estimated model par

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=3` reached.


# Inference

In [6]:
model.eval()
outputs = []
labels = []
from tqdm.notebook import tqdm

for i in tqdm(range(len(eval_dataset))):
    torch.cuda.empty_cache()
    input_ids = torch.tensor(eval_dataset[i]["input_ids"])
    attention_mask = torch.tensor(eval_dataset[i]["attention_mask"])
    model.to(device)
    outputs.append(model.forward([input_ids.view(1, -1).to(device), attention_mask.view(1, -1).to(device)])["logits"].view(-1).detach().cpu())
    labels.append(eval_dataset[i]["labels"].detach().cpu())
    
quadratic_weighted_kappa(np.array(torch.stack(outputs).view(-1).numpy()), np.array(labels))

  0%|          | 0/3462 [00:00<?, ?it/s]

  input_ids = torch.tensor(eval_dataset[i]["input_ids"])
  attention_mask = torch.tensor(eval_dataset[i]["attention_mask"])
