In [7]:
import pandas as pd 
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch import nn
import torch.nn.functional as F
import os

from datasets import Dataset
from sklearn.model_selection import train_test_split

from transformers import BitsAndBytesConfig
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training


# Expose only GPU index 0 to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Directories holding one CSV per language for each data split.
train_path = 'public_data_test/track_a/train/'
val_path = 'public_data_test/track_a/dev/'
test_path = 'public_data_test/track_a/test/'

# Per-language results, keyed by language code and filled in by
# F1ScoreCallback.on_train_end:
#   mean_f1       -> best validation macro F1
#   split_f1      -> epoch at which that best score was reached
#   thresholds_f1 -> per-emotion thresholds used for that best score
mean_f1, split_f1, thresholds_f1 = {}, {}, {}

In [None]:
def tokenize(batch):
    """Tokenize the ``'text'`` entries of a batch for use with ``Dataset.map``.

    Relies on the module-level ``tokenizer`` and ``device`` being defined
    before this function is called.

    Parameters
    ----------
    batch : mapping
        Batch whose ``'text'`` entry is passed to the tokenizer.

    Returns
    -------
    dict
        Every item produced by the tokenizer (requested as PyTorch tensors,
        padded, truncated to at most 128 tokens), moved to ``device``.
    """
    encoded = tokenizer(
        batch['text'],
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )
    on_device = {}
    for key, tensor in encoded.items():
        on_device[key] = tensor.to(device)
    return on_device

from transformers import Trainer, TrainingArguments

from sklearn.metrics import f1_score, precision_score, recall_score

def compute_metrics(eval_pred):
    """Score multi-label predictions after tuning one threshold per emotion.

    For every label column the threshold (candidates from 0.05 up to, but not
    including, 0.7 in steps of 0.05) with the highest F1 is selected, the
    probabilities are binarised with those thresholds, and aggregate scores
    are computed.

    Note that the thresholds are chosen on the very data the scores are
    reported on, so the returned numbers are optimistic.

    Parameters
    ----------
    eval_pred : tuple
        ``(predictions, labels)``. ``predictions`` is either the logits array
        itself or a tuple/list whose first element is the logits array.
        ``labels`` is indexed as a 2-D array (rows = samples, columns =
        emotions).

    Returns
    -------
    dict
        ``f1_micro``, ``f1_macro``, and micro-averaged ``precision`` and
        ``recall``.
    """
    logits, labels = eval_pred
    # Only unwrap when the predictions really are a container of outputs;
    # indexing a plain (samples, emotions) array with [0] would silently pick
    # the first row instead of the logits.
    if isinstance(logits, (tuple, list)):
        logits = logits[0]
    probs = torch.sigmoid(torch.tensor(logits)).numpy()

    num_emotions = labels.shape[1]
    best_thresholds = {}

    for i in range(num_emotions):
        best_f1 = 0
        best_thresh = 0.5  # fallback when no candidate reaches F1 > 0

        for threshold in np.arange(0.05, 0.7, 0.05):
            preds = (probs[:, i] >= threshold).astype(int)
            current_f1 = f1_score(labels[:, i], preds, zero_division=0)

            # Strict '>' keeps the lowest threshold among ties.
            if current_f1 > best_f1:
                best_f1 = current_f1
                best_thresh = threshold

        best_thresholds[i] = best_thresh

    print(f"Best per-emotion thresholds: {best_thresholds}")

    # Apply per-emotion thresholds
    predictions = np.zeros(probs.shape)
    for i in range(num_emotions):
        predictions[:, i] = (probs[:, i] >= best_thresholds[i]).astype(int)

    # Compute overall scores
    return {
        'f1_micro': f1_score(labels, predictions, average='micro', zero_division=0),
        'f1_macro': f1_score(labels, predictions, average='macro', zero_division=0),
        'precision': precision_score(labels, predictions, average='micro', zero_division=0),
        'recall': recall_score(labels, predictions, average='micro', zero_division=0)
    }


In [None]:
import json
import os
from transformers import TrainerCallback

class F1ScoreCallback(TrainerCallback):
    """Per-epoch validation scoring and test-set prediction export.

    At the end of every epoch the callback:

    1. predicts on the validation set and, for each label column, picks the
       probability threshold (candidates from 0.05 up to, but not including,
       0.7 in steps of 0.05) with the highest F1;
    2. prints the macro F1 obtained with those thresholds and remembers the
       best epoch seen so far;
    3. predicts on the test set, binarises the probabilities with *this
       epoch's* thresholds and writes them to
       ``<save_dir>/pred_<language>_<epoch>.csv``.

    When training ends, the best score, its epoch and its thresholds are
    stored under ``language`` in the module-level ``mean_f1``, ``split_f1``
    and ``thresholds_f1`` dicts.

    The trainer has to be attached with :meth:`set_trainer` before training
    starts, because the callback calls ``trainer.predict`` itself.

    Parameters
    ----------
    val_dataset
        Validation dataset; its ``"labels"`` column is indexed as a 2-D array.
    test_dataset
        Test dataset passed to ``trainer.predict``.
    df_test : pandas.DataFrame
        Test frame; its ``id`` column and the column names from position 2
        onwards are reused for the prediction CSV.
    language : str
        Key used for the output file name and the result dicts.
    save_dir : str
        Output directory for prediction CSVs (created if missing).
    """

    def __init__(self, val_dataset, test_dataset, df_test, language, save_dir="track_a/"):
        self.trainer = None
        self.val_dataset = val_dataset
        self.test_dataset = test_dataset
        self.df_test = df_test
        self.save_dir = save_dir
        self.best_f1 = 0
        self.best_epoch = 0
        self.best_thresholds = {}  # Store best thresholds per emotion
        self.language = language

        os.makedirs(self.save_dir, exist_ok=True)

    def set_trainer(self, trainer):
        """Attach the trainer used for the per-epoch ``predict`` calls."""
        self.trainer = trainer

    @staticmethod
    def _extract_logits(predictions):
        """Return the logits array, unwrapping a tuple/list of model outputs."""
        if isinstance(predictions, (tuple, list)):
            return predictions[0]
        return predictions

    @staticmethod
    def _search_thresholds(probs, labels):
        """Return ``{label_index: threshold}`` maximising F1 for each label column."""
        thresholds = {}
        for i in range(labels.shape[1]):
            best_f1 = 0
            best_thresh = 0.5  # fallback when no candidate reaches F1 > 0

            for threshold in np.arange(0.05, 0.7, 0.05):
                preds = (probs[:, i] >= threshold).astype(int)
                current_f1 = f1_score(labels[:, i], preds, zero_division=0)

                # Strict '>' keeps the lowest threshold among ties.
                if current_f1 > best_f1:
                    best_f1 = current_f1
                    best_thresh = threshold

            thresholds[i] = best_thresh
        return thresholds

    @staticmethod
    def _binarize(probs, thresholds, dtype=float):
        """Apply one threshold per column and return a 0/1 array of ``dtype``."""
        binary = np.zeros(probs.shape, dtype=dtype)
        for i, thresh in thresholds.items():
            binary[:, i] = (probs[:, i] >= thresh).astype(int)
        return binary

    def on_epoch_end(self, args, state, control, **kwargs):
        """Tune thresholds on validation data and export test predictions."""
        if self.trainer is None:
            raise RuntimeError(
                "F1ScoreCallback.set_trainer() must be called before training starts"
            )

        predictions = self.trainer.predict(self.val_dataset)
        logits = self._extract_logits(predictions.predictions)
        probs = torch.sigmoid(torch.tensor(logits)).numpy()

        # Read the label column once instead of on every threshold iteration.
        val_labels = self.val_dataset["labels"]

        best_thresholds = self._search_thresholds(probs, val_labels)

        # Apply per-emotion thresholds
        binary_preds = self._binarize(probs, best_thresholds)

        f1_mean = f1_score(val_labels, binary_preds, average='macro', zero_division=0)
        print(f"Epoch {state.epoch}: Mean F1 Score = {f1_mean:.4f}")

        if f1_mean > self.best_f1:
            self.best_f1 = f1_mean
            self.best_epoch = state.epoch
            self.best_thresholds = best_thresholds  # Store per-emotion thresholds

        # Generate test set predictions with the thresholds tuned this epoch
        test_predictions = self.trainer.predict(self.test_dataset)
        test_logits = self._extract_logits(test_predictions.predictions)
        test_probs = torch.sigmoid(torch.tensor(test_logits)).numpy()

        test_binary_preds = self._binarize(test_probs, best_thresholds, dtype=int)

        df_binary_preds = pd.DataFrame(test_binary_preds, columns=self.df_test.columns[2:])
        df_binary_preds.insert(0, 'id', self.df_test['id'])

        test_output_file = os.path.join(self.save_dir, f"pred_{self.language}_{state.epoch}.csv")
        df_binary_preds.to_csv(test_output_file, index=False)
        print(f"Saved test predictions for epoch {state.epoch} to {test_output_file}")

    def on_train_end(self, args, state, control, **kwargs):
        """Report the best epoch and publish its results to the module-level dicts."""
        print(f"Best F1 Score: {self.best_f1:.4f} at epoch {self.best_epoch}")
        mean_f1[self.language] = self.best_f1
        split_f1[self.language] = self.best_epoch
        thresholds_f1[self.language] = self.best_thresholds

In [None]:
import gc

# File-name suffixes of the languages this loop skips. `continue` is only
# valid inside a loop, so the filter is applied at the top of each iteration.
# Empty this tuple to process every CSV found in train_path.
SKIP_FILE_SUFFIXES = (
    'vmw.csv', 'eng.csv', 'ary.csv', 'hin.csv', 'deu.csv', 'ibo.csv', 'mar.csv',
    'kin.csv', 'afr.csv', 'ron.csv', 'pcm.csv', 'swe.csv', 'yor.csv', 'ptbr.csv',
    'som.csv', 'ptmz.csv', 'sun.csv', 'rus.csv', 'ukr.csv', 'tir.csv', 'swa.csv',
)

# Train one LoRA adapter per language CSV; sorted() makes the processing order
# deterministic (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(train_path)):
    if not filename.endswith('.csv'):
        continue
    if filename.endswith(SKIP_FILE_SUFFIXES):
        continue
    language = filename[0:-4]
    print("language")
    print(language)

    # Drop references to the previous language's model/trainer before loading
    # the next one, so two copies of the model are never alive at once.
    model = trainer = f1_callback = None
    gc.collect()
    torch.cuda.empty_cache()

    # The same file name is expected in the train, dev and test directories.
    df_train = pd.read_csv(train_path+filename)
    df_val = pd.read_csv(val_path+filename)

    # Columns from position 2 onwards are used as the label matrix.
    train_texts = df_train['text'].tolist()
    val_texts = df_val['text'].tolist()
    train_labels = np.array(df_train.iloc[:, 2:].values.tolist(), dtype=float)
    val_labels = np.array(df_val.iloc[:, 2:].values.tolist(), dtype=float)

    train_dataset = Dataset.from_dict({"text": train_texts, "labels": train_labels})
    val_dataset = Dataset.from_dict({"text": val_texts, "labels": val_labels})

    df_test = pd.read_csv(test_path+filename)
    test_texts = df_test['text'].tolist()
    test_dataset = Dataset.from_dict({"text": test_texts})

    num_labels = df_train.iloc[:, 2:].shape[1]
    model_name = "fdschmidt93/NLLB-LLM2Vec-Meta-Llama-31-8B-Instruct-mntp-unsup-simcse"
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # 4-bit NF4 quantisation with float16 compute.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        trust_remote_code=True,
        num_labels=num_labels,
        problem_type="multi_label_classification",  # Important for multi-label tasks
        torch_dtype=torch.float32
    ).to(device)

    # Keep the classification head in float32.
    model.score = model.score.to(dtype=torch.float32)

    # LoRA adapters on the attention (q/k/v/o) and MLP (gate/up/down)
    # projections of modules whose name contains "llm2vec".
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout = 0.05,
        target_modules=r".*llm2vec.*(self_attn\.(q|k|v|o)_proj|mlp\.(gate|up|down)_proj).*",
        bias="none",
        task_type="SEQ_CLS",
    )
    model = get_peft_model(model, lora_config)

    # `tokenizer` and `device` are read as globals by tokenize().
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token

    train_dataset = train_dataset.map(tokenize, batched=True)
    val_dataset = val_dataset.map(tokenize, batched=True)
    test_dataset = test_dataset.map(tokenize, batched=True)

    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])

    # Convert labels to float16
    train_dataset = train_dataset.map(lambda x: {'labels': torch.tensor(x['labels'], dtype=torch.float16, device=device)})
    val_dataset = val_dataset.map(lambda x: {'labels': torch.tensor(x['labels'], dtype=torch.float16, device=device)})

    training_args = TrainingArguments(
        output_dir="./results",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        gradient_accumulation_steps = 2,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=5,
        weight_decay=0.01,
        logging_dir="./logs",
        logging_steps=100,
        fp16=False,
        report_to = "none",
        optim="paged_adamw_32bit",
    )

    f1_callback = F1ScoreCallback(val_dataset=val_dataset, test_dataset=test_dataset, df_test=df_test, language=language)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        callbacks=[f1_callback]
    )
    # The callback runs trainer.predict itself, so it needs a back-reference.
    f1_callback.set_trainer(trainer)

    trainer.train()

    # Persist only the LoRA adapter weights, one directory per language.
    model.save_pretrained("./results/lora_adapter/{}".format(language))

import json

# Write the per-language best scores, best epochs and thresholds collected
# during training as indented (pretty-printed) JSON.
with open("validation_f1_post_testing.txt", "w") as results_file:
    json.dump(
        {
            "mean_f1": mean_f1,
            "split_f1": split_f1,
            "thresholds_f1": thresholds_f1,
        },
        results_file,
        indent=4,
    )