In [7]:
from transformers import TrainingArguments, Trainer, AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from datasets import Dataset
import torch
import pandas as pd
import numpy as np
import os

In [8]:
# === 1. Load the OLID data ===
# The TSV is parsed without a header row; column names are assigned explicitly.
# NOTE(review): if the file's first line is actually a header, it is loaded as
# a data row here — confirm against the file.
OLID_COLUMNS = ["id", "text", "label_A", "label_B", "label_C"]
df = pd.read_csv(
    "../datasets/training-v1/offenseval-training-v1.tsv",
    sep="\t",
    header=None,
)
df.columns = OLID_COLUMNS

# Encode the string labels of each sub-task as integers.
def encode_labels(df):
    """Return a copy of ``df`` with integer-encoded label columns added.

    For each of ``label_A``, ``label_B`` and ``label_C`` a sibling column
    ``<name>_enc`` is created by mapping the string label to its class index.
    Values that are not in the mapping become NaN. The input frame is not
    modified.
    """
    codebooks = {
        "label_A": {"NOT": 0, "OFF": 1},
        "label_B": {"UNT": 0, "TIN": 1},
        "label_C": {"IND": 0, "GRP": 1, "OTH": 2},
    }
    encoded = df.copy()
    for column, codes in codebooks.items():
        encoded[f"{column}_enc"] = encoded[column].map(codes)
    return encoded

# Attach the integer-encoded label columns (label_A_enc, label_B_enc, label_C_enc).
df = encode_labels(df)

In [9]:
# === 2. Model selection ===
MODEL_NAME = "roberta-large"
# Number of output classes for each sub-task.
NUM_LABELS = {"A": 2, "B": 2, "C": 3}

# === 3. Prepare the HuggingFace datasets ===

def prepare_dataset(df, task, sample_size=None):
    """Build the tokenized train/test split for one sub-task.

    Rows are filtered hierarchically: task "A" uses every row, task "B" only
    rows labelled ``OFF`` for task A, and task "C" only rows that are both
    ``OFF`` and ``TIN``. Rows whose encoded label for the task is missing are
    dropped.

    Args:
        df: DataFrame with ``text``, ``label_A``, ``label_B`` and the
            ``label_<task>_enc`` column for the requested task.
        task: One of ``"A"``, ``"B"`` or ``"C"``.
        sample_size: Optional cap on the number of rows kept (sampled with a
            fixed seed) for quick smoke tests. ``None`` keeps every row.

    Returns:
        The result of ``Dataset.train_test_split`` (80/20, seed 42) over a
        dataset with ``input_ids``, ``attention_mask`` and ``labels`` columns.

    Raises:
        ValueError: If ``task`` is not ``"A"``, ``"B"`` or ``"C"``.
    """
    label_columns = {"A": "label_A_enc", "B": "label_B_enc", "C": "label_C_enc"}
    # Fail early with a clear message instead of an UnboundLocalError later on.
    if task not in label_columns:
        raise ValueError(
            f"Unknown task {task!r}; expected one of {sorted(label_columns)}"
        )
    label_col = label_columns[task]

    if task == "A":
        df_task = df
    elif task == "B":
        df_task = df[df["label_A"] == "OFF"]
    else:  # task == "C"
        df_task = df[(df["label_A"] == "OFF") & (df["label_B"] == "TIN")]
    # dropna returns a new frame, so the caller's DataFrame is never modified.
    df_task = df_task.dropna(subset=[label_col])

    if sample_size is not None:
        # Never request more rows than are available for this task.
        df_task = df_task.sample(n=min(sample_size, len(df_task)), random_state=42)

    texts = df_task["text"].tolist()
    # The encoded column is float whenever it contained NaN; cast back to int.
    labels = [int(label) for label in df_task[label_col]]

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # padding=True pads every example to the longest sequence in this call,
    # so all rows have equal length.
    encodings = tokenizer(texts, truncation=True, padding=True)

    dataset = Dataset.from_dict({
        "input_ids": encodings["input_ids"],
        "attention_mask": encodings["attention_mask"],
        "labels": labels,
    })

    return dataset.train_test_split(test_size=0.2, seed=42)

"""
def prepare_dataset(df, task):
    if task == "A":
        df_task = df.copy()
        df_task = df_task.dropna(subset=["label_A_enc"])
    elif task == "B":
        df_task = df[df["label_A"] == "OFF"].copy()
        df_task = df_task.dropna(subset=["label_B_enc"])
    elif task == "C":
        df_task = df[(df["label_A"] == "OFF") & (df["label_B"] == "TIN")].copy()
        df_task = df_task.dropna(subset=["label_C_enc"])

    # ➕ Mini jeu pour test rapide
    df_task = df_task.sample(n=100, random_state=42)
    texts = df_task["text"].tolist()

    if task == "A":
        labels = df_task["label_A_enc"].tolist()
    elif task == "B":
        labels = df_task["label_B_enc"].tolist()
    elif task == "C":
        labels = df_task["label_C_enc"].tolist()

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    encodings = tokenizer(texts, truncation=True, padding=True)

    dataset = Dataset.from_dict({
        "input_ids": encodings["input_ids"],
        "attention_mask": encodings["attention_mask"],
        "labels": [int(l) for l in labels]
    })

    return dataset.train_test_split(test_size=0.2, seed=42)
    """

'\ndef prepare_dataset(df, task):\n    if task == "A":\n        df_task = df.copy()\n        df_task = df_task.dropna(subset=["label_A_enc"])\n    elif task == "B":\n        df_task = df[df["label_A"] == "OFF"].copy()\n        df_task = df_task.dropna(subset=["label_B_enc"])\n    elif task == "C":\n        df_task = df[(df["label_A"] == "OFF") & (df["label_B"] == "TIN")].copy()\n        df_task = df_task.dropna(subset=["label_C_enc"])\n\n    # ➕ Mini jeu pour test rapide\n    df_task = df_task.sample(n=100, random_state=42)\n    texts = df_task["text"].tolist()\n\n    if task == "A":\n        labels = df_task["label_A_enc"].tolist()\n    elif task == "B":\n        labels = df_task["label_B_enc"].tolist()\n    elif task == "C":\n        labels = df_task["label_C_enc"].tolist()\n\n    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n    encodings = tokenizer(texts, truncation=True, padding=True)\n\n    dataset = Dataset.from_dict({\n        "input_ids": encodings["input_ids"],\n   

In [10]:
# === 4. Metric function ===
def compute_metrics(eval_pred):
    """Compute weighted F1 and accuracy from an evaluation prediction pair.

    ``eval_pred`` unpacks into ``(predictions, labels)``; the predicted class
    is the argmax of ``predictions`` along axis 1.

    Returns:
        dict with ``"f1"`` (weighted-average F1) and ``"accuracy"``.

    NOTE(review): a weighted average can look healthy even when a minority
    class is never predicted — consider whether macro F1 is also wanted.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=1)
    summary = classification_report(
        gold,
        predicted,
        output_dict=True,
        zero_division=0,
    )
    weighted_f1 = summary["weighted avg"]["f1-score"]
    return {"f1": weighted_f1, "accuracy": summary["accuracy"]}

In [13]:
# === 5. Train one model per task ===
def train_task(task):
    """Fine-tune, save and evaluate a sequence classifier for one sub-task.

    The data comes from ``prepare_dataset(df, task)`` and the model is
    ``MODEL_NAME`` with ``NUM_LABELS[task]`` output classes. Checkpoints are
    written under ``./results_task_<task>`` and the final model is saved to
    ``./best_model_task_<task>``. Because ``load_best_model_at_end=True`` with
    ``metric_for_best_model="f1"``, the saved and evaluated model is the best
    checkpoint by F1 rather than the last training step.

    Args:
        task: Sub-task key; must be a key of ``NUM_LABELS``.

    Returns:
        The fitted ``Trainer``, so callers can re-evaluate, inspect
        ``trainer.state`` or resume training. Keeping a reference also keeps
        the model in memory; drop it when no longer needed.
    """
    print(f"\n📘 Training Task {task}")
    task_data = prepare_dataset(df, task)
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME, num_labels=NUM_LABELS[task]
    )

    # For a quick smoke test, a lighter configuration is: num_train_epochs=1,
    # per_device_train_batch_size=4, per_device_eval_batch_size=8,
    # save_strategy="no", logging_steps=10, disable_tqdm=False, report_to="none".
    training_args = TrainingArguments(
        output_dir=f"./results_task_{task}",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        save_strategy="steps",
        save_steps=500,  # => a checkpoint is saved every 500 *mini-batches*
        eval_strategy="steps",
        eval_steps=500,  # kept equal to save_steps so each checkpoint is scored
        logging_steps=50,
        learning_rate=2e-5,
        weight_decay=0.01,
        load_best_model_at_end=True,
        save_total_limit=2,
        metric_for_best_model="f1",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=task_data["train"],
        eval_dataset=task_data["test"],
        compute_metrics=compute_metrics,
    )

    trainer.train()
    trainer.save_model(f"./best_model_task_{task}")

    eval_results = trainer.evaluate()
    print(f"\n📊 Evaluation Results for Task {task}:")
    print(eval_results)

    return trainer


In [14]:
# === 6. Run the three trainings ===
# Tasks are trained one after another; each call loads its own model.
TASKS = ("A", "B", "C")
for task in TASKS:
    train_task(task)


📘 Training Task A


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Accuracy
500,0.5364,0.538677,0.780708,0.78852
1000,0.4685,0.475375,0.786775,0.796073
1500,0.3782,0.480274,0.800156,0.798338
2000,0.4548,0.539294,0.798261,0.800604
2500,0.4272,0.485888,0.790683,0.796828
3000,0.2796,0.726252,0.800527,0.800227
3500,0.3892,0.585767,0.800383,0.802115



📊 Evaluation Results for Task A:
{'eval_loss': 0.7262523174285889, 'eval_f1': 0.800527455096651, 'eval_accuracy': 0.800226586102719, 'eval_runtime': 153.4737, 'eval_samples_per_second': 17.254, 'eval_steps_per_second': 1.082, 'epoch': 3.0}

📘 Training Task B


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Accuracy
500,0.4192,0.342718,0.844425,0.894318
1000,0.3339,0.361024,0.844425,0.894318



📊 Evaluation Results for Task B:
{'eval_loss': 0.34271836280822754, 'eval_f1': 0.8444252058679174, 'eval_accuracy': 0.8943181818181818, 'eval_runtime': 44.8174, 'eval_samples_per_second': 19.635, 'eval_steps_per_second': 1.227, 'epoch': 3.0}

📘 Training Task C


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Accuracy
500,0.7855,0.738505,0.676861,0.716495
1000,0.5579,0.853745,0.684315,0.722938



📊 Evaluation Results for Task C:
{'eval_loss': 0.8537445068359375, 'eval_f1': 0.684315022749301, 'eval_accuracy': 0.7229381443298969, 'eval_runtime': 39.5588, 'eval_samples_per_second': 19.616, 'eval_steps_per_second': 1.239, 'epoch': 3.0}


In [None]:
# NOTE(review): in the cells shown here, `trainer` is only ever assigned as a
# local variable inside train_task, never at notebook scope — so on a fresh
# kernel this line would raise NameError. Confirm where `trainer` should come from.
trainer.train(resume_from_checkpoint=True)