In [18]:
import sys, os
sys.path.append(os.path.abspath('..'))

import joblib
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

from DataReader import DataReader
from Preprocessor import Preprocessor
from Vectorizer import Vectorizer
from DeepLearning import DeepLearner

import importlib
import Classifier
import DeepLearning

# Reload the project modules so that edits made to their .py files since the
# kernel started are picked up without restarting the kernel.
importlib.reload(Classifier)
importlib.reload(DeepLearning)

# `from DeepLearning import DeepLearner` (earlier in this cell) bound the class
# object from *before* the reload; importlib.reload does not rebind names that
# were imported with `from ... import ...`. Rebind it so the cells below use
# the freshly reloaded class. Ending the cell on an assignment also stops the
# notebook from echoing the module repr (which contains an absolute local path).
DeepLearner = DeepLearning.DeepLearner



<module 'DeepLearning' from '/Users/claradelandre/Desktop/EPFL/MA2/DeepLearning/Projet/DeepL-Breakers/DeepLearning.py'>

In [21]:
# ⚙️ Evaluation Function
def evaluate_model(model, x, y_true, model_type='sklearn', class_num=2):
    """Score a fitted model on one evaluation set.

    Args:
        model: Object exposing ``predict(x)``.
        x: Inputs handed to ``model.predict`` unchanged.
        y_true: Ground-truth labels compared against the predictions.
        model_type: Either ``'sklearn'`` or ``'pytorch'``. Both are predicted
            through ``model.predict``; the value is only validated.
        class_num: Currently unused; kept so existing callers keep working.

    Returns:
        dict with keys ``"accuracy"``, ``"f1"``, ``"precision"`` and
        ``"recall"``. The last three use ``average='weighted'``.

    Raises:
        ValueError: If ``model_type`` is not one of the supported values.
    """
    # Validate before predicting so an unsupported type never triggers inference.
    if model_type not in ('sklearn', 'pytorch'):
        raise ValueError("Unknown model type")

    y_pred = model.predict(x)

    # A torch-backed predict may hand back a tensor (possibly on an accelerator
    # or attached to the autograd graph); the sklearn metrics expect array-likes
    # on the host, so convert when the result looks like a tensor.
    if hasattr(y_pred, 'detach'):
        y_pred = y_pred.detach().cpu().numpy()

    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, average='weighted'),
        "precision": precision_score(y_true, y_pred, average='weighted'),
        "recall": recall_score(y_true, y_pred, average='weighted')
    }

# 🧪 Function to load models and evaluate on a subtask
def evaluate_subtask(letter, class_num, test_size=0.3, random_state=None):
    """Load every saved model for one subtask and score it on a held-out split.

    Reads the labelled data for ``letter``, shuffles it, splits off a test
    portion, cleans and vectorizes that portion, then loads each saved model
    from ``../saved_models`` and scores it with ``evaluate_model``.

    Args:
        letter: Subtask identifier; passed to ``DataReader`` and used to build
            the saved-model file names.
        class_num: Number of classes; forwarded to ``evaluate_model``.
        test_size: Forwarded to ``train_test_split``. Defaults to 0.3.
        random_state: Forwarded to ``train_test_split``. The default ``None``
            leaves the split unseeded. This does not control
            ``dr.shuffle(..., 'random')``.

    Returns:
        Tuple ``(model_names, results)`` where ``model_names`` lists every
        evaluated model in evaluation order and ``results`` maps each name to
        the metrics dict returned by ``evaluate_model``.
    """
    print(f"\n🔍 Evaluating Subtask {letter}")
    dr = DataReader('../datasets/training-v1/offenseval-training-v1.tsv', letter)
    data, labels = dr.get_labelled_data()
    data, labels = dr.shuffle(data, labels, 'random')
    # NOTE(review): the shuffle and split are re-drawn on every call, so unless
    # training used the same seed the test portion may overlap the samples the
    # saved models were fitted on - confirm against the training notebook.
    tr_data, tst_data, y_tr, y_tst = train_test_split(
        data, labels, test_size=test_size, random_state=random_state
    )

    preprocessor = Preprocessor(('remove_stopwords', 'lemmatize'))
    vectorizer = Vectorizer('count')
    tst_clean = preprocessor.clean(tst_data)
    # NOTE(review): a fresh Vectorizer only ever sees the test split here. If
    # `vectorize` fits its vocabulary on the data it is given, these features
    # will not line up with the ones the saved models were trained on - confirm
    # against Vectorizer.
    x_tst_vec = vectorizer.vectorize(tst_clean)

    # Classical models are stored by position (subtask{letter}_model_{i}.joblib),
    # so this order must match the order used when they were saved.
    classical_names = ['RandomForest', 'LogisticRegression', 'NaiveBayes', 'KNN', 'SVC']
    deep_names = ['LSTM', 'CNN']
    model_names = classical_names + deep_names
    results = {}

    # NOTE(review): the recorded run of this notebook stopped with
    # "TypeError: predict() missing 1 required positional argument: 'X'".
    # Possibly `load` does not replace the wrapped estimator in place - confirm
    # against Classifier.load / DeepLearner.load.
    for i, name in enumerate(classical_names):
        clf = Classifier.Classifier(name)  # ✅ Recreate Classifier object
        clf.load(f"../saved_models/subtask{letter}_model_{i}.joblib")
        results[name] = evaluate_model(clf, x_tst_vec, y_tst, class_num=class_num)

    # Deep models share the same construct -> load -> evaluate sequence; only
    # the model type and the (lower-cased) checkpoint file name differ.
    for name in deep_names:
        learner = DeepLearner(tr_data, y_tr, vocab_length=vectorizer.vocab_length, model_type=name)
        learner.load(f"../saved_models/subtask{letter}_{name.lower()}.pth")
        results[name] = evaluate_model(
            learner, tst_clean, y_tst, model_type='pytorch', class_num=class_num
        )

    return model_names, results

# 📊 Plotting function
def plot_metric(results_dict, metric, subtask, model_names):
    """Show a bar chart of one metric across models for a single subtask.

    Args:
        results_dict: Mapping of model name to a dict of metric values.
        metric: Key of the metric to plot; also used (capitalized) as the
            y-axis label and in the title.
        subtask: Subtask identifier shown in the title.
        model_names: Model names to plot, in bar order. Each must be a key of
            ``results_dict``.
    """
    # Collect the bar heights first so a missing model/metric fails before any
    # figure is created.
    heights = []
    for name in model_names:
        heights.append(results_dict[name][metric])

    label = metric.capitalize()

    plt.figure(figsize=(8, 5))
    plt.bar(model_names, heights, color='mediumseagreen')
    plt.title(f"{label} per Model – Subtask {subtask}")
    plt.ylabel(label)
    plt.ylim(0, 1)  # fixed axis range so charts are comparable across subtasks
    plt.grid(axis='y', linestyle='--', alpha=0.5)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [22]:
# ✅ Evaluate all subtasks
# The model list is the same for every subtask, so it is kept from the first call only.
model_names, results_A = evaluate_subtask('A', 2)
_, results_B = evaluate_subtask('B', 2)
_, results_C = evaluate_subtask('C', 3)

# 📈 Plot everything
# One chart per (metric, subtask) pair, grouped by metric.
for metric in ['accuracy', 'f1', 'precision', 'recall']:
    for subtask_letter, subtask_results in (
        ('A', results_A),
        ('B', results_B),
        ('C', results_C),
    ):
        plot_metric(subtask_results, metric, subtask_letter, model_names)


🔍 Evaluating Subtask A


Reading Data: 13241it [00:00, 196456.85it/s]
Tokenization: 3972it [00:00, 6129.00it/s]0<?, ?it/s]
Stopwords Removal: 3972it [00:00, 76963.62it/s]1,  1.54it/s]
Lemmatization: 3972it [00:02, 1934.42it/s]
Preprocessing: 100%|██████████| 3/3 [00:02<00:00,  1.09it/s]


TypeError: predict() missing 1 required positional argument: 'X'

In [None]:
import pandas as pd

def results_to_df(results_dict, subtask):
    """Turn a ``{model: {metric: value}}`` mapping into a flat DataFrame.

    Args:
        results_dict: Mapping of model name to its dict of metric values.
        subtask: Value written into the ``subtask`` column of every row.

    Returns:
        DataFrame with one row per model: the metric columns followed by
        ``model`` and ``subtask``, with the index reset.
    """
    metrics = pd.DataFrame.from_dict(results_dict, orient='index')
    # The model names live in the index; copy them into a regular column
    # before the index is discarded.
    return metrics.assign(model=metrics.index, subtask=subtask).reset_index(drop=True)

# Build one frame per subtask, then stack them and write a single CSV.
df_A, df_B, df_C = (
    results_to_df(subtask_results, subtask_letter)
    for subtask_results, subtask_letter in (
        (results_A, 'A'),
        (results_B, 'B'),
        (results_C, 'C'),
    )
)
df_all = pd.concat([df_A, df_B, df_C])
# index=False: the per-subtask row numbers carry no meaning in the export.
df_all.to_csv("compare_models_results.csv", index=False)