In [None]:
import sys
from pathlib import Path
# Make the `src` directory that sits next to the notebook's working directory
# importable, so modules located there can be imported by later cells.
src_path = Path.cwd().parent / 'src'
# Guard the append: re-running this cell would otherwise add a duplicate
# entry to sys.path on every execution.
if str(src_path) not in sys.path:
    sys.path.append(str(src_path))

import numpy as np
import pandas as pd


In [None]:
from data_loader import load_data

# Locate the train/test CSV files under ../data/ag_news, relative to the
# notebook's working directory.
data_path = Path.cwd().parent / "data" / "ag_news"
train_path, test_path = data_path / "train.csv", data_path / "test.csv"

# Training split: load it via the project's load_data helper, then pull the
# title text, description text and class index columns out as arrays.
df_train = load_data(train_path)
X_title_train, X_desc_train, y_train = (
    df_train["Title"].values,
    df_train["Description"].values,
    df_train["Class Index"].values,
)

# Test split: same three columns, same order.
df_test = load_data(test_path)
X_title_test, X_desc_test, y_test = (
    df_test["Title"].values,
    df_test["Description"].values,
    df_test["Class Index"].values,
)

# Preview the first rows of both splits. A notebook cell only renders the
# value of its final expression, so each preview is passed to display()
# explicitly; a bare `df_train.head()` in the middle of the cell shows nothing.

# Show training data
display(df_train.head())

# Show test data
display(df_test.head())

In [None]:
from vectorizer import vectorize_data_fit
from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorize data: one TF-IDF vectorizer per text field (title, description).
title_vectorizer, desc_vectorizer = TfidfVectorizer(), TfidfVectorizer()

# Hand both vectorizers and both training text columns to the project helper.
# NOTE(review): presumably this fits the vectorizers and returns the combined
# feature matrix -- confirm against vectorizer.vectorize_data_fit.
X_train_combined_vect = vectorize_data_fit(
    title_vectorizer,
    desc_vectorizer,
    X_title_train,
    X_desc_train,
)

In [None]:
from model_trainer import train_model
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier

# Fixed seed for estimators that use randomness, so results are reproducible
# across kernel restarts.
RANDOM_STATE = 42

# Train Logistic Regression Model
lg_model = LogisticRegression()
lg_trained_model = train_model(lg_model, X_train_combined_vect, y_train)

# Train Naive Bayes Model (training call currently disabled)
nb_model = MultinomialNB()
#nb_trained_model = train_model(nb_model, X_train_combined_vect, y_train)

# Train Random Forest Model (training call currently disabled)
# The forest is built with random bootstrap samples and feature subsets;
# without random_state every run would produce a different model.
rf_model = RandomForestClassifier(random_state=RANDOM_STATE)
#rf_trained_model = train_model(rf_model, X_train_combined_vect, y_train)

In [None]:
#from perturbation import apply_perturbation

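# Manual smoke test of the perturbation module (currently disabled).
# NOTE: `X_test` is not defined by the cells above; pick a sample from
# `X_title_test` or `X_desc_test` before re-enabling this cell.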
#sample_text = X_test[1]

#perturbed_data = apply_perturbation([sample_text], 1.0)
#print(sample_text)
#print(perturbed_data[0])


In [None]:
from evaluator import evaluate_robustness

# Number of test rows used for the robustness run. Kept small for quick
# iteration; set to None to evaluate on the full test set (a None slice
# bound selects every row).
N_EVAL_SAMPLES = 5

X_title_sample = X_title_test[:N_EVAL_SAMPLES]
X_desc_sample = X_desc_test[:N_EVAL_SAMPLES]
y_sample = y_test[:N_EVAL_SAMPLES]

# Titles and descriptions are passed as a pair, in the same order as the
# vectorizers list below (title first, description second).
X_sample = [X_title_sample, X_desc_sample]
# Four evenly spaced perturbation levels from 0 to 0.6 (approx. 0, 0.2, 0.4, 0.6).
perturbation_levels = np.linspace(0, 0.6, 4)
metrics = ["base_accuracy", "robustness_score", "effective_robustness"]
vectorizers = [title_vectorizer, desc_vectorizer]

# NOTE(review): if evaluate_robustness applies random perturbations, seed the
# relevant RNG before this call so the reported numbers are reproducible --
# confirm against the evaluator/perturbation modules.
results, metrics_summary = evaluate_robustness(
    lg_trained_model,
    vectorizers,
    X_sample,
    y_sample,
    perturbation_levels,
    metrics
)

# Summary metrics: one "name: value" line each, four decimal places.
print("*** Metrics Summary ***")
for metric, value in metrics_summary.items():
    print(f"{metric}: {value: .4f}")

# Per-level results rendered as a table.
results_df = pd.DataFrame(results)
print("*** Results Per Perturbation Level")
display(results_df)