In [7]:
import sys
from pathlib import Path
src_path = Path.cwd().parent / 'src'
sys.path.append(str(src_path))

import numpy as np
import pandas as pd


In [8]:
from data_loader import load_data

# Set up data paths
data_path = Path.cwd().parent / "data" / "ag_news"
train_path = data_path / "train.csv"
test_path = data_path / "test.csv"

# Load training data
df_train = load_data(train_path)
X_train = (df_train["Title"] + " " + df_train["Description"]).values
y_train = df_train["Class Index"].values

# Load test data
df_test = load_data(test_path)
X_test = (df_test["Title"] + " " + df_test["Description"]).values
y_test = df_test["Class Index"].values

# Show training data
df_train.head()

# Show test data
df_test.head()

Unnamed: 0,Class Index,Title,Description
0,3,Fears for T N pension after talks,Unions representing workers at Turner Newall...
1,4,The Race is On: Second Private Team Sets Launc...,"SPACE.com - TORONTO, Canada -- A second\team o..."
2,4,Ky. Company Wins Grant to Study Peptides (AP),AP - A company founded by a chemistry research...
3,4,Prediction Unit Helps Forecast Wildfires (AP),AP - It's barely dawn when Mike Fitzpatrick st...
4,4,Calif. Aims to Limit Farm-Related Smog (AP),AP - Southern California's smog-fighting agenc...


In [9]:
from vectorizer import vectorize_data, vectorize_data_fit
from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorize data
vectorizer = TfidfVectorizer()

X_train_vect = vectorize_data_fit(vectorizer, X_train)
X_test_vect = vectorize_data(vectorizer, X_test)

In [10]:
from model_trainer import train_model
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier

# Train Logistic Regression Model
lg_model = LogisticRegression()
#lg_trained_model = train_model(lg_model, X_train_vect, y_train)

# Train Naive Bayes Model
nb_model = MultinomialNB()
#nb_trained_model = train_model(nb_model, X_train_vect, y_train)

# Train Random Forest Model
rf_model = RandomForestClassifier()
rf_trained_model = train_model(rf_model, X_train_vect, y_train)

In [11]:
from perturbation import apply_perturbation

# Testing of perturbation module
#sample_text = X_test[1]

#perturbed_data = apply_perturbation([sample_text], 1.0)
#print(sample_text)
#print(perturbed_data[0])


In [13]:
from evaluator import evaluate_robustness

X_sample = X_test[:1000]
y_sample = y_test[:1000]

perturbation_levels = np.linspace(0, 1, 11)
metrics = ["base_accuracy", "robustness_score", "effective_robustness"]

results, metrics_summary = evaluate_robustness(
    rf_trained_model,
    vectorizer,
    X_sample, #X_test,
    y_sample, #y_test,
    perturbation_levels,
    metrics
)

print("*** Metrics Summary ***")
for metric, value in metrics_summary.items():
    print(f"{metric}: {value: .4f}")

results_df = pd.DataFrame(results)
print("*** Results Per Perturbation Level")
display(results_df)

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\t440p\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\t440p\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\t440p\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\t440p\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\t440p\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\t440p\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Us

*** Metrics Summary ***
robustness_score:  0.0000
effective_robustness:  0.0000
accuracy:  0.8740
*** Results Per Perturbation Level


Unnamed: 0,perturbation level,accuracy
0,0.0,0.874
1,0.1,0.86
2,0.2,0.825
3,0.3,0.78
4,0.4,0.722
5,0.5,0.628
6,0.6,0.561
7,0.7,0.497
8,0.8,0.47
9,0.9,0.463
