In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Module-level accumulator for visualization: run_model_with_results() appends
# one dict per evaluated dataset with the keys 'label', 'accuracy',
# 'conf_matrix', 'precision', 'recall' and 'f1'.
results = []

def run_model_with_results(dataset_path, model, vectorizer, label):
    """Train and evaluate ``model`` on one CSV dataset and record its metrics.

    The CSV at ``dataset_path`` is read with pandas; its ``Query`` column is
    used as the input text and its ``Label`` column as the target. The data is
    split 80/20 (stratified on the label, ``random_state=42``), the vectorizer
    is fitted on the training split only, and the model is fitted on the
    vectorized training data and scored on the held-out split.

    Args:
        dataset_path: Path of the CSV file to load.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted by this call.
        vectorizer: Transformer exposing ``fit_transform(X)`` and
            ``transform(X)``. It is fitted by this call.
        label: Display name stored with the metrics and printed in the
            summary.

    Returns:
        None. A dict with the accuracy, confusion matrix and macro-averaged
        precision/recall/F1 is appended to the module-level ``results`` list,
        and a textual summary is printed.
    """
    import pandas as pd
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
    from sklearn.model_selection import train_test_split

    frame = pd.read_csv(dataset_path)
    queries = frame['Query']
    targets = frame['Label']

    # Stratified hold-out split with a fixed seed so runs are repeatable.
    train_text, test_text, train_y, test_y = train_test_split(
        queries,
        targets,
        test_size=0.2,
        stratify=targets,
        random_state=42,
    )

    # The vectorizer only ever sees training text when it is fitted; the test
    # split is transformed with the already-fitted vocabulary.
    train_features = vectorizer.fit_transform(train_text)
    test_features = vectorizer.transform(test_text)

    model.fit(train_features, train_y)
    predicted = model.predict(test_features)

    accuracy = accuracy_score(test_y, predicted)
    matrix = confusion_matrix(test_y, predicted)
    macro_avg = classification_report(test_y, predicted, output_dict=True)['macro avg']

    results.append(dict(
        label=label,
        accuracy=accuracy,
        conf_matrix=matrix,
        precision=macro_avg['precision'],
        recall=macro_avg['recall'],
        f1=macro_avg['f1-score'],
    ))

    separator = "=" * 80
    print(separator)
    print(f"Results for {model.__class__.__name__} on {label}")
    print("Accuracy:", accuracy)
    print("Confusion Matrix:\n", matrix)
    print("Classification Report:\n", classification_report(test_y, predicted, digits=4))
    print(separator)


# Run and collect results
# NOTE(review): `model` and `vectorizer` are not defined anywhere in this cell;
# they presumably come from an earlier cell that must be executed first. The
# recorded output of this cell is "NameError: name 'model' is not defined".
for csv_path, dataset_label in (
    ("../Dataset/Raw/SQLi_Original_Raw.csv", "Original"),
    ("../Dataset/Raw/SQLi_RUS_Raw.csv", "RUS Balanced"),
    ("../Dataset/Raw/SQLi_ROS_Raw.csv", "ROS Balanced"),
):
    run_model_with_results(csv_path, model, vectorizer, dataset_label)

# === Visualization ===

# Convert to DataFrame: one row per evaluated dataset, one column per key of
# the dicts collected in `results`.
import pandas as pd
results_df = pd.DataFrame(results)

# 1. Accuracy comparison
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x='label', y='accuracy', data=results_df, ax=ax)
ax.set_title('Accuracy Comparison')
# Keep the zoomed-in 0.98-1.0 window when every accuracy fits inside it, but
# widen the lower bound when a score falls below 0.98; with a fixed window such
# a bar would lie entirely outside the visible range and silently disappear.
lowest_accuracy = float(results_df['accuracy'].min())
accuracy_floor = 0.98 if lowest_accuracy >= 0.98 else max(0.0, lowest_accuracy - 0.01)
ax.set_ylim(accuracy_floor, 1.0)
ax.set_ylabel('Accuracy')
ax.set_xlabel('Dataset')
ax.grid(True, linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()

# 2. Macro Precision, Recall, F1
# Reshape to long form (one row per dataset/metric pair) so seaborn can group
# the bars by metric via `hue`.
metrics_df = results_df.melt(id_vars='label', value_vars=['precision', 'recall', 'f1'],
                             var_name='Metric', value_name='Score')
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=metrics_df, x='label', y='Score', hue='Metric', ax=ax)
ax.set_title('Macro Average Precision, Recall, F1-score')
# Keep the zoomed-in 0.98-1.0 window when every score fits inside it, but widen
# the lower bound when a score falls below 0.98; with a fixed window such a bar
# would lie entirely outside the visible range and silently disappear.
lowest_score = float(metrics_df['Score'].min())
score_floor = 0.98 if lowest_score >= 0.98 else max(0.0, lowest_score - 0.01)
ax.set_ylim(score_floor, 1.0)
ax.set_ylabel('Score')
ax.set_xlabel('Dataset')
ax.legend(title='Metric')
ax.grid(True, linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()

# 3. Confusion matrices
# One annotated heatmap per collected result; each cell shows an integer count.
for entry in results:
    fig, ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(
        entry['conf_matrix'],
        annot=True,
        fmt='d',
        cmap='Blues',
        cbar=False,
        ax=ax,
    )
    ax.set_title(f"Confusion Matrix - {entry['label']}")
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    fig.tight_layout()
    plt.show()


NameError: name 'model' is not defined