In [1]:
# Generic train/evaluate helper (used below with MLPClassifier)
def run_model(dataset_path, model, vectorizer, text_col='Query', label_col='Label',
              test_size=0.2, random_state=42):
    """Fit ``vectorizer`` and ``model`` on one CSV dataset and print hold-out metrics.

    The CSV is read with pandas, split into stratified train/test parts, the
    vectorizer is fitted on the training text only, the model is fitted on the
    vectorized training data, and accuracy, the confusion matrix and the
    classification report for the test part are printed.

    Args:
        dataset_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is fitted
            in place, so the caller's object holds the fit from the last call.
        vectorizer: Object exposing ``fit_transform(docs)`` and
            ``transform(docs)``. It is also fitted in place.
        text_col: Name of the column holding the input text.
        label_col: Name of the column holding the class labels.
        test_size: Passed to ``train_test_split`` as the held-out share.
        random_state: Seed passed to ``train_test_split``.

    Returns:
        None. All results are written to stdout.

    Raises:
        KeyError: If ``text_col`` or ``label_col`` is not a column of the CSV.
    """
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

    # Load dataset
    df = pd.read_csv(dataset_path)

    missing_cols = [col for col in (text_col, label_col) if col not in df.columns]
    if missing_cols:
        raise KeyError(f"{dataset_path} is missing required column(s): {missing_cols}")

    # read_csv turns empty fields into NaN; a NaN document makes the text
    # vectorizer fail, so rows without text or label are removed up front.
    n_rows = len(df)
    df = df.dropna(subset=[text_col, label_col])
    n_dropped = n_rows - len(df)
    if n_dropped:
        print(f"Dropped {n_dropped} row(s) with missing {text_col!r}/{label_col!r} values")

    # Cast so that a column pandas parsed as numeric still yields string documents.
    X = df[text_col].astype(str)
    y = df[label_col]

    # Train-test split (stratified so both parts keep the label proportions)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )

    # Vectorization: fit on the training text only, then reuse for the test text
    X_train_vec = vectorizer.fit_transform(X_train)
    X_test_vec = vectorizer.transform(X_test)

    # Train model
    model.fit(X_train_vec, y_train)

    # Predict and evaluate
    y_pred = model.predict(X_test_vec)
    print("=" * 80)
    print(f"Results for {model.__class__.__name__} on {dataset_path}")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred, digits=4))
    print("=" * 80)


In [2]:
# Setup
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over character n-grams of length 3 to 6 built inside word boundaries
# ('char_wb'); max_features limits the vocabulary to 5000 entries.
vectorizer = TfidfVectorizer(
    analyzer='char_wb',
    ngram_range=(3, 6),
    max_features=5000,
)

# MLP with a single hidden layer; random_state is pinned so reruns are comparable.
model = MLPClassifier(
    hidden_layer_sizes=(100,),  # one hidden layer, 100 units
    activation='relu',          # hidden-layer activation
    solver='adam',              # weight optimizer
    max_iter=300,               # upper bound on training iterations
    early_stopping=True,        # stop once a held-out validation score stops improving
    random_state=42,
)

In [3]:
# Original (imbalanced) dataset
run_model(
    dataset_path="../Dataset/Raw/SQLi_Original_Raw.csv",
    model=model,
    vectorizer=vectorizer,
)

Results for MLPClassifier on ../Dataset/Raw/SQLi_Original_Raw.csv
Accuracy: 0.9948253557567918
Confusion Matrix:
 [[3903    5]
 [  27 2249]]
Classification Report:
               precision    recall  f1-score   support

           0     0.9931    0.9987    0.9959      3908
           1     0.9978    0.9881    0.9929      2276

    accuracy                         0.9948      6184
   macro avg     0.9955    0.9934    0.9944      6184
weighted avg     0.9948    0.9948    0.9948      6184



In [4]:
# RUS-balanced dataset
# (RUS presumably stands for random under-sampling; confirm against the data prep step)
run_model(
    dataset_path="../Dataset/Raw/SQLi_RUS_Raw.csv",
    model=model,
    vectorizer=vectorizer,
)

Results for MLPClassifier on ../Dataset/Raw/SQLi_RUS_Raw.csv
Accuracy: 0.9918734900065891
Confusion Matrix:
 [[2267   10]
 [  27 2249]]
Classification Report:
               precision    recall  f1-score   support

           0     0.9882    0.9956    0.9919      2277
           1     0.9956    0.9881    0.9918      2276

    accuracy                         0.9919      4553
   macro avg     0.9919    0.9919    0.9919      4553
weighted avg     0.9919    0.9919    0.9919      4553



In [5]:
# ROS-balanced dataset
# NOTE(review): if ROS means random over-sampling applied before this CSV was
# written, duplicated rows can end up in both the train and the test part of the
# split made inside run_model, which would inflate these scores. Confirm.
run_model(
    dataset_path="../Dataset/Raw/SQLi_ROS_Raw.csv",
    model=model,
    vectorizer=vectorizer,
)

Results for MLPClassifier on ../Dataset/Raw/SQLi_ROS_Raw.csv
Accuracy: 0.9955214331413947
Confusion Matrix:
 [[3897   11]
 [  24 3883]]
Classification Report:
               precision    recall  f1-score   support

           0     0.9939    0.9972    0.9955      3908
           1     0.9972    0.9939    0.9955      3907

    accuracy                         0.9955      7815
   macro avg     0.9955    0.9955    0.9955      7815
weighted avg     0.9955    0.9955    0.9955      7815

