In [3]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score, average_precision_score

def load_data(train_path, test_path):
    """Read the train and test CSV files and split each into features/target.

    Both files are read as ISO-8859-1 and must contain a ``LABEL`` column.
    The target is ``LABEL - 1``; the original author notes this maps the
    labels to 0 and 1 (so the raw labels are presumably 1 and 2).

    Args:
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.

    Returns:
        A ``(X_train, y_train, X_test, y_test)`` tuple, where the ``X`` items
        are DataFrames without the ``LABEL`` column and the ``y`` items are
        the shifted ``LABEL`` Series.
    """
    print("Loading data...")

    def _features_and_target(frame):
        # Everything except LABEL is a feature; LABEL shifted by one is the target.
        return frame.drop(columns='LABEL'), frame['LABEL'] - 1

    # Read both files up front, before any column handling.
    train_frame, test_frame = [
        pd.read_csv(csv_path, encoding="ISO-8859-1")
        for csv_path in (train_path, test_path)
    ]

    X_train, y_train = _features_and_target(train_frame)
    X_test, y_test = _features_and_target(test_frame)
    return X_train, y_train, X_test, y_test

def preprocess_data(X_train, X_test):
    """Standardize both feature sets with a scaler fitted on the training set.

    The ``StandardScaler`` is fitted on ``X_train`` only and then applied to
    both inputs, so the test set never influences the fitted statistics.

    Args:
        X_train: Training features.
        X_test: Test features.

    Returns:
        A ``(X_train_scaled, X_test_scaled)`` tuple.
    """
    print("Preprocessing data...")
    standardizer = StandardScaler().fit(X_train)
    return standardizer.transform(X_train), standardizer.transform(X_test)

def _compute_metrics(y_true, y_pred, y_score):
    """Return the evaluation metrics for one data split as a dict."""
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred),
        "auc": roc_auc_score(y_true, y_score),
        "confusion_matrix": confusion_matrix(y_true, y_pred),
    }


def _print_metrics(title, metrics):
    """Print one split's metrics under the given heading."""
    print(title)
    print("Accuracy: {:.3f}".format(metrics["accuracy"]))
    print("Precision: {:.3f}".format(metrics["precision"]))
    print("Recall: {:.3f}".format(metrics["recall"]))
    print("F1 Score: {:.3f}".format(metrics["f1"]))
    print("AUC: {:.3f}".format(metrics["auc"]))
    print("Confusion Matrix:")
    print(metrics["confusion_matrix"])


def train_and_evaluate_model(X_train, y_train, X_test, y_test):
    """Fit a class-weighted linear SVM and report train/test metrics.

    Prints accuracy, precision, recall, F1, ROC AUC and the confusion matrix
    for the training set and then for the test set.

    Args:
        X_train: Training features.
        y_train: Binary training labels.
        X_test: Test features.
        y_test: Binary test labels.

    Returns:
        A dict ``{"train": {...}, "test": {...}}`` whose inner dicts hold the
        keys ``accuracy``, ``precision``, ``recall``, ``f1``, ``auc`` and
        ``confusion_matrix``.
    """
    print("Training the model...")
    # probability=True is deliberately not used: it makes fit() run an extra
    # internal 5-fold cross-validation and is randomly seeded. The scores
    # were only needed for ROC AUC, which depends solely on the ranking of
    # the scores, so the signed margin from decision_function() is used.
    model = SVC(kernel='linear', class_weight='balanced')

    model.fit(X_train, y_train)
    print("Model training complete!")

    print("Making predictions...")
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    train_scores = model.decision_function(X_train)
    test_scores = model.decision_function(X_test)
    print("Predictions complete!")

    train_metrics = _compute_metrics(y_train, y_train_pred, train_scores)
    test_metrics = _compute_metrics(y_test, y_test_pred, test_scores)

    _print_metrics("Training Set Metrics:", train_metrics)
    _print_metrics("\nTest Set Metrics:", test_metrics)

    return {"train": train_metrics, "test": test_metrics}

def main(train_path="Data/exoTrain.csv", test_path="Data/exoTest.csv"):
    """Run the full pipeline: load the CSVs, scale features, train and evaluate.

    Args:
        train_path: Path of the training CSV; defaults to the path that was
            previously hard-coded.
        test_path: Path of the test CSV; defaults to the path that was
            previously hard-coded.
    """
    X_train, y_train, X_test, y_test = load_data(train_path, test_path)
    X_train, X_test = preprocess_data(X_train, X_test)

    train_and_evaluate_model(X_train, y_train, X_test, y_test)

# Run the pipeline only when executed directly (script or notebook cell),
# not when this file is imported as a module.
if __name__ == "__main__":
    main()


Loading data...
Preprocessing data...
Training the model...
Model training complete!
Making predictions...
Predictions complete!
Training Set Metrics:
Accuracy: 0.975
Precision: 0.189
Recall: 0.757
F1 Score: 0.303
AUC: 0.982
Confusion Matrix:
[[4930  120]
 [   9   28]]

Test Set Metrics:
Accuracy: 0.965
Precision: 0.000
Recall: 0.000
F1 Score: 0.000
AUC: 0.278
Confusion Matrix:
[[550  15]
 [  5   0]]
