# Autism Dataset w/ Decision Trees

In [38]:
# !pip3 install -U ucimlrepo 

In [46]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# fetch dataset 
autism_screening_adult = fetch_ucirepo(id=426) 
  
# data (as pandas dataframes) 
X = autism_screening_adult.data.features 
y = autism_screening_adult.data.targets 

ConnectionError: Error connecting to server

In [41]:
print(X.head())
print(X.isnull().sum())

AttributeError: 'numpy.ndarray' object has no attribute 'head'

In [37]:
X = X.drop(['ethnicity', 'relation', 'used_app_before'], axis=1)
X = X.dropna(subset = ['age'])
for col in X.select_dtypes(include = ['object']).columns:
    X[col] = pd.factorize(X[col])[0]
X = X.drop_duplicates()
X['age'] = X['age'].astype(int)
print(X.head())

AttributeError: 'numpy.ndarray' object has no attribute 'drop'

#### Shuffle Data

In [None]:
X_and_Y = pd.concat([X,y], axis = 1).to_numpy()
np.random.seed(1)
np.random.shuffle(X_and_Y)
X = X_and_Y[:, :-1]
y = X_and_Y[:, -1]
y[y==0] = -1
total_samples = X.shape[0]
print(total_samples)

### Classifier #1: Decision Trees

In [45]:
# Hyperparameters - need to use cross validation and compute validation accuracy
param_grid = {
    'max_depth': [5, 10, None],
    'min_samples_split': [2, 10, 20]
}

# Hyperparamter tuning in order to get the best hyperparameters - perform a 3-fold cross-validation
clf = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=3, n_jobs=-1, scoring='accuracy', return_train_score=True)
grid_search.fit(X, y)

# Extract and display the validation accuracy for each combination
print("Validation Accuracies for Each Hyperparameter Combination:")
print("=" * 50)
cv_results = grid_search.cv_results_
for mean_val_acc, params in zip(cv_results['mean_test_score'], cv_results['params']):
    print(f"Parameters: {params}, Validation Accuracy: {mean_val_acc:.4f}")

# Best hyperparameters based on validation accuracy
best_params = grid_search.best_params_
best_validation_accuracy = grid_search.best_score_
print("\nBest Hyperparameters:")
print(f"  {best_params}")
print(f"Validation Accuracy with Best Hyperparameters: {best_validation_accuracy:.4f}")


Validation Accuracies for Each Hyperparameter Combination:
Parameters: {'max_depth': 5, 'min_samples_split': 2}, Validation Accuracy: 0.9064
Parameters: {'max_depth': 5, 'min_samples_split': 10}, Validation Accuracy: 0.9957
Parameters: {'max_depth': 5, 'min_samples_split': 20}, Validation Accuracy: 0.9957
Parameters: {'max_depth': 10, 'min_samples_split': 2}, Validation Accuracy: 0.9064
Parameters: {'max_depth': 10, 'min_samples_split': 10}, Validation Accuracy: 0.9957
Parameters: {'max_depth': 10, 'min_samples_split': 20}, Validation Accuracy: 0.9957
Parameters: {'max_depth': None, 'min_samples_split': 2}, Validation Accuracy: 0.9064
Parameters: {'max_depth': None, 'min_samples_split': 10}, Validation Accuracy: 0.9957
Parameters: {'max_depth': None, 'min_samples_split': 20}, Validation Accuracy: 0.9957

Best Hyperparameters:
  {'max_depth': 5, 'min_samples_split': 10}
Validation Accuracy with Best Hyperparameters: 0.9957


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Partition splits with 80/20, 50/50, 20/80
splits = [(.8, .2), (.5, .5), (.2, .8)]
results = {}
split_results = []  # Store split-wise metrics

# Outer loop for each partition
for split in splits:
    train_size, test_size = split
    trial_accuracies = {'train': [], 'test': []}
    trial_precisions = []
    trial_recalls = []
    trial_f1_scores = []

    # Inner loop to run 3 trials per split
    for trial in range(3): 
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=None)

        # Retrain the model with the best hyperparameters
        best_clf = DecisionTreeClassifier(
            max_depth=best_params['max_depth'], 
            min_samples_split=best_params['min_samples_split'], 
            random_state=42
        )
        best_clf.fit(X_train, y_train)

        # Compute train accuracy
        y_train_pred = best_clf.predict(X_train)
        train_accuracy = accuracy_score(y_train, y_train_pred)
        trial_accuracies['train'].append(train_accuracy)

        # Compute test accuracy
        y_test_pred = best_clf.predict(X_test)
        test_accuracy = accuracy_score(y_test, y_test_pred)
        trial_accuracies['test'].append(test_accuracy)

        # Calculate precision, recall, and F1-score
        trial_precisions.append(precision_score(y_test, y_test_pred, average="weighted", zero_division=0))
        trial_recalls.append(recall_score(y_test, y_test_pred, average="weighted"))
        trial_f1_scores.append(f1_score(y_test, y_test_pred, average="weighted"))

    # Average metrics over the 3 trials for the current split
    avg_train_accuracy = np.mean(trial_accuracies['train'])
    avg_test_accuracy = np.mean(trial_accuracies['test'])
    avg_precision = np.mean(trial_precisions)
    avg_recall = np.mean(trial_recalls)
    avg_f1_score = np.mean(trial_f1_scores)

    # Save split-level metrics
    split_results.append({
        'Split': f"{int(train_size * 100)}/{int(test_size * 100)}",
        'Test Accuracy': avg_test_accuracy,
        'Precision': avg_precision,
        'Recall': avg_recall,
        'F1-Score': avg_f1_score
    })

    # Print split-specific results
    print(f"Results for {int(train_size * 100)}/{int(test_size * 100)} Split:")
    print(f"Train Accuracy: {avg_train_accuracy:.4f}")
    print(f"Test Accuracy: {avg_test_accuracy:.4f}")
    # print(f"Precision: {avg_precision:.4f}")
    # print(f"Recall: {avg_recall:.4f}")
    # print(f"F1-Score: {avg_f1_score:.4f}")
    print("-" * 50)

# # Save the metrics to use later for the summary table
# print("Overall Metrics for Decision Tree Classifier (Autism Dataset):")
# print(f"Overall Accuracy: {overall_accuracy_1:.4f}")
# print(f"Overall Precision: {overall_precision_1:.4f}")
# print(f"Overall Recall: {overall_recall_1:.4f}")
# print(f"Overall F1-Score: {overall_f1_score_1:.4f}")

#### Results for Decision Trees Classifier

In [None]:
# Calculate overall metrics for the Decision Tree classifier
overall_accuracy_1 = np.mean([r['Test Accuracy'] for r in split_results])
overall_precision_1 = np.mean([r['Precision'] for r in split_results])
overall_recall_1 = np.mean([r['Recall'] for r in split_results])
overall_f1_score_1 = np.mean([r['F1-Score'] for r in split_results])
# Create a formatted table for the split results
print("Decision Tree Classifier Results on Autism Dataset:")
print(f"{'Split':<10}{'ACC':<15}{'Precision':<15}{'Recall':<15}{'F1-Score':<15}")
print("-" * 65)

# Loop through the split results and display each row
for result in split_results:
    print(f"{result['Split']:<10}{result['Test Accuracy']:<15.4f}{result['Precision']:<15.4f}{result['Recall']:<15.4f}{result['F1-Score']:<15.4f}")

# Print the overall metrics as the last row
print("-" * 65)
print(f"{'Overall':<10}{overall_accuracy_1:<15.4f}{overall_precision_1:<15.4f}{overall_recall_1:<15.4f}{overall_f1_score_1:<15.4f}")

### Classifier #2: SVM (with RBF kernel)

In [10]:
from sklearn.impute import SimpleImputer

# Impute missing values
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# Define the parameter grid for SVM
param_grid = {
    'C': [1, 10, 100, 1000, 10000],
    'gamma': [1e-6, 1e-5, 1e-4, 1e-3, 1e-2],
    'kernel': ['rbf']
}

# Set up the SVM model
svm = SVC()

# Perform GridSearchCV on the entire dataset
grid_search = GridSearchCV(svm, param_grid, cv=3, scoring='accuracy', return_train_score=True)

# Fit the model
grid_search.fit(X_imputed, y)

# Extract and print the results
cv_results = grid_search.cv_results_
print("Validation Accuracies for Each Hyperparameter Combination:")
for mean_val_acc, params in zip(cv_results['mean_test_score'], cv_results['params']):
    print(f"Parameters: {params}, Validation Accuracy: {mean_val_acc:.4f}")

# Best hyperparameters based on validation accuracy
best_params = grid_search.best_params_
best_validation_accuracy = grid_search.best_score_

# Print the best hyperparameters and the corresponding validation accuracy
print("\nBest Hyperparameters:")
print(f"  {best_params}")
print(f"Validation Accuracy with Best Hyperparameters: {best_validation_accuracy:.4f}")

Validation Accuracies for Each Hyperparameter Combination:
Parameters: {'C': 1, 'gamma': 1e-06, 'kernel': 'rbf'}, Validation Accuracy: 0.7315
Parameters: {'C': 1, 'gamma': 1e-05, 'kernel': 'rbf'}, Validation Accuracy: 0.7315
Parameters: {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}, Validation Accuracy: 0.7315
Parameters: {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}, Validation Accuracy: 0.9346
Parameters: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}, Validation Accuracy: 0.9460
Parameters: {'C': 10, 'gamma': 1e-06, 'kernel': 'rbf'}, Validation Accuracy: 0.7315
Parameters: {'C': 10, 'gamma': 1e-05, 'kernel': 'rbf'}, Validation Accuracy: 0.7315
Parameters: {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}, Validation Accuracy: 0.9659
Parameters: {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}, Validation Accuracy: 0.9773
Parameters: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}, Validation Accuracy: 0.9673
Parameters: {'C': 100, 'gamma': 1e-06, 'kernel': 'rbf'}, Validation Accuracy: 0.7315
Parameters: {'C': 100

In [11]:
# Split ratios
split_ratios = [(0.8, 0.2), (0.5, 0.5), (0.2, 0.8)]
split_results_svm = []

# Best hyperparameters for the SVM
best_C = 10000
best_gamma = 1e-05

# Iterate over splits
for train_size, test_size in split_ratios:
    trial_train_accuracies = []
    trial_validation_accuracies = []
    trial_test_accuracies = []
    trial_precisions = []
    trial_recalls = []
    trial_f1_scores = []
    
    for _ in range(3):  # Perform 3 trials for each split
        # Split the data into train and temp (validation + test)
        X_train, X_temp, Y_train, Y_temp = train_test_split(X, y, train_size=train_size, random_state=None)
        
        # Now split the temp data into validation and test
        X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=test_size, random_state=None)
        
        # Impute missing values on the training data
        imputer = SimpleImputer(strategy='mean')
        X_train_imputed = imputer.fit_transform(X_train)  # Fit and transform the training data
        X_val_imputed = imputer.transform(X_val)  # Transform the validation data
        X_test_imputed = imputer.transform(X_test)  # Transform the test data

        # Train the SVM model using the best hyperparameters
        clf = SVC(C=best_C, gamma=best_gamma, kernel='rbf')
        clf.fit(X_train_imputed, Y_train)

        # Calculate train accuracy
        train_accuracy = clf.score(X_train_imputed, Y_train)
        trial_train_accuracies.append(train_accuracy)

        # Calculate test accuracy
        test_accuracy = clf.score(X_test_imputed, Y_test)
        trial_test_accuracies.append(test_accuracy)

        # Calculate precision, recall, and F1-score
        Y_test_pred = clf.predict(X_test_imputed)
        trial_precisions.append(precision_score(Y_test, Y_test_pred, average="weighted", zero_division=0))
        trial_recalls.append(recall_score(Y_test, Y_test_pred, average="weighted"))
        trial_f1_scores.append(f1_score(Y_test, Y_test_pred, average="weighted"))

    # Calculate and store split-wise average metrics
    avg_train_accuracy = np.mean(trial_train_accuracies)
    avg_test_accuracy = np.mean(trial_test_accuracies)
    avg_precision = np.mean(trial_precisions)
    avg_recall = np.mean(trial_recalls)
    avg_f1_score = np.mean(trial_f1_scores)

    split_results_svm.append({
        'Split': f"{int(train_size * 100)}/{int(test_size * 100)}",
        'Test Accuracy': avg_test_accuracy,
        'Precision': avg_precision,
        'Recall': avg_recall,
        'F1-Score': avg_f1_score
    })

    # Print split-specific results
    print(f"\nResults for {int(train_size * 100)}/{int(test_size * 100)} Split:")
    print(f"Train Accuracy: {avg_train_accuracy:.4f}")
    print(f"Test Accuracy: {avg_test_accuracy:.4f}")
    # print(f"Precision: {avg_precision:.4f}")
    # print(f"Recall: {avg_recall:.4f}")
    # print(f"F1-Score: {avg_f1_score:.4f}")
    print("-" * 50)


Results for 80/20 Split:
Train Accuracy: 0.9953
Test Accuracy: 1.0000
--------------------------------------------------

Results for 50/50 Split:
Train Accuracy: 0.9953
Test Accuracy: 0.9943
--------------------------------------------------

Results for 20/80 Split:
Train Accuracy: 0.9929
Test Accuracy: 0.9823
--------------------------------------------------


#### Results for SVM Classifier

In [12]:
# Calculate overall metrics for the SVM classifier
overall_accuracy_2 = np.mean([r['Test Accuracy'] for r in split_results_svm])
overall_precision_2 = np.mean([r['Precision'] for r in split_results_svm])
overall_recall_2 = np.mean([r['Recall'] for r in split_results_svm])
overall_f1_score_2 = np.mean([r['F1-Score'] for r in split_results_svm])

# Create a formatted table for the split results
print("SVM Classifier Results on Autism Dataset:")
print(f"{'Split':<10}{'ACC':<15}{'Precision':<15}{'Recall':<15}{'F1-Score':<15}")
print("-" * 65)

# Loop through the split results and display each row
for result in split_results_svm:
    print(f"{result['Split']:<10}{result['Test Accuracy']:<15.4f}{result['Precision']:<15.4f}{result['Recall']:<15.4f}{result['F1-Score']:<15.4f}")

# Print the overall metrics as the last row
print("-" * 65)
print(f"{'Overall':<10}{overall_accuracy_2:<15.4f}{overall_precision_2:<15.4f}{overall_recall_2:<15.4f}{overall_f1_score_2:<15.4f}")

SVM Classifier Results on Autism Dataset:
Split     ACC            Precision      Recall         F1-Score       
-----------------------------------------------------------------
80/20     1.0000         1.0000         1.0000         1.0000         
50/50     0.9943         0.9944         0.9943         0.9943         
20/80     0.9823         0.9829         0.9823         0.9824         
-----------------------------------------------------------------
Overall   0.9922         0.9924         0.9922         0.9922         


### Classifier #3: Logistic Regression

In [17]:
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Impute missing values
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# Define the parameter grid for Logistic Regression
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2'],  # Regularization types
    'solver': ['liblinear']   # Compatible solver for l1 and l2 penalties
}

# Set up the Logistic Regression model
log_reg = LogisticRegression()

# Perform GridSearchCV on the entire dataset
grid_search = GridSearchCV(log_reg, param_grid, cv=3, scoring='accuracy', return_train_score=True)

# Fit the model
grid_search.fit(X_imputed, y)

# Extract and print the results
cv_results = grid_search.cv_results_
print("Validation Accuracies for Each Hyperparameter Combination:")
for mean_val_acc, params in zip(cv_results['mean_test_score'], cv_results['params']):
    print(f"Parameters: {params}, Validation Accuracy: {mean_val_acc:.4f}")

# Best hyperparameters based on validation accuracy
best_params = grid_search.best_params_
best_validation_accuracy = grid_search.best_score_

# Print the best hyperparameters and the corresponding validation accuracy
print("\nBest Hyperparameters:")
print(f"  {best_params}")
print(f"Validation Accuracy with Best Hyperparameters: {best_validation_accuracy:.4f}")

Validation Accuracies for Each Hyperparameter Combination:
Parameters: {'C': 0.01, 'penalty': 'l1', 'solver': 'liblinear'}, Validation Accuracy: 0.8267
Parameters: {'C': 0.01, 'penalty': 'l2', 'solver': 'liblinear'}, Validation Accuracy: 0.8495
Parameters: {'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}, Validation Accuracy: 0.9503
Parameters: {'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}, Validation Accuracy: 0.8992
Parameters: {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}, Validation Accuracy: 0.9957
Parameters: {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}, Validation Accuracy: 0.9446
Parameters: {'C': 10, 'penalty': 'l1', 'solver': 'liblinear'}, Validation Accuracy: 0.9957
Parameters: {'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}, Validation Accuracy: 0.9829
Parameters: {'C': 100, 'penalty': 'l1', 'solver': 'liblinear'}, Validation Accuracy: 0.9957
Parameters: {'C': 100, 'penalty': 'l2', 'solver': 'liblinear'}, Validation Accuracy: 0.9957

Best Hyperparameters:
  

In [24]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

# Define the best hyperparameters from previous tuning
best_C = 1
best_penalty = 'l1'
best_solver = 'liblinear'

# Split ratios
split_ratios = [(0.8, 0.2), (0.5, 0.5), (0.2, 0.8)]
split_results_logreg = []

# Lists for storing accuracies and metrics
train_accuracies = []
validation_accuracies = []
test_accuracies = []
precisions = []
recalls = []
f1_scores = []

# Iterate over splits
for train_size, test_size in split_ratios:
    trial_train_accuracies = []
    trial_validation_accuracies = []
    trial_test_accuracies = []
    trial_precisions = []
    trial_recalls = []
    trial_f1_scores = []
    
    for _ in range(3):  # Perform 3 trials for each split
        # Split the data into train and temp (validation + test)
        X_train, X_temp, Y_train, Y_temp = train_test_split(X, y, train_size=train_size, random_state=None)
        
        # Now split the temp data into validation and test
        X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=test_size, random_state=None)
        
        # Impute missing values on the training data
        imputer = SimpleImputer(strategy='mean')
        X_train_imputed = imputer.fit_transform(X_train)  # Fit and transform the training data
        X_val_imputed = imputer.transform(X_val)  # Transform the validation data
        X_test_imputed = imputer.transform(X_test)  # Transform the test data

        # Train the Logistic Regression model using the best hyperparameters
        clf = LogisticRegression(C=best_C, penalty=best_penalty, solver=best_solver, max_iter=10000)
        clf.fit(X_train_imputed, Y_train)

        # Calculate train accuracy
        train_accuracy = clf.score(X_train_imputed, Y_train)
        trial_train_accuracies.append(train_accuracy)

        # Calculate validation accuracy
        validation_accuracy = clf.score(X_val_imputed, Y_val)
        trial_validation_accuracies.append(validation_accuracy)

        # Calculate test accuracy
        test_accuracy = clf.score(X_test_imputed, Y_test)
        trial_test_accuracies.append(test_accuracy)

        # Calculate precision, recall, and F1-score
        Y_test_pred = clf.predict(X_test_imputed)
        trial_precisions.append(precision_score(Y_test, Y_test_pred, average="weighted", zero_division=0))
        trial_recalls.append(recall_score(Y_test, Y_test_pred, average="weighted"))
        trial_f1_scores.append(f1_score(Y_test, Y_test_pred, average="weighted"))

    # Calculate and store split-wise average metrics
    avg_train_accuracy = np.mean(trial_train_accuracies)
    avg_validation_accuracy = np.mean(trial_validation_accuracies)
    avg_test_accuracy = np.mean(trial_test_accuracies)
    avg_precision = np.mean(trial_precisions)
    avg_recall = np.mean(trial_recalls)
    avg_f1_score = np.mean(trial_f1_scores)

    split_results_logreg.append({
        'Split': f"{int(train_size * 100)}/{int(test_size * 100)}",
        'Train Accuracy': avg_train_accuracy,
        'Validation Accuracy': avg_validation_accuracy,
        'Test Accuracy': avg_test_accuracy,
        'Precision': avg_precision,
        'Recall': avg_recall,
        'F1-Score': avg_f1_score
    })

    # Print split-specific results
    print(f"\nResults for {int(train_size * 100)}/{int(test_size * 100)} Split:")
    print(f"Train Accuracy: {avg_train_accuracy:.4f}")
    print(f"Test Accuracy: {avg_test_accuracy:.4f}")
    # print(f"Precision: {avg_precision:.4f}")
    # print(f"Recall: {avg_recall:.4f}")
    # print(f"F1-Score: {avg_f1_score:.4f}")
    print("-" * 50)


Results for 80/20 Split:
Train Accuracy: 0.9953
Test Accuracy: 1.0000
--------------------------------------------------

Results for 50/50 Split:
Train Accuracy: 0.9953
Test Accuracy: 0.9962
--------------------------------------------------

Results for 20/80 Split:
Train Accuracy: 0.9929
Test Accuracy: 0.9720
--------------------------------------------------


#### Results for Logistic Regression Classifier

In [27]:
# Calculate overall metrics for the Logistic Regression classifier
overall_accuracy_3 = np.mean([r['Test Accuracy'] for r in split_results_logreg])
overall_precision_3 = np.mean([r['Precision'] for r in split_results_logreg])
overall_recall_3 = np.mean([r['Recall'] for r in split_results_logreg])
overall_f1_score_3 = np.mean([r['F1-Score'] for r in split_results_logreg])

# Create a formatted table for the split results
print("Logistic Regression Classifier Results on Autism Dataset:")
print(f"{'Split':<10}{'ACC':<15}{'Precision':<15}{'Recall':<15}{'F1-Score':<15}{'Overall Accuracy 3':<20}")
print("-" * 80)

# Loop through the split results and display each row
for result in split_results_logreg:
    print(f"{result['Split']:<10}{result['Test Accuracy']:<15.4f}{result['Precision']:<15.4f}{result['Recall']:<15.4f}{result['F1-Score']:<15.4f}{overall_accuracy_3:<20.4f}")

# Print the overall metrics as the last row
print("-" * 80)
print(f"{'Overall':<10}{overall_accuracy_3:<15.4f}{overall_precision_3:<15.4f}{overall_recall_3:<15.4f}{overall_f1_score_3:<15.4f}{overall_accuracy_3:<20.4f}")


Logistic Regression Classifier Results on Autism Dataset:
Split     ACC            Precision      Recall         F1-Score       Overall Accuracy 3  
--------------------------------------------------------------------------------
80/20     1.0000         1.0000         1.0000         1.0000         0.9894              
50/50     0.9962         0.9962         0.9962         0.9962         0.9894              
20/80     0.9720         0.9722         0.9720         0.9720         0.9894              
--------------------------------------------------------------------------------
Overall   0.9894         0.9895         0.9894         0.9894         0.9894              


## Final Summary Table

In [31]:
summary_data = {
    'Classifier': ['Decision Tree', 'Support Vector Machines', 'Logistic Regression'],  # Add names for all classifiers
    'ACC': [overall_accuracy_1, overall_accuracy_2, overall_accuracy_3],  # Add more as needed
    'Precision': [overall_precision_1, overall_precision_2, overall_precision_3],
    'Recall': [overall_recall_1, overall_recall_2, overall_recall_3],
    'F1-Score': [overall_f1_score_1, overall_f1_score_2, overall_f1_score_3]
}

summary_df = pd.DataFrame(summary_data)

# Print the summary table
print("Final Summary Table of Overall Metrics:")
print(summary_df)

Final Summary Table of Overall Metrics:
                Classifier       ACC  Precision    Recall  F1-Score
0            Decision Tree  0.964850   0.942866  0.964850  0.952067
1  Support Vector Machines  0.992206   0.992445  0.992206  0.992239
2      Logistic Regression  0.989396   0.989485  0.989396  0.989415
