# **Data**

In [63]:
# Import the pandas library for data manipulation and analysis
import pandas as pd

# Import numpy for numerical operations and handling arrays
import numpy as np

# Import LogisticRegression from sklearn for logistic regression model
from sklearn.linear_model import LogisticRegression

# Import matplotlib for creating visualizations and plots
import matplotlib.pyplot as plt

# Import seaborn for advanced statistical data visualization
import seaborn as sns

# Import StandardScaler from sklearn for standardizing features
from sklearn.preprocessing import StandardScaler

# Import statsmodels for statistical models and tests
import statsmodels.api as sm
from statsmodels.formula.api import ols  # Import OLS (Ordinary Least Squares) for linear regression models

# Import kstest and norm from scipy for statistical tests and normal distribution
from scipy.stats import kstest, norm

# Import pairwise_tukeyhsd from statsmodels for Tukey's HSD test for multiple comparisons
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Import SelectKBest and f_classif from sklearn for feature selection
from sklearn.feature_selection import SelectKBest, f_classif

# Import MinMaxScaler from sklearn for scaling features to a given range
from sklearn.preprocessing import MinMaxScaler

# Import train_test_split from sklearn for splitting data into training and testing sets
from sklearn.model_selection import train_test_split

# Import various metrics from sklearn for evaluating models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Import LinearDiscriminantAnalysis for linear dimensionality reduction and classification
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Import additional regression models from sklearn
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# Import support vector machine classifiers and regressors from sklearn
from sklearn.svm import SVC, SVR

# Import k-nearest neighbors classifier from sklearn
from sklearn.neighbors import KNeighborsClassifier

# Import regression metrics from sklearn for evaluating model performance
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Import load_workbook and Workbook from openpyxl for handling Excel files
from openpyxl import load_workbook, Workbook

# Import os for interacting with the operating system (e.g., file paths)
import os

# Import GridSearchCV and RandomizedSearchCV for hyperparameter tuning
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Import additional metrics from sklearn for model evaluation
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score

# Import joblib for saving and loading models
import joblib

# Import learning_curve from sklearn for plotting learning curves
from sklearn.model_selection import learning_curve

# Import cross_val_score from sklearn for cross-validation
from sklearn.model_selection import cross_val_score

*Summary*:

* **Paths Definition**: Sets file paths for data.
* **Data Loading**: Reads Excel files into DataFrames.
* **Column Renaming**: Standardizes column names.
* **Data Type Conversion**: Ensures consistent data types for merging.
* **Data Merging**: Combines DataFrames on common columns.
* **Verification**: Prints DataFrame column names and sizes, and shows the first rows of the merged DataFrame.

In [64]:
# Locations of the three TELEMAIA workbooks on the mounted Google Drive.
# Plain raw strings are enough: nothing is interpolated into the paths.
path_TELE = r'/content/drive/MyDrive/Sapienza/Resources/WP2 -TELEMAIA Associazione tra EEG e TELEMONITORING features in Nold e MCI.xlsx'
path_EEG = r'/content/drive/MyDrive/Sapienza/Resources/WP2 -TELEMAIA EEG features in Nold e PD con deficit cognitivi.xlsx'
path_MRI = r'/content/drive/MyDrive/Sapienza/Resources/WP2 -TELEMAIA MRI features in Nold e PD con deficit cognitivi.xlsx'

# Load one sheet from each workbook into its own DataFrame.
data_tele = pd.read_excel(path_TELE, sheet_name='best')
data_eeg = pd.read_excel(path_EEG, sheet_name='all')
data_mri = pd.read_excel(path_MRI, sheet_name='all')

# Give the shared columns the same name in every source so they can serve as merge keys:
# 'Codice TELEMAIA new' -> 'Subj' and 'Group TELEMAIA' -> 'Group' in the telemonitoring table,
# 'MMSEg' -> 'MMSE' in the EEG table.
data_tele = data_tele.rename(columns={'Codice TELEMAIA new': 'Subj', 'Group TELEMAIA': 'Group'})
data_eeg = data_eeg.rename(columns={'MMSEg': 'MMSE'})

# 'Unit' is a merge key, so it must have the same (string) dtype in every table.
data_tele['Unit'] = data_tele['Unit'].astype(str)
data_eeg['Unit'] = data_eeg['Unit'].astype(str)

# The MRI table gets a constant 'Unit' value of 'MRI', stored as a string like the others.
# NOTE(review): 'Unit' is one of the merge keys below, so an MRI row only lines up with
# TELE/EEG rows whose 'Unit' is also 'MRI' - confirm this is intended.
data_mri['Unit'] = 'MRI'
data_mri['Unit'] = data_mri['Unit'].astype(str)

# Print column names and sizes of the DataFrames to verify loading and renaming.
print(f"Column names in data_tele: {data_tele.columns}, Size: {data_tele.shape}")
print(f"\nColumn names in data_eeg:{data_eeg.columns}, Size: {data_eeg.shape}")
print(f"\nColumn names in data_mri:{data_mri.columns}, Size: {data_mri.shape}")

# Outer-merge the three tables on the shared subject descriptors, keeping every row of
# every source; feature columns missing from a source are filled with NaN.
merge_keys = ['Subj', 'Group', 'Age', 'Sex', 'Education', 'MMSE', 'Unit']
df = (data_tele.merge(data_eeg, on=merge_keys, how='outer')
               .merge(data_mri, on=merge_keys, how='outer'))

# Show only the first few rows of the merged DataFrame (not the whole frame) as a sanity check.
print("Merged Data:")
print(df.head())

Column names in data_tele: Index(['Subj', 'Group', 'Unit', 'Age', 'Sex', 'Education', 'MMSE', 'TASK1',
       'TASK2', 'TASK3', 'TASK4', 'TASK5', 'TASK6', 'TASK7', 'theta_P'],
      dtype='object'), Size: (47, 15)

Column names in data_eeg:Index(['Subj', 'Group', 'Unit', 'Age', 'Sex', 'Education', 'MMSE', 'MMSEcorr',
       'TF', 'IAF', 'UPDRS III', 'De-F', 'De-C', 'De-P', 'De-O', 'De-T',
       'De-L', 'Th-F', 'Th-C', 'Th-P', 'Th-O', 'Th-T', 'Th-L', 'A1-F', 'A1-C',
       'A1-P', 'A1-O', 'A1-T', 'A1-L', 'A2-F', 'A2-C', 'A2-P', 'A2-O', 'A2-T',
       'A2-L', 'A3-F', 'A3-C', 'A3-P', 'A3-O', 'A3-T', 'A3-L', 'B1-F', 'B1-C',
       'B1-P', 'B1-O', 'B1-T', 'B1-L', 'B2-F', 'B2-C', 'B2-P', 'B2-O', 'B2-T',
       'B2-L', 'Ga-F', 'Ga-C', 'Ga-P', 'Ga-O', 'Ga-T', 'Ga-L', 'De-global',
       'Th-global', 'A1-global', 'A3-global'],
      dtype='object'), Size: (104, 63)

Column names in data_mri:Index(['Subj', 'Group', 'Age', 'Sex', 'Education', 'MMSE',
       'Visual_Network_Normalized', 'SomatoMo

  df = (data_tele.merge(data_eeg, on=['Subj', 'Group', 'Age', 'Sex', 'Education', 'MMSE', 'Unit'], how='outer')
  df = (data_tele.merge(data_eeg, on=['Subj', 'Group', 'Age', 'Sex', 'Education', 'MMSE', 'Unit'], how='outer')


In [65]:
# List every column that is present after the three-way merge
merged_columns = df.columns
print(merged_columns)

Index(['Subj', 'Group', 'Unit', 'Age', 'Sex', 'Education', 'MMSE', 'TASK1',
       'TASK2', 'TASK3', 'TASK4', 'TASK5', 'TASK6', 'TASK7', 'theta_P',
       'MMSEcorr', 'TF', 'IAF', 'UPDRS III', 'De-F', 'De-C', 'De-P', 'De-O',
       'De-T', 'De-L', 'Th-F', 'Th-C', 'Th-P', 'Th-O', 'Th-T', 'Th-L', 'A1-F',
       'A1-C', 'A1-P', 'A1-O', 'A1-T', 'A1-L', 'A2-F', 'A2-C', 'A2-P', 'A2-O',
       'A2-T', 'A2-L', 'A3-F', 'A3-C', 'A3-P', 'A3-O', 'A3-T', 'A3-L', 'B1-F',
       'B1-C', 'B1-P', 'B1-O', 'B1-T', 'B1-L', 'B2-F', 'B2-C', 'B2-P', 'B2-O',
       'B2-T', 'B2-L', 'Ga-F', 'Ga-C', 'Ga-P', 'Ga-O', 'Ga-T', 'Ga-L',
       'De-global', 'Th-global', 'A1-global', 'A3-global',
       'Visual_Network_Normalized', 'SomatoMotor_Network_Normalized',
       'DAN_Normalized', 'VAN_Normalized', 'Limbic_Network_Normalized',
       'FrontoParietal_Network_Normalized', 'DMN_Normalized'],
      dtype='object')


In [66]:
# Rescale the listed demographic columns to the [0, 1] range, in place in df
columns_to_scale = ['Age', 'Education']
scaler = MinMaxScaler()
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

# Quick look at the rescaled values
print(df[columns_to_scale].head())

# EEG column names as they come from the workbook: every '<prefix>-<suffix>' pair,
# followed by four '<prefix>-global' columns
_eeg_prefixes = ['De', 'Th', 'A1', 'A2', 'A3', 'B1', 'B2', 'Ga']
_eeg_suffixes = ['F', 'C', 'P', 'O', 'T', 'L']
EEG = [f'{prefix}-{suffix}' for prefix in _eeg_prefixes for suffix in _eeg_suffixes]
EEG += [f'{prefix}-global' for prefix in ['De', 'Th', 'A1', 'A3']]

# Swap '-' for '_' in the EEG column names only; every other column keeps its name
_eeg_workbook_names = set(EEG)
df.columns = [
    name.replace('-', '_') if name in _eeg_workbook_names else name
    for name in df.columns
]

# Column groups used to assemble the targets and the feature matrices
DEPENDENTS = ['Group', 'MMSE']
DEMOGRAPHICS = ['Age', 'Sex', 'Education']
# Same EEG names as above, now in their underscore form
EEG = [name.replace('-', '_') for name in EEG]
MRI = [
    'Visual_Network_Normalized',
    'SomatoMotor_Network_Normalized',
    'DAN_Normalized',
    'VAN_Normalized',
    'Limbic_Network_Normalized',
    'FrontoParietal_Network_Normalized',
    'DMN_Normalized',
]
TASK = [f'TASK{number}' for number in range(1, 8)]

        Age  Education
0  0.888889   0.238095
1  0.857143   0.380952
2  0.904762   0.238095
3  0.730159   0.238095
4  0.952381   0.619048


In [67]:
# ---- Targets ----

# Binary class label: 'HC' is class 0; 'PDD', 'MCI' and 'PD' all share class 1
group_mapping = {'HC': 0, **dict.fromkeys(['PDD', 'MCI', 'PD'], 1)}

group_column, mmse_column = DEPENDENTS

# Classification target: the 'Group' column translated through the mapping
y_class = df[group_column].map(group_mapping)

# Regression target: the 'MMSE' column
y_reg = df[mmse_column]

# Second regression target: all TASK columns together
y_reg_2 = df[TASK]

# ---- Feature matrices ----

# Demographics are part of every feature set
eeg_features = DEMOGRAPHICS + EEG
mri_features = DEMOGRAPHICS + MRI

# Classification: EEG only, MRI only, EEG and MRI together
X_class_EEG = df[eeg_features]
X_class_MRI = df[mri_features]
X_class_EEG_MRI = df[eeg_features + MRI]

# Regression: EEG only, MRI only
X_reg_EEG = df[eeg_features]
X_reg_MRI = df[mri_features]

# Regression: EEG or MRI features combined with the TASK variables
X_reg_TELE_EEG = df[eeg_features + TASK]
X_reg_TELE_MRI = df[mri_features + TASK]

# Regression: demographics and TASK variables only
X_reg_TELE = df[DEMOGRAPHICS + TASK]

# Functions

In [68]:
def _run_group_diagnostics(X):
    """
    Run per-feature group diagnostics on the rows of the module-level `df` that are in X.

    For every column of X this fits a one-way ANOVA against `df['Group']`, tests the
    standardised values for normality (Kolmogorov-Smirnov) and runs Tukey's HSD.
    A failure on one feature is printed and does not stop the remaining features.

    Parameters:
    - X (pd.DataFrame): Complete-case feature matrix; its index and column names must
      exist in `df`.

    Returns:
    - dict: feature name -> {'anova', 'normality_p_value', 'tukey'} ('tukey' is absent
      when only that step failed).

    Side effects:
    - Adds a `log_<feature>` column to `df` for every feature whose normality test is
      rejected at the 5% level.
    """
    results = {}

    for metric in X.columns:
        try:
            # Use the rows that are actually modelled, not every row of the merged frame.
            sample = df.loc[X.index, [metric, 'Group']].dropna()

            # One-way ANOVA of the feature across groups.
            model = ols(f'{metric} ~ C(Group)', data=sample).fit()
            anova_table = sm.stats.anova_lm(model, typ=2)

            # kstest against norm.cdf compares with N(0, 1), so the values are standardised
            # first; raw values would be rejected whatever their shape. Mean and SD are
            # estimated from the same sample, so the p-value is only approximate.
            values = sample[metric]
            spread = values.std()
            if spread > 0:
                _, p_value = kstest((values - values.mean()) / spread, norm.cdf)
            else:
                # Constant (or empty) feature: normality cannot be assessed.
                p_value = np.nan

            results[metric] = {
                'anova': anova_table,
                'normality_p_value': p_value
            }

            # Store a log-transformed copy when normality is rejected (+1 avoids log(0)).
            if p_value < 0.05:
                df[f'log_{metric}'] = np.log(df[metric] + 1)

            # Tukey's post-hoc test for pairwise group differences.
            results[metric]['tukey'] = pairwise_tukeyhsd(sample[metric], sample['Group'])

        except Exception as e:
            # Diagnostics are best-effort: report the problem and continue.
            print(f'Error processing metric {metric}: {e}')

    return results


def select_features(X, y):
    """
    Keep the complete rows of X, run the group diagnostics and score features with ANOVA F-values.

    Parameters:
    - X (pd.DataFrame): Feature matrix. Its index and column names must also exist in the
      module-level DataFrame `df`, which supplies the 'Group' labels for the diagnostics.
    - y (pd.Series): Target values, indexed by the same labels as X.

    Returns:
    - X_selected (np.ndarray): Feature values of the retained rows. `k` equals the number
      of columns, so every feature is kept and the F-scores do not prune anything.
    - y: Target values aligned row by row with X_selected.

    Raises:
    - ValueError: If no row is left after removing missing values.

    Side effects:
    - May add `log_<feature>` columns to the module-level `df` (see `_run_group_diagnostics`).
    - Prints diagnostic errors and the names of the selected features.
    """
    # Remove rows with NaN values from X and filter y to match X's index.
    X = X.dropna()
    y = y[X.index]

    # SelectKBest rejects missing targets (e.g. a 'Group' label that is absent from the
    # class mapping), so those rows are removed as well.
    if isinstance(y, pd.Series):
        has_target = y.notna()
        X, y = X[has_target], y[has_target]

    if X.empty:
        raise ValueError("select_features: no complete rows left after dropping missing values")

    # Diagnostics only: the returned dictionary is not needed for the selection below.
    _run_group_diagnostics(X)

    # Feature scoring with the ANOVA F-value; k = number of columns keeps all features.
    # NOTE(review): f_classif treats every distinct target value as a class, which is
    # questionable for continuous targets such as MMSE - harmless while all features are kept.
    anova_selector = SelectKBest(score_func=f_classif, k=X.shape[1])
    X_selected = anova_selector.fit_transform(X, y)

    # Report the names of the retained features.
    selected_features = anova_selector.get_support(indices=True)
    print("Selected features:", [X.columns[i] for i in selected_features])

    return X_selected, y

In [69]:
def save_excel(results_df, path, sheet_name):
    """
    Write a DataFrame, rounded to 2 decimals, to one sheet of an Excel workbook.

    When the workbook already exists it is opened in append mode and a sheet with the
    same name is replaced; otherwise a new workbook is created.

    Parameters:
    - results_df (pd.DataFrame): The DataFrame to be saved (its index is written too).
    - path (str): The path to the Excel file.
    - sheet_name (str): The name of the target sheet.
    """
    # Choose the writer options up front: append-and-replace for an existing file,
    # plain creation otherwise.
    writer_options = {'engine': 'openpyxl'}
    if os.path.exists(path):
        writer_options['mode'] = 'a'
        writer_options['if_sheet_exists'] = 'replace'

    # Two decimals keep the sheet readable.
    rounded = results_df.round(2)

    with pd.ExcelWriter(path, **writer_options) as writer:
        rounded.to_excel(writer, sheet_name=sheet_name, index=True)

In [70]:
def plot_learning_curve(estimator, X, y, model_name, path):
    """
    Plot the learning curve for a given estimator and save it as a PNG.

    Parameters:
    - estimator: The machine learning model or estimator to evaluate.
    - X (pd.DataFrame or np.ndarray): Feature matrix.
    - y (pd.Series or np.ndarray): Target variable.
    - model_name (str): Name of the model, used in the plot title and the file name.
    - path (str): Directory in which 'Learning Curve_<model_name>.png' is written.
      It is created (with any missing parents) when it does not exist.
    """
    # Create the output directory itself. os.path.dirname(path) would only create its
    # parent, and the save below would then fail for a directory that does not exist yet.
    os.makedirs(path, exist_ok=True)

    # Mean train / validation score for 10 training-set sizes, 5-fold cross-validation.
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=5, n_jobs=-1,
        train_sizes=np.linspace(0.1, 1.0, 10))

    # Create a new figure
    plt.figure()

    # Training scores (red) and cross-validation scores (green), averaged over the folds
    plt.plot(train_sizes, np.mean(train_scores, axis=1), 'o-', color='r', label='Training score')
    plt.plot(train_sizes, np.mean(test_scores, axis=1), 'o-', color='g', label='Cross-validation score')

    # Axis labels, title, legend and grid
    plt.xlabel('Training examples')
    plt.ylabel('Score')
    plt.title(f'Learning Curve for {model_name}')
    plt.legend(loc='best')
    plt.grid()

    try:
        # Save the plot in the output directory under a name based on the model name.
        plt.savefig(f"{path}/Learning Curve_{model_name}.png")
    except Exception as e:
        # Saving is best-effort: report the problem instead of aborting the run.
        print(f"Error saving the learning Curve plot: {e}")
    finally:
        # Close the plot to free up memory.
        plt.close()

In [71]:
def _param_distributions(model_name, X, y):
    """
    Return the hyperparameter search space for `model_name`.

    The result is either one dictionary or a list of dictionaries. A list is used when
    some parameter values are only valid together (e.g. solver and penalty), so that
    every sampled candidate is a combination the estimator accepts. An unknown
    `model_name` yields an empty dictionary (the estimator's own settings are used).
    """
    if model_name == 'LDA':
        # n_components may not exceed min(n_classes - 1, n_features).
        max_components = min(len(np.unique(y)) - 1, np.shape(X)[1])
        n_components = [None] + [c for c in (1, 2, 3) if c <= max_components]
        common = {'n_components': n_components, 'store_covariance': [True, False]}
        return [
            # The 'svd' solver does not support shrinkage.
            {**common, 'solver': ['svd']},
            {**common, 'solver': ['lsqr', 'eigen'], 'shrinkage': [None, 'auto', 0.1, 0.5, 1.0]},
        ]

    if model_name == 'LogisticRegression':
        common = {'C': [0.01, 0.1, 1, 10, 100], 'max_iter': [100, 200, 500]}
        return [
            # Each solver is paired only with the penalties it supports. "No penalty" is
            # spelled None; the string 'none' is rejected by current scikit-learn releases.
            {**common, 'solver': ['newton-cg', 'lbfgs', 'sag'], 'penalty': ['l2', None]},
            {**common, 'solver': ['liblinear'], 'penalty': ['l1', 'l2']},
            {**common, 'solver': ['saga'], 'penalty': ['l1', 'l2', None]},
            # l1_ratio is only used (and required) with the elastic-net penalty.
            {**common, 'solver': ['saga'], 'penalty': ['elasticnet'], 'l1_ratio': [0.1, 0.5, 0.7, 1.0]},
        ]

    if model_name == 'RandomForest':
        return {
            'n_estimators': [50, 100, 200],
            'max_features': ['sqrt', 'log2'],
            'max_depth': [None, 10, 20, 30, 40],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'bootstrap': [True, False]
        }

    if model_name == 'KNN':
        return {
            'n_neighbors': [3, 5, 7, 9, 11],
            'weights': ['uniform', 'distance'],
            'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
            'leaf_size': [10, 20, 30, 40],
            'p': [1, 2]  # 1 for Manhattan distance, 2 for Euclidean distance
        }

    if model_name == 'SVC':
        return {
            'C': [0.01, 0.1, 1, 10, 100],
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            'degree': [2, 3, 4],  # Relevant only for 'poly' kernel
            'gamma': ['scale', 'auto'],
            'coef0': [0.0, 0.1, 0.5, 1.0],  # Relevant only for 'poly' and 'sigmoid' kernels
            'class_weight': [None, 'balanced']
        }

    if model_name == 'LinearRegression':
        return {
            'fit_intercept': [True, False],
            'positive': [True, False],
            'copy_X': [True, False]
        }

    if model_name == 'RandomForestRegressor':
        return {
            'n_estimators': [50, 100, 200],
            'max_features': ['sqrt', 'log2', None],
            'max_depth': [None, 10, 20, 30, 40],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'bootstrap': [True, False]
        }

    if model_name == 'SVR':
        return {
            'C': [0.01, 0.1, 1, 10, 100],
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            'degree': [2, 3, 4],  # Relevant only for 'poly' kernel
            'gamma': ['scale', 'auto'],
            'coef0': [0.0, 0.1, 0.5, 1.0],  # Relevant only for 'poly' and 'sigmoid' kernels
            'epsilon': [0.01, 0.1, 1]
        }

    return {}


def param(model, model_name, X, y, output_dir='/content/drive/MyDrive/Sapienza/Workshops/Workshop 4'):
    """
    Perform hyperparameter tuning using RandomizedSearchCV, plot a learning curve and save the results.

    Parameters:
    - model: The machine learning model to tune.
    - model_name (str): Selects the search space ('LDA', 'LogisticRegression', 'RandomForest',
      'KNN', 'SVC', 'LinearRegression', 'RandomForestRegressor' or 'SVR') and names the
      Excel sheets and the learning-curve image.
    - X (pd.DataFrame or np.ndarray): Feature matrix.
    - y (pd.Series or np.ndarray): Target variable.
    - output_dir (str): Directory that receives 'Img/', 'model_results.xlsx' and
      'model_best_results.xlsx'. Defaults to the original workshop folder.

    Returns:
    - best_model: The fitted RandomizedSearchCV object (not a bare estimator); the tuned
      estimator is available as `best_model.best_estimator_`.
    """
    param_dist = _param_distributions(model_name, X, y)

    # Randomised search: at most 10 sampled candidates, 3-fold cross-validation.
    best_model = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_dist,
        n_iter=10,
        cv=3,
        verbose=2,
        random_state=42,
        n_jobs=-1
    )
    best_model.fit(X, y)

    # Plot learning curve for the best model
    plot_learning_curve(best_model.best_estimator_, X, y, model_name, f'{output_dir}/Img')

    # The workbooks are written directly into output_dir, so it has to exist.
    os.makedirs(output_dir, exist_ok=True)

    # Save all search results to Excel
    results = pd.DataFrame(best_model.cv_results_)
    save_excel(results, f'{output_dir}/model_results.xlsx', model_name)

    # Save the top 3 results to a separate Excel file
    top_results = results.nlargest(3, 'mean_test_score')
    save_excel(top_results, f'{output_dir}/model_best_results.xlsx', model_name)

    return best_model

In [72]:
import os  # Import the os module to interact with the operating system.
import numpy as np  # Import numpy for mathematical operations and array manipulations.
import matplotlib.pyplot as plt  # Import matplotlib for creating plots and visualizations.

def plot_confusion_matrix(path, scenario_name, model_name, cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.BuGn):
    """
    Plots the confusion matrix with options for normalization and saves the plot.

    Parameters:
    - path: Directory in which '<title>_<scenario_name>_<model_name>.png' is written.
      It is created (with any missing parents) when it does not exist.
    - scenario_name: Name of the scenario for the plot title.
    - model_name: Name of the model for the plot title.
    - cm: Confusion matrix to plot (rows are true labels, columns are predicted labels).
    - classes: List of class names.
    - normalize: Whether to normalize each row of the confusion matrix to sum to 1.
    - title: Title for the plot.
    - cmap: Color map for the plot.
    """
    # Create the output directory itself. os.path.dirname(path) would only create its
    # parent, and the save below would then fail for a directory that does not exist yet.
    os.makedirs(path, exist_ok=True)

    cm = np.asarray(cm)

    if normalize:
        # Divide every row by its total; a row without samples stays at 0 instead of
        # turning into NaN through a division by zero.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        print(f"Confusion Matrix normalized for {scenario_name}")
    else:
        print(f'Confusion Matrix for {scenario_name}')

    print(cm)  # Print the confusion matrix to the console.

    # Draw the matrix as a colored image with a color bar and a descriptive title.
    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(f'{title} for {model_name} in {scenario_name}')
    plt.colorbar()

    # One tick per class on both axes; x labels are rotated 45 degrees.
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45, fontsize=16)
    plt.yticks(tick_marks, classes, fontsize=16)

    # Counts are shown as integers, normalized values with two decimals.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text, the others black text.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], fmt),
                     ha="center", va="center",
                     fontsize=16,
                     color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label', fontsize=16)
    plt.xlabel('Predicted label', fontsize=16)
    # Adjust the layout once the axis labels exist, so they are taken into account.
    plt.tight_layout()

    try:
        # Save the plot under a name based on the title, scenario name, and model name.
        plt.savefig(f"{path}/{title}_{scenario_name}_{model_name}.png")
    except Exception as e:
        # Saving is best-effort: report the problem instead of aborting the run.
        print(f"Error saving the confusion matrix plot: {e}")
    finally:
        # Close the plot to free up memory.
        plt.close()

In [73]:
import matplotlib.pyplot as plt  # Import matplotlib for creating plots.

def evaluate_regression_model(path, y_true, y_pred, scenario_name, model_name, title='Evaluate Regression'):
    """
    Evaluate a regression model by plotting predicted vs. actual values and residuals.

    Two PNG files are written to `path`:
    '<title>_<scenario_name>_<model_name>_predictions.png' and
    '<title>_<scenario_name>_<model_name>_residuals.png'. Each file path is printed
    before the file is saved.

    Parameters:
    - path: Directory path to save the plots; created when it does not exist.
    - y_true: Array-like of true target values.
    - y_pred: Array-like of predicted values.
    - scenario_name: Name of the scenario for the plot title.
    - model_name: Name of the model for the plot title.
    - title: Title for the plots.
    """
    # Make sure the output directory exists, as the other plotting helpers do.
    os.makedirs(path, exist_ok=True)

    predictions_file = f"{path}/{title}_{scenario_name}_{model_name}_predictions.png"
    residuals_file = f"{path}/{title}_{scenario_name}_{model_name}_residuals.png"

    # Plot of Predictions vs True Values
    plt.figure(figsize=(12, 6))
    try:
        plt.scatter(y_true, y_pred, alpha=0.5)
        # Red dashed diagonal: where perfect predictions would fall.
        lowest, highest = min(y_true), max(y_true)
        plt.plot([lowest, highest], [lowest, highest], 'r--', lw=2)
        plt.xlabel('True Values')
        plt.ylabel('Predictions')
        plt.title(f'{title} (Predictions vs True Values) for {model_name} in {scenario_name}', fontsize=8)
        # Lay out the figure after it has content; on an empty figure this has no effect.
        plt.tight_layout()
        print(predictions_file)
        plt.savefig(predictions_file)
    finally:
        # Close the figure even when plotting or saving fails, to free up memory.
        plt.close()

    # Plot of Residuals
    residuals = y_true - y_pred  # Residuals are true values minus predicted values.
    plt.figure(figsize=(12, 6))
    try:
        plt.scatter(y_pred, residuals, alpha=0.5)
        # Red dashed line at zero residual.
        plt.axhline(y=0, color='r', linestyle='--')
        plt.xlabel('Predictions')
        plt.ylabel('Residuals')
        plt.title(f'{title} (Residuals Plot) for {model_name} in {scenario_name}', fontsize=8)
        plt.tight_layout()
        print(residuals_file)
        plt.savefig(residuals_file)
    finally:
        plt.close()

In [74]:
import pandas as pd  # Import pandas for data manipulation and saving results.
from sklearn.model_selection import train_test_split, cross_val_score  # Import train_test_split for splitting data and cross_val_score for cross-validation.
from sklearn.preprocessing import StandardScaler  # Import StandardScaler for feature scaling.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score  # Import various metrics for model evaluation.
import matplotlib.pyplot as plt  # Import matplotlib for plotting confusion matrices.

def _positive_class_scores(estimator, X_test):
    """Return continuous scores for the second class, falling back to hard predictions."""
    if hasattr(estimator, 'predict_proba'):
        return estimator.predict_proba(X_test)[:, 1]
    if hasattr(estimator, 'decision_function'):
        return estimator.decision_function(X_test)
    return estimator.predict(X_test)


def model_class(X, y, models, path, scenario_name):
    """
    Evaluates different binary classification models: feature selection, hold-out split,
    scaling, hyperparameter tuning, cross-validation and test-set evaluation.

    Parameters:
    - X: Feature matrix.
    - y: Target vector (two classes; the second class in sorted order is the positive one).
    - models: Dictionary of model names and instances.
    - path: Directory path to save the results and plots; created when it does not exist.
    - scenario_name: Name of the scenario for the plot title, file names and Excel sheet.

    Returns:
    - results_df (pd.DataFrame): One row of test-set metrics per model, indexed by model
      name. The same table is written to '<path>/model_comparison_results_class.xlsx'.
    """
    # The comparison workbook is written directly into `path`, so it has to exist.
    os.makedirs(path, exist_ok=True)

    X, y = select_features(X, y)  # Perform feature selection on the data.

    # Fixing the label order keeps the confusion matrix square even when a class is
    # missing from the test split.
    class_labels = np.unique(y)

    # Split the data into training and testing sets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Standardize the features; the scaler is fitted on the training split only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Class names shown on the confusion matrix.
    class_names = ['HC', 'PDMCI']

    results = []  # One dictionary of metrics per model.

    for model_name, model in models.items():
        # Tune on the scaled training split only. Searching on the full data would let the
        # test rows influence the chosen hyperparameters, and fitting the returned search
        # object again would repeat the whole search.
        search = param(model, model_name, X_train, y_train)
        best_estimator = search.best_estimator_

        # Perform cross-validation on the training data with the best model found.
        cross_val_scores = cross_val_score(
            estimator=best_estimator,
            X=X_train,
            y=y_train,
            cv=10,  # 10-fold cross-validation.
            n_jobs=-1  # Use all available processors.
        )
        print(f'Cross-validation scores for {model_name}: {cross_val_scores}')

        # Make predictions on the test set.
        y_pred = search.predict(X_test)

        # Calculate evaluation metrics for the model.
        accuracy = accuracy_score(y_test, y_pred) * 100
        precision = precision_score(y_test, y_pred, average='weighted') * 100
        f1 = f1_score(y_test, y_pred, average='weighted') * 100
        conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

        tn = conf_matrix[0, 0]  # True negatives.
        fp = conf_matrix[0, 1]  # False positives.
        fn = conf_matrix[1, 0]  # False negatives.
        tp = conf_matrix[1, 1]  # True positives.
        # Sensitivity is the recall of the positive class. Weighted recall over both
        # classes is identical to accuracy and would only repeat that column.
        sensitivity = tp / (tp + fn) * 100 if (tp + fn) else float('nan')
        specificity = tn / (tn + fp) * 100 if (tn + fp) else float('nan')

        # AUC is computed from continuous scores when the model provides them; hard
        # labels collapse the ROC curve to a single operating point.
        auc = roc_auc_score(y_test, _positive_class_scores(best_estimator, X_test))

        # Append the results to the list.
        results.append({
            'Model': model_name,
            'Accuracy (%)': accuracy,
            'Sensitivity (%)': sensitivity,
            'Specificity (%)': specificity,
            'Precision (%)': precision,
            'F1 Score (%)': f1,
            'AUC': auc
        })

        # Plot and save the confusion matrix.
        plot_confusion_matrix(path, scenario_name, model_name, conf_matrix, normalize=False, classes=class_names, title='Confusion matrix')

    # Convert the list of results into a DataFrame indexed by model name.
    results_df = pd.DataFrame(results)
    results_df.set_index('Model', inplace=True)

    # Save the results to an Excel file.
    save_excel(results_df, path + '/model_comparison_results_class.xlsx', scenario_name)

    return results_df

In [75]:
import numpy as np  # Import numpy for numerical operations.
import pandas as pd  # Import pandas for data manipulation and saving results.
from sklearn.model_selection import train_test_split, cross_val_score  # Import train_test_split for splitting data and cross_val_score for cross-validation.
from sklearn.preprocessing import StandardScaler  # Import StandardScaler for feature scaling.
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, roc_auc_score  # Import regression metrics for evaluation.
import matplotlib.pyplot as plt  # Import matplotlib for plotting regression results.

def model_reg(X, y, models, path, scenario_name):
    """
    Train, cross-validate and score several regression models for one scenario.

    Steps performed:
    1. Feature selection through `select_features`.
    2. 70/30 train/test split (random_state=42).
    3. Standardization, with the scaler fitted on the training split only.
    4. For every model: tuning through `param`, fitting on the training split,
       10-fold cross-validation on the training split, prediction on the test
       split, metric computation and diagnostic plots.
    5. The per-model metrics table is handed to `save_excel`.

    Parameters:
    - X: Feature matrix.
    - y: Target vector.
    - models: Dictionary of model names and instances.
    - path: Directory path to save the results and plots.
    - scenario_name: Name of the scenario for the plot title and file names.

    Returns:
    - None. Cross-validation scores are printed; plots and the metrics table are
      written by `evaluate_regression_model` and `save_excel`.
    """

    X, y = select_features(X, y)  # Perform feature selection on the data.

    # Hold out 30% of the samples for the final evaluation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Fit the scaler on the training split only, then reuse it for the test split
    # so no test-set statistics reach the models through the scaling step.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Column order of the exported table; 'Model' becomes the index below.
    result_columns = ['Model', 'MSE', 'RMSE', 'MAE', 'R2 Score']
    results = []  # One dict of metrics per evaluated model.

    for model_name, model in models.items():
        # `param` is expected to return a search object exposing `best_estimator_`
        # (it is read below).
        # NOTE(review): `param` receives the full, unscaled X and y rather than the
        # training split. If it fits anything on them, test samples influence the
        # tuning -- confirm against the definition of `param`.
        model = param(model, model_name, X, y)

        # Train the (tuned) model on the scaled training data.
        model.fit(X_train, y_train)

        # 10-fold cross-validation of the selected estimator on the training split.
        # No `scoring` is given, so the estimator's own default scorer is used.
        cross_val_scores = cross_val_score(
            estimator=model.best_estimator_,
            X=X_train,
            y=y_train,
            cv=10,
            n_jobs=-1  # Use all available processors.
        )
        print(f'Cross-validation scores for {model_name}: {cross_val_scores}')

        # Predict the held-out test set.
        y_pred = model.predict(X_test)

        # Test-set metrics. MSE and RMSE are reported separately: the previous
        # version stored the square root (RMSE) under the 'MSE' label.
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        results.append({
            'Model': model_name,
            'MSE': mse,
            'RMSE': rmse,
            'MAE': mae,
            'R2 Score': r2
        })

        # Plot and save the evaluation results for this model.
        evaluate_regression_model(path, y_test, y_pred, scenario_name, model_name, title='Evaluate Regression')

    # Explicit columns keep the layout stable and let `set_index` succeed even
    # when `models` is empty (an empty list would otherwise have no 'Model' column).
    results_df = pd.DataFrame(results, columns=result_columns).set_index('Model')

    # Save the results to an Excel file.
    save_excel(results_df, path + '/model_comparison_results_reg.xlsx', scenario_name)

# Classification model

In [76]:
# Output directory that receives every plot and Excel workbook.
path = r'/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img'

# Candidate classifiers, keyed by the label used in reports and file names.
models = dict(
    LDA_Classifier=LinearDiscriminantAnalysis(),
    LogisticRegressionClassifier=LogisticRegression(max_iter=1000, random_state=42),  # max_iter raised to 1000
    RandomForestClassifier=RandomForestClassifier(random_state=42),
    KNN_Classifier=KNeighborsClassifier(n_neighbors=3),  # 3 nearest neighbours
    SVClassifier=SVC(random_state=42),
)

# Each scenario maps a readable name to its (feature matrix, target vector) pair.
scenarios_class = dict([
    ('Group vs EEG', (X_class_EEG, y_class)),  # EEG-based classification
    ('Group vs MRI', (X_class_MRI, y_class)),  # MRI-based classification
    # ('Group vs EEG and MRI', (X_class_EEG_MRI, y_class)),  # combined EEG + MRI, currently disabled
])

# Run the full model comparison once per scenario.
for scenario_name in scenarios_class:
    X, y = scenarios_class[scenario_name]
    model_class(X, y, models, path, scenario_name)

# Final status message for the reader of the notebook output.
print("Classification results saved to Excel with separate sheets for each scenario.")



Selected features: ['Age', 'Sex', 'Education', 'De_F', 'De_C', 'De_P', 'De_O', 'De_T', 'De_L', 'Th_F', 'Th_C', 'Th_P', 'Th_O', 'Th_T', 'Th_L', 'A1_F', 'A1_C', 'A1_P', 'A1_O', 'A1_T', 'A1_L', 'A2_F', 'A2_C', 'A2_P', 'A2_O', 'A2_T', 'A2_L', 'A3_F', 'A3_C', 'A3_P', 'A3_O', 'A3_T', 'A3_L', 'B1_F', 'B1_C', 'B1_P', 'B1_O', 'B1_T', 'B1_L', 'B2_F', 'B2_C', 'B2_P', 'B2_O', 'B2_T', 'B2_L', 'Ga_F', 'Ga_C', 'Ga_P', 'Ga_O', 'Ga_T', 'Ga_L', 'De_global', 'Th_global', 'A1_global', 'A3_global']
Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for LDA_Classifier: [0.625      0.875      0.57142857 0.57142857 0.85714286 0.57142857
 0.71428571 0.85714286 0.85714286 0.42857143]
Confusion Matrix for Group vs EEG
[[13  6]
 [ 1 12]]




Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for LogisticRegressionClassifier: [0.625      0.875      0.85714286 0.85714286 1.         0.71428571
 0.85714286 0.85714286 1.         0.71428571]
Confusion Matrix for Group vs EEG
[[17  2]
 [ 2 11]]




Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for RandomForestClassifier: [0.75       1.         0.57142857 0.71428571 1.         0.57142857
 0.71428571 0.42857143 1.         0.57142857]
Confusion Matrix for Group vs EEG
[[17  2]
 [ 2 11]]




Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for KNN_Classifier: [0.75       0.875      0.71428571 0.85714286 1.         0.71428571
 0.85714286 0.85714286 1.         0.42857143]
Confusion Matrix for Group vs EEG
[[16  3]
 [ 2 11]]




Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for SVClassifier: [0.75       0.875      0.85714286 0.85714286 0.85714286 0.85714286
 0.71428571 0.71428571 1.         0.57142857]
Confusion Matrix for Group vs EEG
[[17  2]
 [ 2 11]]




Selected features: ['Age', 'Sex', 'Education', 'Visual_Network_Normalized', 'SomatoMotor_Network_Normalized', 'DAN_Normalized', 'VAN_Normalized', 'Limbic_Network_Normalized', 'FrontoParietal_Network_Normalized', 'DMN_Normalized']
Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for LDA_Classifier: [0.33333333 0.83333333 1.         0.6        1.         0.6
 0.6        1.         0.8        0.6       ]
Confusion Matrix for Group vs MRI
[[ 3  4]
 [ 3 13]]




Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for LogisticRegressionClassifier: [0.33333333 0.83333333 0.8        0.6        1.         0.6
 0.8        1.         0.8        0.8       ]
Confusion Matrix for Group vs MRI
[[ 3  4]
 [ 3 13]]




Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for RandomForestClassifier: [0.33333333 0.33333333 1.         0.8        0.8        0.6
 0.8        1.         0.8        0.6       ]
Confusion Matrix for Group vs MRI
[[4 3]
 [7 9]]




Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for KNN_Classifier: [0.5        0.66666667 0.8        0.6        0.6        0.6
 1.         0.8        0.8        0.6       ]
Confusion Matrix for Group vs MRI
[[ 6  1]
 [ 4 12]]




Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for SVClassifier: [0.5        0.83333333 1.         0.8        0.8        0.6
 0.8        0.8        0.8        0.6       ]
Confusion Matrix for Group vs MRI
[[ 4  3]
 [ 4 12]]
Classification results saved to Excel with separate sheets for each scenario.


# Regression model

In [77]:
# Candidate regressors, keyed by the label used in reports and file names.
# This rebinds `models`, replacing the classifier dictionary from the previous cell.
models = dict(
    LinearRegression=LinearRegression(),
    RandomForestRegressor=RandomForestRegressor(random_state=42),  # fixed seed for reproducibility
    SVRegressor=SVR(),
)

# Each scenario maps a readable name to its (feature matrix, target vector) pair.
scenarios_reg = dict([
    ('EEG vs MMSE', (X_reg_EEG, y_reg)),    # EEG features -> MMSE score
    ('MRI vs MMSE', (X_reg_MRI, y_reg)),    # MRI features -> MMSE score
    ('TELE vs MMSE', (X_reg_TELE, y_reg)),  # TELE features -> MMSE score
    # Disabled scenarios; uncomment to evaluate them as well:
    # ('EEG vs TELE', (X_reg_EEG, y_reg_2)),               # EEG features -> TELE score
    # ('MRI vs TELE', (X_reg_MRI, y_reg_2)),               # MRI features -> TELE score
    # ('EEG and TELE vs MMSE', (X_reg_TELE_EEG, y_reg)),   # EEG + TELE features -> MMSE score
    # ('MRI and TELE vs MMSE', (X_reg_TELE_MRI, y_reg)),   # MRI + TELE features -> MMSE score
])

# Run the full model comparison once per scenario. `path` is the output
# directory assigned in the classification cell above.
for scenario_name in scenarios_reg:
    X, y = scenarios_reg[scenario_name]
    model_reg(X, y, models, path, scenario_name)

# Final status message for the reader of the notebook output.
print("Regression results saved to Excel with separate sheets for each scenario.")



Selected features: ['Age', 'Sex', 'Education', 'De_F', 'De_C', 'De_P', 'De_O', 'De_T', 'De_L', 'Th_F', 'Th_C', 'Th_P', 'Th_O', 'Th_T', 'Th_L', 'A1_F', 'A1_C', 'A1_P', 'A1_O', 'A1_T', 'A1_L', 'A2_F', 'A2_C', 'A2_P', 'A2_O', 'A2_T', 'A2_L', 'A3_F', 'A3_C', 'A3_P', 'A3_O', 'A3_T', 'A3_L', 'B1_F', 'B1_C', 'B1_P', 'B1_O', 'B1_T', 'B1_L', 'B2_F', 'B2_C', 'B2_P', 'B2_O', 'B2_T', 'B2_L', 'Ga_F', 'Ga_C', 'Ga_P', 'Ga_O', 'Ga_T', 'Ga_L', 'De_global', 'Th_global', 'A1_global', 'A3_global']
Fitting 3 folds for each of 8 candidates, totalling 24 fits




Fitting 3 folds for each of 8 candidates, totalling 24 fits
Cross-validation scores for LinearRegression: [  0.50208091   0.27589688  -0.36401029 -17.96431986   0.46995858
   0.21293425   0.31850291   0.24944532  -0.10986563  -1.0915697 ]
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_EEG vs MMSE_LinearRegression_predictions.png
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_EEG vs MMSE_LinearRegression_residuals.png
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Cross-validation scores for RandomForestRegressor: [ 0.16511626  0.28392619  0.26928582 -8.59865919  0.25614147  0.2287018
  0.5243258  -0.01360112  0.18761215  0.09630215]
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_EEG vs MMSE_RandomForestRegressor_predictions.png
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_EEG vs MMSE_RandomFo



Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for SVRegressor: [-8.80964109e-02  3.68310140e-02  3.21023144e-01 -5.55144363e+00
 -6.59178530e-02  5.57473924e-02 -3.28047022e-03  2.67229728e-01
 -5.01267737e-03 -4.36988452e-02]
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_EEG vs MMSE_SVRegressor_predictions.png
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_EEG vs MMSE_SVRegressor_residuals.png




Selected features: ['Age', 'Sex', 'Education', 'Visual_Network_Normalized', 'SomatoMotor_Network_Normalized', 'DAN_Normalized', 'VAN_Normalized', 'Limbic_Network_Normalized', 'FrontoParietal_Network_Normalized', 'DMN_Normalized']
Fitting 3 folds for each of 8 candidates, totalling 24 fits




Fitting 3 folds for each of 8 candidates, totalling 24 fits
Cross-validation scores for LinearRegression: [-2.34587648  0.81015956  0.52825593 -1.66248017  0.46576267  0.5554648
 -1.98382087  0.4275992   0.83904006 -8.30788968]
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_MRI vs MMSE_LinearRegression_predictions.png
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_MRI vs MMSE_LinearRegression_residuals.png
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Cross-validation scores for RandomForestRegressor: [-1.42525981  0.68723048  0.55490198 -5.20045566  0.71866884  0.45116386
 -0.72939441  0.29383631  0.56488845 -0.36285165]
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_MRI vs MMSE_RandomForestRegressor_predictions.png
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_MRI vs MMSE_RandomForestRegres



Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for SVRegressor: [-1.37522647  0.55356795  0.4517734  -2.54503844  0.59053983  0.6027281
 -0.50821653  0.46517399  0.32124333 -1.12216733]
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_MRI vs MMSE_SVRegressor_predictions.png
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_MRI vs MMSE_SVRegressor_residuals.png




Selected features: ['Age', 'Sex', 'Education', 'TASK1', 'TASK2', 'TASK3', 'TASK4', 'TASK5', 'TASK6', 'TASK7']
Fitting 3 folds for each of 8 candidates, totalling 24 fits




Fitting 3 folds for each of 8 candidates, totalling 24 fits
Cross-validation scores for LinearRegression: [ -0.46535595  -0.7860557  -14.70580413  -0.80870876   0.39329179
   0.8763016   -0.36674957  -0.74086955  -0.11414626   0.12229674]
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_TELE vs MMSE_LinearRegression_predictions.png
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_TELE vs MMSE_LinearRegression_residuals.png
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Cross-validation scores for RandomForestRegressor: [ 0.62468574  0.42599709 -4.03217234 -0.77551711  0.20883849  0.86302452
 -0.14932059  0.11250336 -0.02187071  0.5494715 ]
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_TELE vs MMSE_RandomForestRegressor_predictions.png
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_TELE vs MMSE_Ran



Fitting 3 folds for each of 1 candidates, totalling 3 fits




Fitting 3 folds for each of 1 candidates, totalling 3 fits
Cross-validation scores for SVRegressor: [-0.39166438  0.03600652 -0.87824172 -2.04634128 -0.12045355  0.50965559
 -0.47632467 -0.41882795  0.16458177  0.94736587]
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_TELE vs MMSE_SVRegressor_predictions.png
/content/drive/MyDrive/Sapienza/Workshops/Workshop 4/Img/Evaluate Regression_TELE vs MMSE_SVRegressor_residuals.png
Regression results saved to Excel with separate sheets for each scenario.
