# **Load libraries**

---

* **import numpy as np**: This line imports the NumPy library and assigns it the alias np. NumPy is widely used for numerical computations and handling arrays.

* **import pandas as pd**: This line imports the pandas library with the alias pd. Pandas is a powerful library for data manipulation and analysis, particularly with tabular data structures like DataFrames.

* **import matplotlib.pyplot as plt**: This line imports the pyplot module from the Matplotlib library with the alias plt. pyplot is used for creating static, animated, and interactive visualizations in Python.

In [None]:
# Import the libraries that we are going to use in the code
import numpy as np  # Import NumPy for numerical operations and array manipulations
import pandas as pd  # Import pandas for data manipulation and analysis
import matplotlib.pyplot as plt  # Import Matplotlib for creating plots and visualizations

# **Upload files**

---
* **path_TELE**: This variable stores the file path to the Excel file containing TELEMAIA data related to EEG and telemonitoring features. The path is written as a raw string (the `r` in the string prefix), so backslashes are taken literally instead of being read as escape sequences, which is useful for Windows-style paths.

* **path_EEG**: This variable stores the file path to the Excel file containing EEG features for Nold and PD (Parkinson's Disease) patients with cognitive deficits.

* **path_MRI**: This variable stores the file path to the Excel file containing MRI features for the same groups as above.


In [None]:
# File paths of the three Excel workbooks used in this notebook.
# Raw strings without an f-prefix: the paths contain no {placeholders} to interpolate.

# TELEMAIA dataset relating EEG and telemonitoring features
path_TELE = r'/content/drive/MyDrive/Sapienza/Resources/WP2 -TELEMAIA Associazione tra EEG e TELEMONITORING features in Nold e MCI.xlsx'

# EEG features dataset for Nold and PD with cognitive deficits
path_EEG = r'/content/drive/MyDrive/Sapienza/Resources/WP2 -TELEMAIA EEG features in Nold e PD con deficit cognitivi.xlsx'

# MRI features dataset for Nold and PD with cognitive deficits
path_MRI = r'/content/drive/MyDrive/Sapienza/Resources/WP2 -TELEMAIA MRI features in Nold e PD con deficit cognitivi.xlsx'


In [None]:
# Load one worksheet from each Excel workbook into its own pandas DataFrame

# tele: sheet 'best' of the TELEMAIA workbook
tele = pd.read_excel(io=path_TELE, sheet_name='best')

# eeg: sheet 'all' of the EEG features workbook
eeg = pd.read_excel(io=path_EEG, sheet_name='all')

# mri: sheet 'all' of the MRI features workbook
mri = pd.read_excel(io=path_MRI, sheet_name='all')

# **Take a quick look at the contents of each dataframe**

In [None]:
# Preview the top of the 'tele' DataFrame (the first 5 rows, which is head()'s default)
tele.head(n=5)

Unnamed: 0,Codice TELEMAIA new,Group TELEMAIA,Unit,Age,Sex,Education,MMSE,TASK1,TASK2,TASK3,TASK4,TASK5,TASK6,TASK7,theta_P
0,Telemaia_MCI_001,MCI,SANT ANDREA,80.0,1,5,28,35,75,80,24.25,32.815,14.515,65.0,0.233976
1,Telemaia_MCI_002,MCI,SANT ANDREA,78.0,0,8,19,30,90,90,35.8325,96.875,30.0,45.0,0.455349
2,Telemaia_MCI_003,MCI,SANT ANDREA,81.0,1,5,26,35,75,70,26.5725,47.655,46.98,75.0,0.481065
3,Telemaia_MCI_004,MCI,SANT ANDREA,70.0,1,5,22,80,65,80,63.42,90.625,64.35,92.5,0.393888
4,Telemaia_MCI_005,MCI,SANT ANDREA,84.0,1,13,25,50,10,70,69.0925,71.875,78.32,60.0,0.273401


In [None]:
# Preview the top of the 'eeg' DataFrame (the first 5 rows, which is head()'s default)
eeg.head(n=5)

Unnamed: 0,Subj,Group,Unit,Age,Sex,Education,MMSEg,MMSEcorr,TF,IAF,...,Ga-F,Ga-C,Ga-P,Ga-O,Ga-T,Ga-L,De-global,Th-global,A1-global,A3-global
0,Istanbul_HC_faledev1,HC,1,61,0,5,29,29.0,4.5,10.0,...,0.506201,0.272497,0.078661,0.053723,0.435926,0.132839,0.271208,0.174212,0.149279,0.332717
1,Istanbul_HC_falisog1,HC,1,56,0,12,29,29.0,6.0,10.0,...,0.135269,0.062833,0.086813,0.095959,0.360152,0.043232,0.33695,0.230216,0.207011,0.501268
2,Istanbul_HC_falmasi1,HC,1,61,0,5,30,30.0,5.5,8.5,...,0.0767,0.078579,0.078238,0.087077,0.208819,0.041912,0.425909,0.28499,0.260351,0.591459
3,Istanbul_HC_falmyel1,HC,1,58,0,15,29,29.0,4.0,7.0,...,0.068814,0.030874,0.027713,0.022462,0.108711,0.018712,0.450753,0.396862,0.333317,0.824551
4,Istanbul_HC_falmyil1,HC,1,56,1,16,28,28.0,6.0,9.5,...,0.108541,0.028565,0.022697,0.034459,0.125239,0.025743,0.397387,0.22454,0.156795,0.58236


In [None]:
# Preview the top of the 'mri' DataFrame (the first 5 rows, which is head()'s default)
mri.head(n=5)

Unnamed: 0,Subj,Group,Age,Sex,Education,MMSE,Visual_Network_Normalized,SomatoMotor_Network_Normalized,DAN_Normalized,VAN_Normalized,Limbic_Network_Normalized,FrontoParietal_Network_Normalized,DMN_Normalized
0,sub-paayc1,PD,67,1,0,21,0.031523,0.029543,0.02199,0.022134,0.023608,0.028661,0.049165
1,sub-pabur1,PD,74,1,5,21,0.029358,0.028515,0.01869,0.020989,0.025147,0.027414,0.047441
2,sub-pagv1,PD,56,1,8,22,0.036929,0.035315,0.023967,0.026294,0.028691,0.032402,0.058267
3,sub-paire1,PD,67,1,5,19,0.036679,0.032625,0.023317,0.025463,0.03056,0.031957,0.058728
4,sub-payil1,PD,80,1,0,12,0.033394,0.027041,0.019274,0.021813,0.022836,0.026795,0.047139


# **Explore each dataframe individually**

In [None]:
# List the column labels of the 'tele' DataFrame
# This gives an overview of the data fields available in the table.
tele.columns

Index(['Codice TELEMAIA new', 'Group TELEMAIA', 'Unit', 'Age', 'Sex',
       'Education', 'MMSE', 'TASK1', 'TASK2', 'TASK3', 'TASK4', 'TASK5',
       'TASK6', 'TASK7', 'theta_P'],
      dtype='object')

In [None]:
# Show the row index of the 'tele' DataFrame
# The index holds the labels that identify each row.
tele.index

RangeIndex(start=0, stop=47, step=1)

In [None]:
# Show the dimensions of the 'tele' DataFrame
# The result is a tuple: (number of rows, number of columns).
tele.shape

(47, 15)

In [None]:
# Display summary statistics of the 'tele' DataFrame:
# count, mean, std, min, quartiles (25%, 50%, 75%) and max for each numeric column
tele.describe()

Unnamed: 0,Age,Sex,Education,MMSE,TASK1,TASK2,TASK3,TASK4,TASK5,TASK6,TASK7,theta_P
count,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0
mean,63.829787,0.659574,14.468085,28.042553,71.170213,79.787234,84.468085,69.402447,73.912872,79.730532,81.914894,0.360985
std,19.812999,0.478975,4.252115,2.881406,31.488659,23.54361,20.56892,29.869027,27.907135,27.428505,22.825024,0.116499
min,24.0,0.0,5.0,18.0,0.0,10.0,20.0,0.0,0.0,0.0,0.0,0.197424
25%,61.5,0.0,13.0,26.5,50.0,70.0,77.5,59.02875,53.125,72.5,73.75,0.257926
50%,70.0,1.0,16.0,29.0,80.0,85.0,90.0,71.6675,87.5,90.0,92.5,0.325013
75%,77.5,1.0,18.0,30.0,100.0,100.0,100.0,94.7225,95.3125,100.0,98.75,0.452246
max,87.0,1.0,20.0,30.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,0.646521


In [None]:
# Display a concise summary of the 'tele' DataFrame.
# info() prints:
#   - the number of entries (rows) and the index range
#   - the number of columns
#   - each column's name, non-null count and data type
#   - a count of columns per data type and the memory usage
tele.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47 entries, 0 to 46
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Codice TELEMAIA new  47 non-null     object 
 1   Group TELEMAIA       47 non-null     object 
 2   Unit                 47 non-null     object 
 3   Age                  47 non-null     float64
 4   Sex                  47 non-null     int64  
 5   Education            47 non-null     int64  
 6   MMSE                 47 non-null     int64  
 7   TASK1                47 non-null     int64  
 8   TASK2                47 non-null     int64  
 9   TASK3                47 non-null     int64  
 10  TASK4                47 non-null     float64
 11  TASK5                47 non-null     float64
 12  TASK6                47 non-null     float64
 13  TASK7                47 non-null     float64
 14  theta_P              47 non-null     float64
dtypes: float64(6), int64(6), object(3)
memory 

In [None]:
# Split the rows by their "Group TELEMAIA" value and, for each group,
# count the non-null entries in every remaining column
tele.groupby(by="Group TELEMAIA").count()

Unnamed: 0_level_0,Codice TELEMAIA new,Unit,Age,Sex,Education,MMSE,TASK1,TASK2,TASK3,TASK4,TASK5,TASK6,TASK7,theta_P
Group TELEMAIA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
HC,27,27,27,27,27,27,27,27,27,27,27,27,27,27
MCI,20,20,20,20,20,20,20,20,20,20,20,20,20,20


In [None]:
# Split the rows by their "Group TELEMAIA" value and take the per-group maximum of every column
# (for text columns such as the subject code or 'Unit', this is the last value in string sort order)
tele.groupby(by="Group TELEMAIA").max()

Unnamed: 0_level_0,Codice TELEMAIA new,Unit,Age,Sex,Education,MMSE,TASK1,TASK2,TASK3,TASK4,TASK5,TASK6,TASK7,theta_P
Group TELEMAIA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
HC,Telemaia_HC_027,SAPIENZA,78.333333,1,18,30,100,100,100,100.0,100.0,100.0,100.0,0.646521
MCI,Telemaia_MCI_020,SANT ANDREA,87.0,1,20,30,100,100,100,100.0,96.875,100.0,100.0,0.64356


In [None]:
# Split the rows by their "Group TELEMAIA" value and take the per-group minimum of every column
# (for text columns such as the subject code or 'Unit', this is the first value in string sort order)
tele.groupby(by="Group TELEMAIA").min()

Unnamed: 0_level_0,Codice TELEMAIA new,Unit,Age,Sex,Education,MMSE,TASK1,TASK2,TASK3,TASK4,TASK5,TASK6,TASK7,theta_P
Group TELEMAIA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
HC,Telemaia_HC_001,SANT ANDREA,24.0,0,13,25,15,50,20,51.11,59.375,70.0,70.0,0.197424
MCI,Telemaia_MCI_001,IRCCS San Raffaele,64.0,0,5,18,0,10,20,0.0,0.0,0.0,0.0,0.233976
