<a href="https://colab.research.google.com/github/varshini0317/NeubAItics/blob/main/Alzheimer_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [79]:
#import packages

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


In [85]:
# Loading into a dataframe
df = pd.read_csv('/content/oasis_longitudinal.csv')


print(df.head())


  Subject ID         MRI ID        Group  Visit  MR Delay M/F Hand  Age  EDUC  \
0  OAS2_0001  OAS2_0001_MR1  Nondemented      1         0   M    R   87    14   
1  OAS2_0001  OAS2_0001_MR2  Nondemented      2       457   M    R   88    14   
2  OAS2_0002  OAS2_0002_MR1     Demented      1         0   M    R   75    12   
3  OAS2_0002  OAS2_0002_MR2     Demented      2       560   M    R   76    12   
4  OAS2_0002  OAS2_0002_MR3     Demented      3      1895   M    R   80    12   

   SES  MMSE  CDR  eTIV   nWBV    ASF  
0  2.0  27.0  0.0  1987  0.696  0.883  
1  2.0  30.0  0.0  2004  0.681  0.876  
2  NaN  23.0  0.5  1678  0.736  1.046  
3  NaN  28.0  0.5  1738  0.713  1.010  
4  NaN  22.0  0.5  1698  0.701  1.034  


In [86]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 373 entries, 0 to 372
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Subject ID  373 non-null    object 
 1   MRI ID      373 non-null    object 
 2   Group       373 non-null    object 
 3   Visit       373 non-null    int64  
 4   MR Delay    373 non-null    int64  
 5   M/F         373 non-null    object 
 6   Hand        373 non-null    object 
 7   Age         373 non-null    int64  
 8   EDUC        373 non-null    int64  
 9   SES         354 non-null    float64
 10  MMSE        371 non-null    float64
 11  CDR         373 non-null    float64
 12  eTIV        373 non-null    int64  
 13  nWBV        373 non-null    float64
 14  ASF         373 non-null    float64
dtypes: float64(5), int64(5), object(5)
memory usage: 43.8+ KB


In [87]:
# Dropping the list of columns mentioned
df = df.drop(['Subject ID', 'MRI ID', 'Hand', 'Visit', 'MR Delay'], axis=1)
df.shape

(373, 10)

In [88]:
df['Group'].value_counts()

Group
Nondemented    190
Demented       146
Converted       37
Name: count, dtype: int64

In [89]:
df['Group'] = df['Group'].replace(['Converted'], ['Demented'])

In [90]:
df['M/F'] = df['M/F'].replace(['F', 'M'], [0, 1])
df['Group'] = df['Group'].replace(['Demented', 'Nondemented'], [1, 0])

In [91]:
df.isnull().sum()

Group     0
M/F       0
Age       0
EDUC      0
SES      19
MMSE      2
CDR       0
eTIV      0
nWBV      0
ASF       0
dtype: int64

In [92]:
# Fill null values
df['SES'] = df['SES'].fillna(value= df['SES'].mode().iloc[0])
# Fill null values
df['MMSE'] = df['MMSE'].fillna(value =df['MMSE'].median())

In [93]:
Y = df['Group'].values
X = df[['M/F', 'Age', 'EDUC', 'SES', 'MMSE', 'eTIV', 'nWBV', 'ASF']]

In [94]:
numerical_features = ['Age', 'EDUC', 'MMSE', 'eTIV', 'nWBV', 'ASF']
categorical_features = ['M/F', 'SES']

In [95]:
#numerical transformer and categorical transformer
numerical = MinMaxScaler()
categorical = OneHotEncoder(drop='first')

p = ColumnTransformer(transformers=[('num', numerical, numerical_features),
									('cat', categorical, categorical_features)])

In [96]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

In [97]:
# Define classifiers
classifiers = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    'SVM': SVC(random_state=42)
}

In [98]:
# Define hyperparameter grids for each model
rf_param_grid = {
    'n_estimators': [100, 200, 300],  # Number of trees
    'max_depth': [5, 8, 10]  # Maximum depth of trees
}

gb_param_grid = {
    'n_estimators': [100, 200, 300],  # Number of boosting stages
    'learning_rate': [0.1, 0.01, 0.001]  # Learning rate
}

svm_param_grid = {
    'C': [0.01, 0.1, 1, 10],  # Regularization parameter
    'kernel': ['linear', 'rbf']  # Kernel type
}


In [99]:
# Define the models
models = {
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'SVM': SVC()
}

In [100]:
model_rf = RandomForestClassifier()
model_gb = GradientBoostingClassifier()
model_svm = SVC()

In [101]:
model_rf.fit(X_train, Y_train)

In [102]:
model_gb.fit(X_train, Y_train)

In [103]:
model_svm.fit(X_train, Y_train)

In [104]:
# Function to perform grid search and select best model
def tune_model(model_name, model, param_grid, X_train, Y_train):
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
    grid_search.fit(X_train, Y_train)
    best_model = grid_search.best_estimator_
    best_params = grid_search.best_params_
    print(f"Best model for {model_name}: {best_model}")
    print(f"Best hyperparameters for {model_name}: {best_params}")
    return best_model

In [105]:
# Tune models and select best hyperparameters
best_models = {}
for model_name, model in models.items():
    if model_name == 'Random Forest':
        best_models[model_name] = tune_model(model_name, model, rf_param_grid, X_train, Y_train)
    elif model_name == 'Gradient Boosting':
        best_models[model_name] = tune_model(model_name, model, gb_param_grid, X_train, Y_train)
    elif model_name == 'SVM':
        best_models[model_name] = tune_model(model_name, model, svm_param_grid, X_train, Y_train)

Best model for Random Forest: RandomForestClassifier(max_depth=10)
Best hyperparameters for Random Forest: {'max_depth': 10, 'n_estimators': 100}
Best model for Gradient Boosting: GradientBoostingClassifier(n_estimators=300)
Best hyperparameters for Gradient Boosting: {'learning_rate': 0.1, 'n_estimators': 300}
Best model for SVM: SVC(C=0.1, kernel='linear')
Best hyperparameters for SVM: {'C': 0.1, 'kernel': 'linear'}


In [112]:
# Make predictions
for model_name, best_model in best_models.items():
    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(Y_test, y_pred)
    classification_rep = classification_report(Y_test, y_pred)

In [107]:
# Analysis and recommendations
for name, metrics in results.items():
    print(f"Results for {name}:")
    print(f"Best Model: {metrics['Best Model']}")
    print(f"Accuracy: {metrics['Accuracy']}")
    print("Classification Report:")
    print(metrics['Classification Report'])
    print("\n")

Results for Random Forest:
Best Model: RandomForestClassifier(max_depth=10, n_estimators=200)
Accuracy: 0.7978723404255319
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.80      0.81        50
           1       0.78      0.80      0.79        44

    accuracy                           0.80        94
   macro avg       0.80      0.80      0.80        94
weighted avg       0.80      0.80      0.80        94



Results for Gradient Boosting:
Best Model: GradientBoostingClassifier(n_estimators=300)
Accuracy: 0.8936170212765957
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.90      0.90        50
           1       0.89      0.89      0.89        44

    accuracy                           0.89        94
   macro avg       0.89      0.89      0.89        94
weighted avg       0.89      0.89      0.89        94



Results for SVM:
Best Model: SVC(C=0.1, kernel='linear')

In [108]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Sample dataset for prediction
sample_data = pd.DataFrame({
    'M/F': [1, 0],  # Gender: 1 for Male, 0 for Female
    'Age': [75, 88],  # Age of the individuals
    'EDUC': [12, 14],  # Years of education completed
    'SES': [2, 2],  # Socioeconomic status
    'MMSE': [23, 30],  # Mini-Mental State Examination score
    'eTIV': [1678, 2004],  # Estimated total intracranial volume
    'nWBV': [0.736, 0.681],  # Normalized whole brain volume
    'ASF': [1.046, 0.876]  # Atlas scaling factor
})


In [109]:
# Preprocessing the sample dataset
numerical_features = ['Age', 'EDUC', 'MMSE', 'eTIV', 'nWBV', 'ASF']
scaler = MinMaxScaler()
sample_data[numerical_features] = scaler.fit_transform(sample_data[numerical_features])

In [110]:
trained_model =  model_gb

# Making predictions on the sample dataset
predictions = trained_model.predict(sample_data)


In [111]:
# Mapping predictions to their corresponding labels
prediction_labels = ['Detected' if prediction == 1 else 'NotDetected' for prediction in predictions]

# Printing the predictions
for i, prediction_label in enumerate(prediction_labels):
    print(f"Prediction for individual {i + 1}: {prediction_label}")

Prediction for individual 1: NotDetected
Prediction for individual 2: Detected
