In [None]:
import pandas as pd

# Path of the raw CSV, resolved relative to the notebook's working directory.
file_path = 'dementia_dataset.csv'

# Read the whole file into a DataFrame; later cells refine `dataset` step by step.
dataset = pd.read_csv(filepath_or_buffer=file_path)

# Show the first five rows as a quick sanity check of the columns that were read.
dataset.head(n=5)

In [42]:
# Keep only the rows whose 'Group' value is something other than 'Converted'.
is_not_converted = dataset['Group'].ne('Converted')
dataset = dataset.loc[is_not_converted]

In [43]:
# Remove the two identifier columns so they are not used as predictors.
# errors='ignore' keeps this cell re-runnable: once the columns are gone, a
# second execution is a no-op instead of raising KeyError.
dataset = dataset.drop(columns=['Subject ID', 'MRI ID'], errors='ignore')

In [None]:
# Per-column count of missing entries.
missing_values = dataset.isna().sum()

# Display only the columns that actually contain at least one missing value.
missing_values.loc[missing_values.gt(0)]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

# Predictors are every column except 'Group'; 'Group' itself is the target.
X = dataset.drop(columns=['Group'])
y = dataset['Group']

# Integer-encode the two categorical columns. The single encoder is refit for
# each column, so after this loop it only holds the classes of 'Hand'.
# Encoding once is sufficient: re-encoding integer codes yields the same codes.
label_encoder = LabelEncoder()
for categorical_column in ['M/F', 'Hand']:
    X[categorical_column] = label_encoder.fit_transform(X[categorical_column])

# Columns handled by the imputer and scaler: everything stored as float64 or
# int64 at this point (this is evaluated after the encoding step above).
numerical_features = X.select_dtypes(include=['float64', 'int64']).columns

# Split BEFORE fitting any preprocessing so that statistics of the held-out
# half (medians, means, standard deviations) never influence training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=50)

# Work on explicit copies so the column assignments below modify independent
# frames rather than selections derived from X.
X_train = X_train.copy()
X_test = X_test.copy()

# Median imputation: learn the medians on the training half only, then apply
# the same medians to the test half.
num_imputer = SimpleImputer(strategy='median')
X_train[numerical_features] = num_imputer.fit_transform(X_train[numerical_features])
X_test[numerical_features] = num_imputer.transform(X_test[numerical_features])

# Standardization: same rule, fit on train and only transform test.
scaler = StandardScaler()
X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = scaler.transform(X_test[numerical_features])

# NOTE: X itself stays encoded but un-imputed and unscaled; only X_train and
# X_test carry the fully preprocessed values.
X_train.head()


In [46]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Seed shared by every estimator that accepts `random_state`, so that repeated
# runs of the notebook fit identical models and report identical scores.
MODEL_RANDOM_STATE = 50

# Candidate classifiers keyed by a display name. KNN and Gaussian Naive Bayes
# take no seed; the remaining estimators are seeded explicitly.
models = {
    'KNN': KNeighborsClassifier(),
    'Random Forest': RandomForestClassifier(random_state=MODEL_RANDOM_STATE),
    'Decision Tree': DecisionTreeClassifier(random_state=MODEL_RANDOM_STATE),
    # probability=True enables predict_proba, which soft voting requires.
    'SVM': SVC(probability=True, random_state=MODEL_RANDOM_STATE),
    'Naive Bayes': GaussianNB(),
    'Neural Network': MLPClassifier(random_state=MODEL_RANDOM_STATE),
}


In [None]:
# Names of the entries in `models` that take part in the vote, in order.
# 'Random Forest' is defined in `models` but is not part of this ensemble.
ensemble_member_names = ['KNN', 'Decision Tree', 'SVM', 'Naive Bayes', 'Neural Network']

# Soft voting combines the members' predicted class probabilities.
ensemble_model = VotingClassifier(
    estimators=[(member_name, models[member_name]) for member_name in ensemble_member_names],
    voting='soft',
)

# Train on the training half, then score the predictions for the test half.
ensemble_model.fit(X_train, y_train)
ensemble_predictions = ensemble_model.predict(X_test)
ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)

# Display the test-set accuracy as the cell output.
ensemble_accuracy


In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Fix the class order explicitly and reuse it for both the matrix and the tick
# labels. Without `labels=`, confusion_matrix orders classes by sorted label
# value, which would not match a hand-written tick-label list in this order.
class_labels = ["Nondemented", "Demented"]
cm = confusion_matrix(y_test, ensemble_predictions, labels=class_labels)

# Rows are the true classes and columns the predicted classes, both in
# `class_labels` order.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
plt.show()