In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

file_path = "RealisticSyntheticCognitiveData.csv"
df = pd.read_csv(file_path)


print(df.info())
print(df.head())


X = df.drop(columns=['participant_id', 'diagnosis'])
y = df['diagnosis']

le = LabelEncoder()
y_encoded = le.fit_transform(y)


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)


clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)


y_pred = clf.predict(X_test)


print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))


In [None]:
# Select one test sample (for example, the first one in the test set)
sample = X_test[0].reshape(1, -1)


predicted_class = clf.predict(sample)


predicted_label = le.inverse_transform(predicted_class)

print("Predicted label for the test sample:", predicted_label[0])


Predicted label for the test sample: no_alzheimers


In [238]:
joblib.dump(clf, "alzheimers_model.pkl")
print("Saved RandomForestClassifier to alzheimers_model.pkl")


Saved RandomForestClassifier to alzheimers_model.pkl


In [None]:
import coremltools as ct
from coremltools.models.utils import rename_feature  
from coremltools.proto import Model_pb2

clf_loaded = joblib.load("alzheimers_model.pkl")


coreml_model = ct.converters.sklearn.convert(
    clf_loaded,
    input_features=X.columns.tolist()  #
)



spec = coreml_model.get_spec()



if spec.WhichOneof("Type") == "pipeline":

    for model_spec in spec.pipeline.models:
        if model_spec.HasField("treeEnsembleClassifier"):
            model_spec.treeEnsembleClassifier.classLabels.stringClassLabels[:] = [
                "no_alzheimers",
                "early_alzheimers",
                "advanced_alzheimers"
            ]
elif spec.WhichOneof("Type") == "treeEnsembleClassifier":

    spec.treeEnsembleClassifier.classLabels.stringClassLabels[:] = [
        "no_alzheimers",
        "early_alzheimers",
        "advanced_alzheimers"
    ]


rename_feature(spec, spec.description.output[0].name, "diagnosis_prediction")
rename_feature(spec, spec.description.output[1].name, "diagnosis_probability")


final_model = ct.models.MLModel(spec)
final_model.save("AlzheimersPredictor.mlmodel")
print("Core ML model saved: AlzheimersPredictor.mlmodel")

Core ML model saved: AlzheimersPredictor.mlmodel


In [None]:
import numpy as np


one_test_sample = X_test[0] 
feature_names = X.columns.tolist()  #


input_dict = {}
for i, name in enumerate(feature_names):
    input_dict[name] = float(one_test_sample[i])

# Use the Core ML model to predict for testing 
coreml_output = final_model.predict(input_dict)


print("Core ML output:", coreml_output)
print("Predicted label (Core ML):", coreml_output["diagnosis_prediction"])
print("Probabilities:", coreml_output["diagnosis_probability"])


Core ML output: {'diagnosis_probability': {0: 0.0, 1: 2.0, 2: 98.0}, 'diagnosis_prediction': 2}
Predicted label (Core ML): 2
Probabilities: {0: 0.0, 1: 2.0, 2: 98.0}


In [None]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)


print("LabelEncoder class order:", le.classes_)


LabelEncoder class order: ['alzheimers' 'maybe_alzheimers' 'no_alzheimers']


In [None]:
single_pred_sklearn = clf.predict(sample) 
print(le.inverse_transform(single_pred_sklearn))
coreml_output = final_model.predict(input_dict)
print(coreml_output["diagnosis_prediction"])
#Figuring out order 

['no_alzheimers']
2
