# Using ML models for PTSD Diagnosis

In [176]:
import numpy as np
import pandas as pd

In [177]:
import os
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.patheffects as PathEffects
import pickle
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import classification_report
from catboost import CatBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from copy import deepcopy
from tqdm import tqdm
import mne

## Data Loading and Inspection 

In [178]:
path = '../Datasets/PTSD_data.csv'
input= pd.read_csv(path)
input.info()
input

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Columns: 1144 entries, Unnamed: 0 to COH.F.gamma.r.O1.s.O2
dtypes: float64(1142), int64(1), object(1)
memory usage: 929.6+ KB


Unnamed: 0.1,Unnamed: 0,IQ,specific.disorder,AB.A.delta.a.FP1,AB.A.delta.b.FP2,AB.A.delta.c.F7,AB.A.delta.d.F3,AB.A.delta.e.Fz,AB.A.delta.f.F4,AB.A.delta.g.F8,...,COH.F.gamma.o.Pz.p.P4,COH.F.gamma.o.Pz.q.T6,COH.F.gamma.o.Pz.r.O1,COH.F.gamma.o.Pz.s.O2,COH.F.gamma.p.P4.q.T6,COH.F.gamma.p.P4.r.O1,COH.F.gamma.p.P4.s.O2,COH.F.gamma.q.T6.r.O1,COH.F.gamma.q.T6.s.O2,COH.F.gamma.r.O1.s.O2
0,1,99.0,Posttraumatic stress disorder,17.603385,17.243334,11.729942,15.381709,18.307108,28.369365,11.764645,...,94.727123,73.994841,75.159075,72.479599,81.756815,74.578657,78.980627,60.508215,65.113378,69.238767
1,2,120.0,Posttraumatic stress disorder,21.714048,19.579805,18.522271,26.820075,25.429025,27.345290,16.839109,...,83.821476,61.617403,63.964423,73.096645,74.161249,50.432291,77.440228,35.794362,70.592405,48.283439
2,3,116.0,Posttraumatic stress disorder,13.371076,14.028142,16.901291,21.767857,18.227411,16.825877,17.859026,...,70.585726,63.864062,46.127953,65.675414,62.455949,33.114945,56.163998,37.477109,72.732968,46.665464
3,4,137.0,Posttraumatic stress disorder,30.473244,13.954586,23.056715,26.177810,27.884679,24.520958,19.931770,...,87.768539,71.799769,65.845016,67.734909,78.328065,58.141474,68.113967,53.391012,71.111448,67.598506
4,5,89.0,Posttraumatic stress disorder,18.488575,19.603144,9.899157,21.724115,32.700666,24.510528,13.788542,...,70.188502,27.698816,24.181746,34.445899,47.797466,20.737129,47.210870,11.104500,50.442443,36.952529
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,100,114.0,Healthy control,22.737005,23.110192,20.655895,18.224802,19.315381,16.482197,15.050377,...,88.188438,48.337650,77.414168,76.804444,51.999675,76.470883,80.365068,48.841509,56.730760,82.017645
100,101,118.0,Healthy control,20.081892,17.928614,15.977752,18.119963,17.029861,13.545934,11.612536,...,99.447826,99.424714,44.181841,61.683927,99.513428,43.007308,62.525177,42.766646,62.843978,39.895496
101,102,113.0,Healthy control,46.306229,47.379694,48.808082,50.943346,43.768029,41.659826,28.124478,...,90.988675,82.588144,91.869140,88.146513,82.027254,87.799612,85.360432,84.867957,90.909785,90.730560
102,103,130.0,Healthy control,22.152399,22.698280,19.281922,28.462510,31.292110,27.909515,19.878541,...,58.751205,46.407702,55.207337,50.262542,56.166800,37.817569,55.401371,31.634053,67.817508,43.937626


In [179]:
chs = {'FP1': [-0.03, 0.08],
       'FP2': [0.03, 0.08],
       'F7': [-0.073, 0.047],
       'F3': [-0.04, 0.041],
       'Fz': [0, 0.038],
       'F4': [0.04, 0.041],
       'F8': [0.073, 0.047],
       'T3': [-0.085, 0],
       'C3': [-0.045, 0],
       'Cz': [0, 0],
       'C4': [0.045, 0],
       'T4': [0.085, 0],
       'T5': [-0.073, -0.047],
       'P3': [-0.04, -0.041],
       'Pz': [0, -0.038],
       'P4': [0.04, -0.041],
       'T6': [0.07, -0.047],
       'O1': [-0.03, -0.08],
       'O2': [0.03, -0.08]}

In [180]:
input.describe()

Unnamed: 0.1,Unnamed: 0,IQ,AB.A.delta.a.FP1,AB.A.delta.b.FP2,AB.A.delta.c.F7,AB.A.delta.d.F3,AB.A.delta.e.Fz,AB.A.delta.f.F4,AB.A.delta.g.F8,AB.A.delta.h.T3,...,COH.F.gamma.o.Pz.p.P4,COH.F.gamma.o.Pz.q.T6,COH.F.gamma.o.Pz.r.O1,COH.F.gamma.o.Pz.s.O2,COH.F.gamma.p.P4.q.T6,COH.F.gamma.p.P4.r.O1,COH.F.gamma.p.P4.s.O2,COH.F.gamma.q.T6.r.O1,COH.F.gamma.q.T6.s.O2,COH.F.gamma.r.O1.s.O2
count,104.0,104.0,104.0,104.0,104.0,104.0,104.0,104.0,104.0,104.0,...,104.0,104.0,104.0,104.0,104.0,104.0,104.0,104.0,104.0,104.0
mean,52.5,107.259615,21.260538,21.372001,17.93705,19.610451,20.943171,19.464248,16.945616,12.26737,...,75.3479,56.401757,56.570586,59.960111,71.790169,47.446789,67.544373,39.239228,67.317265,56.749926
std,30.166206,16.257632,12.448309,11.678397,10.241729,9.779486,10.162855,9.231533,8.877651,8.850023,...,15.972142,20.542231,19.25397,18.722446,19.080201,20.263877,19.101796,20.746202,20.060872,19.817551
min,1.0,53.0,5.321305,5.46846,4.137321,5.126773,5.348178,5.482973,5.119691,3.43757,...,23.319109,8.751391,9.731274,15.963727,21.075274,5.943867,24.764012,4.798063,15.759148,12.372483
25%,26.75,96.75,13.843908,14.026967,11.393004,13.395252,14.432712,13.535109,11.88759,7.589266,...,66.842082,40.204625,42.023614,45.821284,58.687972,31.454269,51.639097,22.71332,51.730235,43.732097
50%,52.5,108.0,17.816033,17.822097,15.045729,17.376796,18.26726,17.273359,14.99427,10.63065,...,78.471001,55.419122,56.767374,60.212969,74.679592,45.204766,68.325925,35.253346,69.889159,55.795163
75%,78.25,118.0,23.564529,23.966811,20.649092,23.25544,25.538638,23.673599,19.891848,13.754442,...,86.744013,71.920058,71.082679,73.78587,86.210691,62.259547,81.030173,53.615804,81.749925,72.116579
max,104.0,141.0,76.447754,69.386059,62.775437,69.193931,69.4479,69.042085,61.342567,77.283412,...,99.451182,99.424714,96.028905,98.0046,99.513428,93.084586,99.223384,91.261379,99.102105,98.32523


## Extracting the datasets for PSd and FC columns 

In [181]:
data=pd.read_csv('../Datasets/PTSD_data_mapped.csv')
data.drop('Unnamed: 0', axis=1, inplace=True)
data

Unnamed: 0,IQ,specific.disorder,AB.A.delta.a.FP1,AB.A.delta.b.FP2,AB.A.delta.c.F7,AB.A.delta.d.F3,AB.A.delta.e.Fz,AB.A.delta.f.F4,AB.A.delta.g.F8,AB.A.delta.h.T3,...,COH.F.gamma.o.Pz.p.P4,COH.F.gamma.o.Pz.q.T6,COH.F.gamma.o.Pz.r.O1,COH.F.gamma.o.Pz.s.O2,COH.F.gamma.p.P4.q.T6,COH.F.gamma.p.P4.r.O1,COH.F.gamma.p.P4.s.O2,COH.F.gamma.q.T6.r.O1,COH.F.gamma.q.T6.s.O2,COH.F.gamma.r.O1.s.O2
0,99.0,1,17.603385,17.243334,11.729942,15.381709,18.307108,28.369365,11.764645,7.650070,...,94.727123,73.994841,75.159075,72.479599,81.756815,74.578657,78.980627,60.508215,65.113378,69.238767
1,120.0,1,21.714048,19.579805,18.522271,26.820075,25.429025,27.345290,16.839109,16.482528,...,83.821476,61.617403,63.964423,73.096645,74.161249,50.432291,77.440228,35.794362,70.592405,48.283439
2,116.0,1,13.371076,14.028142,16.901291,21.767857,18.227411,16.825877,17.859026,20.892738,...,70.585726,63.864062,46.127953,65.675414,62.455949,33.114945,56.163998,37.477109,72.732968,46.665464
3,137.0,1,30.473244,13.954586,23.056715,26.177810,27.884679,24.520958,19.931770,16.350662,...,87.768539,71.799769,65.845016,67.734909,78.328065,58.141474,68.113967,53.391012,71.111448,67.598506
4,89.0,1,18.488575,19.603144,9.899157,21.724115,32.700666,24.510528,13.788542,7.538543,...,70.188502,27.698816,24.181746,34.445899,47.797466,20.737129,47.210870,11.104500,50.442443,36.952529
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,114.0,0,22.737005,23.110192,20.655895,18.224802,19.315381,16.482197,15.050377,10.358503,...,88.188438,48.337650,77.414168,76.804444,51.999675,76.470883,80.365068,48.841509,56.730760,82.017645
100,118.0,0,20.081892,17.928614,15.977752,18.119963,17.029861,13.545934,11.612536,13.578299,...,99.447826,99.424714,44.181841,61.683927,99.513428,43.007308,62.525177,42.766646,62.843978,39.895496
101,113.0,0,46.306229,47.379694,48.808082,50.943346,43.768029,41.659826,28.124478,29.444817,...,90.988675,82.588144,91.869140,88.146513,82.027254,87.799612,85.360432,84.867957,90.909785,90.730560
102,130.0,0,22.152399,22.698280,19.281922,28.462510,31.292110,27.909515,19.878541,12.891310,...,58.751205,46.407702,55.207337,50.262542,56.166800,37.817569,55.401371,31.634053,67.817508,43.937626


In [182]:
data.isnull().sum().sum()

0

In [183]:
# Selecting relevant columns for frequency bands and coherence data.
# Here we group the columns by the common terms representing bands (delta, theta, alpha, beta, gamma).

# Identifying unique band types and their columns
delta_columns = [col for col in data.columns if 'delta' in col]
theta_columns = [col for col in data.columns if 'theta' in col]
alpha_columns = [col for col in data.columns if 'alpha' in col]
beta_columns = [col for col in data.columns if 'beta' in col]
beta_columns = [col for col in data.columns if 'beta' in col and 'high' not in col.lower()]
highbeta_columns = [col for col in data.columns if 'highbeta' in col.lower() or ('beta' in col.lower() and 'high' in col.lower())]

gamma_columns = [col for col in data.columns if 'gamma' in col]


# Calculating Power Spectral Density (PSD) for each band as the mean across relevant columns.
psd = {
    'Delta PSD': data[delta_columns].mean(axis=1),
    'Theta PSD': data[theta_columns].mean(axis=1),
    'Alpha PSD': data[alpha_columns].mean(axis=1),
    'Beta PSD': data[beta_columns].mean(axis=1),
    'Gamma PSD': data[gamma_columns].mean(axis=1),
    'Highbeta PSD': data[highbeta_columns].mean(axis=1)
}

psd = pd.DataFrame(psd)

# Calculating Functional Connectivity (FC) for each band as the mean coherence values across columns.
# Assuming coherence data involves connectivity between electrode pairs.
fc = {
    'Delta FC': data[[col for col in delta_columns if 'coh' in col]].mean(axis=1),
    'Theta FC': data[[col for col in theta_columns if 'coh' in col]].mean(axis=1),
    'Alpha FC': data[[col for col in alpha_columns if 'coh' in col]].mean(axis=1),
    'Beta FC': data[[col for col in beta_columns if 'coh' in col]].mean(axis=1),
    'Gamma FC': data[[col for col in gamma_columns if 'coh' in col]].mean(axis=1),
    'Highbeta FC': data[[col for col in highbeta_columns if 'coh' in col]].mean(axis=1)
}

fc = pd.DataFrame(fc)

# Combining PSD and FC results into one DataFrame for visualization
psd_fc = pd.concat( [data['specific.disorder'],psd, fc], axis=1)
psd_fc

Unnamed: 0,specific.disorder,Delta PSD,Theta PSD,Alpha PSD,Beta PSD,Gamma PSD,Highbeta PSD,Delta FC,Theta FC,Alpha FC,Beta FC,Gamma FC,Highbeta FC
0,1,23.298492,33.172338,34.903653,45.451818,59.059986,44.379514,,,,,,
1,1,34.724713,32.561620,40.262575,41.917416,59.738209,56.774423,,,,,,
2,1,42.980692,35.980112,36.018281,35.582571,38.081604,33.373840,,,,,,
3,1,32.252248,30.867531,36.625525,30.282558,40.547496,36.523489,,,,,,
4,1,29.157718,41.258661,31.864220,28.119552,26.744541,25.417278,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,0,44.725879,38.524309,50.608574,38.966084,36.999920,35.598044,,,,,,
100,0,43.278722,43.863084,37.503043,34.575584,32.018637,29.425901,,,,,,
101,0,43.010892,43.011625,34.946071,47.018022,74.320807,63.837066,,,,,,
102,0,54.389131,47.937415,46.746583,32.150616,36.964110,30.179780,,,,,,


## Classification

In [184]:
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix, f1_score, precision_score, classification_report

def classify(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model: {model.__class__.__name__}")
    print(f"Accuracy: {accuracy:.4f}")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("-" * 40)

In [185]:
# Define the classify function to calculate and store accuracy
def evaluate(model, model_name, X_train, y_train, X_test, y_test, results):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results.append({'Model': model_name, 'Accuracy': accuracy})


In [186]:
def compare_models(X_train, X_test, y_train, y_test):

    # List to store the results of each model   
    results = []

    # Logistic Regression
    logistic_regression = LogisticRegression(random_state=42, max_iter=10000)
    classify(logistic_regression, X_train, y_train, X_test, y_test)
    evaluate(logistic_regression, "Logistic Regression", X_train, y_train, X_test, y_test, results)

    # Random Forest
    random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
    classify(random_forest, X_train, y_train, X_test, y_test)
    evaluate(random_forest, "Random Forest", X_train, y_train, X_test, y_test, results)

    # Elastic Net
    elastic_net = LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.5, random_state=42, max_iter=10000)
    classify(elastic_net, X_train, y_train, X_test, y_test)
    evaluate(elastic_net, "Elastic Net", X_train, y_train, X_test, y_test, results)

    # Linear Kernel SVC
    linear_svc = SVC(kernel='linear', random_state=42)
    classify(linear_svc, X_train, y_train, X_test, y_test)
    evaluate(linear_svc, "Linear SVM", X_train, y_train, X_test, y_test, results)

    # XGBoost
    xgboost_model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss')
    classify(xgboost_model, X_train, y_train, X_test, y_test)
    evaluate(xgboost_model, "XGBoost", X_train, y_train, X_test, y_test, results)

    # LightGBM
    lightgbm_model = LGBMClassifier(random_state=42)
    classify(lightgbm_model, X_train, y_train, X_test, y_test)
    evaluate(lightgbm_model, "LightGBM", X_train, y_train, X_test, y_test, results)

    # CatBoost
    catboost_model = CatBoostClassifier(random_seed=42, verbose=0)
    classify(catboost_model, X_train, y_train, X_test, y_test)
    evaluate(catboost_model, "CatBoost", X_train, y_train, X_test, y_test, results)

    # Convert the results list to a DataFrame for a table format
    results_df = pd.DataFrame(results)

    # Display the results table in a formatted way
    print("Comparison of Model Accuracies:\n")
    print(results_df.to_string(index=False))

    # Conclusion based on the highest accuracy
    best_model = results_df.loc[results_df['Accuracy'].idxmax()]
    print(f"\nConclusion: The best model is **{best_model['Model']}** with an accuracy of {best_model['Accuracy']:.2%}")


In [187]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [188]:
X = data.drop('specific.disorder', axis=1)
y = data['specific.disorder']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [189]:
X_train.head(), y_train.head()

(        IQ  AB.A.delta.a.FP1  AB.A.delta.b.FP2  AB.A.delta.c.F7  \
 103  130.0         37.450821         54.405099        24.135037   
 78   115.0         34.108015         22.838567        20.646824   
 28    97.0          8.633226          8.842760         9.890202   
 79   134.0         14.581102         14.033041        13.906960   
 5    101.0          5.321305          5.468460         4.137321   
 
      AB.A.delta.d.F3  AB.A.delta.e.Fz  AB.A.delta.f.F4  AB.A.delta.g.F8  \
 103        22.605822        23.746038        25.139759        33.146333   
 78         18.203362        17.361846        16.311194        23.092874   
 28         15.020229        17.821645        15.300829         6.833061   
 79         13.086081        12.722494         9.104962         8.377018   
 5           5.126773         5.348178         5.482973         5.119691   
 
      AB.A.delta.h.T3  AB.A.delta.i.C3  ...  COH.F.gamma.o.Pz.p.P4  \
 103        13.331373        20.735334  ...              63.34

In [190]:
compare_models(X_train, X_test, y_train, y_test)

Model: LogisticRegression
Accuracy: 0.5938
              precision    recall  f1-score   support

           0       0.54      0.50      0.52        14
           1       0.63      0.67      0.65        18

    accuracy                           0.59        32
   macro avg       0.59      0.58      0.58        32
weighted avg       0.59      0.59      0.59        32

Confusion Matrix:
 [[ 7  7]
 [ 6 12]]
----------------------------------------
Model: RandomForestClassifier
Accuracy: 0.7188
              precision    recall  f1-score   support

           0       0.67      0.71      0.69        14
           1       0.76      0.72      0.74        18

    accuracy                           0.72        32
   macro avg       0.72      0.72      0.72        32
weighted avg       0.72      0.72      0.72        32

Confusion Matrix:
 [[10  4]
 [ 5 13]]
----------------------------------------
Model: LogisticRegression
Accuracy: 0.5938
              precision    recall  f1-score   support



Parameters: { "use_label_encoder" } are not used.



Model: XGBClassifier
Accuracy: 0.6875
              precision    recall  f1-score   support

           0       0.64      0.64      0.64        14
           1       0.72      0.72      0.72        18

    accuracy                           0.69        32
   macro avg       0.68      0.68      0.68        32
weighted avg       0.69      0.69      0.69        32

Confusion Matrix:
 [[ 9  5]
 [ 5 13]]
----------------------------------------


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 34, number of negative: 38
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001202 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28520
[LightGBM] [Info] Number of data points in the train set: 72, number of used features: 1141
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.472222 -> initscore=-0.111226
[LightGBM] [Info] Start training from score -0.111226
Model: LGBMClassifier
Accuracy: 0.6250
              precision    recall  f1-score   support

           0       0.60      0.43      0.50        14
           1       0.64      0.78      0.70        18

    accuracy                           0.62        32
   macro avg       0.62      0.60      0.60        32
weighted avg       0.62      0.62      0.61        32

Confusion Matrix:
 [[ 6  8]
 [ 4 14]]
----------------------------------------
[LightGBM] [Info] Number of positive: 34, number of negative: 38
[L

## AB Data

In [191]:
AB_data=pd.read_csv("../Datasets/AB_data.csv")

In [192]:
AB_data.drop('Unnamed: 0', axis=1, inplace=True)

In [193]:

X = AB_data.drop('specific.disorder', axis=1) 
y = AB_data['specific.disorder']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [194]:
compare_models(X_train, X_test, y_train, y_test)

Model: LogisticRegression
Accuracy: 0.6875
              precision    recall  f1-score   support

           0       0.62      0.71      0.67        14
           1       0.75      0.67      0.71        18

    accuracy                           0.69        32
   macro avg       0.69      0.69      0.69        32
weighted avg       0.70      0.69      0.69        32

Confusion Matrix:
 [[10  4]
 [ 6 12]]
----------------------------------------
Model: RandomForestClassifier
Accuracy: 0.6875
              precision    recall  f1-score   support

           0       0.61      0.79      0.69        14
           1       0.79      0.61      0.69        18

    accuracy                           0.69        32
   macro avg       0.70      0.70      0.69        32
weighted avg       0.71      0.69      0.69        32

Confusion Matrix:
 [[11  3]
 [ 7 11]]
----------------------------------------
Model: LogisticRegression
Accuracy: 0.6875
              precision    recall  f1-score   support



Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 34, number of negative: 38
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000572 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2870
[LightGBM] [Info] Number of data points in the train set: 72, number of used features: 115
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.472222 -> initscore=-0.111226
[LightGBM] [Info] Start training from score -0.111226
Model: LGBMClassifier
Accuracy: 0.7812
              precision    recall  f1-score   support

           0       0.77      0.71      0.74        14
           1       0.79      0.83      0.81        18

    accuracy                           0.78        32
   macro avg       0.78      0.77      0.78        32
weighted avg       0.78      0.78      0.78        32

Confusion Matrix:
 [[10  4]
 [ 3 15]]
----------------------------------------
[LightGBM] [Info] Number of positive: 34, number of negative: 38
[Lig

## COH Data

In [195]:
COH_data = pd.read_csv("../Datasets/COH_data.csv")

In [196]:

X = data.drop('specific.disorder', axis=1) 
y = COH_data['specific.disorder']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [197]:
compare_models(X_train, X_test, y_train, y_test)

Model: LogisticRegression
Accuracy: 0.5938
              precision    recall  f1-score   support

           0       0.54      0.50      0.52        14
           1       0.63      0.67      0.65        18

    accuracy                           0.59        32
   macro avg       0.59      0.58      0.58        32
weighted avg       0.59      0.59      0.59        32

Confusion Matrix:
 [[ 7  7]
 [ 6 12]]
----------------------------------------
Model: RandomForestClassifier
Accuracy: 0.7188
              precision    recall  f1-score   support

           0       0.67      0.71      0.69        14
           1       0.76      0.72      0.74        18

    accuracy                           0.72        32
   macro avg       0.72      0.72      0.72        32
weighted avg       0.72      0.72      0.72        32

Confusion Matrix:
 [[10  4]
 [ 5 13]]
----------------------------------------
Model: LogisticRegression
Accuracy: 0.5938
              precision    recall  f1-score   support



Parameters: { "use_label_encoder" } are not used.



Model: XGBClassifier
Accuracy: 0.6875
              precision    recall  f1-score   support

           0       0.64      0.64      0.64        14
           1       0.72      0.72      0.72        18

    accuracy                           0.69        32
   macro avg       0.68      0.68      0.68        32
weighted avg       0.69      0.69      0.69        32

Confusion Matrix:
 [[ 9  5]
 [ 5 13]]
----------------------------------------


Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 34, number of negative: 38
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005621 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28520
[LightGBM] [Info] Number of data points in the train set: 72, number of used features: 1141
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.472222 -> initscore=-0.111226
[LightGBM] [Info] Start training from score -0.111226
Model: LGBMClassifier
Accuracy: 0.6250
              precision    recall  f1-score   support

           0       0.60      0.43      0.50        14
           1       0.64      0.78      0.70        18

    accuracy                           0.62        32
   macro avg       0.62      0.60      0.60        32
weighted avg       0.62      0.62      0.61        32

Confusion Matrix:
 [[ 6  8]
 [ 4 14]]
----------------------------------------
[LightGBM] [Info] Number of positive: 34, number of negative: 38
[L

In [198]:
X = psd_fc.drop('specific.disorder', axis=1) 
y = psd_fc['specific.disorder']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
compare_models(X_train, X_test, y_train, y_test)

ValueError: Input X contains NaN.
LogisticRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values