In [None]:
import pickle
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

<h5 style="color: SkyBlue;">Load Dataset</h5>

In [2]:
df = pd.read_csv('user_data_for_disease_prediction - unclassified data set.csv')
print(df.head())

   Heart Rate (bpm)  Breathing Rate (brpm)  Oxygen Saturation (%)  \
0              80.3                   12.2                   96.4   
1              73.1                   17.7                   95.9   
2              72.2                   18.0                   96.0   
3              70.6                   14.7                   95.1   
4              99.5                   19.5                   97.6   

   Blood Pressure (systolic)  Blood Pressure (diastolic)  Stress Index  \
0                      107.3                        74.2          39.6   
1                       92.4                        70.8          98.7   
2                      102.4                        75.6          45.3   
3                      110.0                        62.2          77.8   
4                      110.2                        73.0          57.3   

   Recovery Ability  PNS Index  SNS Index  RMSSD (ms)  SD2 (ms)  \
0                 0       -0.9        0.4        49.7      67.9   
1     

In [None]:
with open("balanced_data.pkl", "rb") as f:
    X_balanced, y_balanced = pickle.load(f)

print("Loaded Feature Shape:", X_balanced.shape)
print("Loaded Target Shape:", y_balanced.shape)
print("Loaded Distribution after SMOTE:\n", y_balanced.value_counts())

Disease
Atherosclerosis                         152809
Hypertension                            152809
Cardiovascular Disease (CVD)            152809
Chronic Fatigue Syndrome (CFS)          152809
Respiratory Disease (COPD or Asthma)    152809
Stress-related Disorders                152809
Arrhythmias                             152809
Healthy                                 152809
Autonomic Dysfunction                   152809
Diabetes                                152809
Anaemia                                 152809
Name: count, dtype: int64


<h5 style="color: SkyBlue;">Stratify Sampling</h5>

In [None]:
X = df.drop("Disease", axis=1)  # Features
y = df["Disease"]               # Target

sample_sizes = [25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000, 20000, 30000, 40000, 50000]

training_sample_sizes = [max(size - size // 5, 1) for size in sample_sizes]  
testing_sample_sizes = [size - train_size for size, train_size in zip(sample_sizes, training_sample_sizes)]  

print("Training sample sizes:", training_sample_sizes)
print("Testing sample sizes:", testing_sample_sizes)

Training sample sizes: [20, 40, 60, 80, 200, 400, 600, 800, 2000, 4000, 6000, 8000, 16000, 24000, 32000, 40000]
Testing sample sizes: [5, 10, 15, 20, 50, 100, 150, 200, 500, 1000, 1500, 2000, 4000, 6000, 8000, 10000]


<h5 style="color: SkyBlue;">Logistic Regression</h5>

In [6]:
log_reg = LogisticRegression(max_iter=2000)
min_class_samples = 1 

for train_size in training_sample_sizes:
    test_size = min_class_samples * 11  
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size, test_size=test_size, stratify=y)
    
    log_reg.fit(X_train, y_train)
    
    y_pred = log_reg.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_percent = accuracy * 100  # Convert to percentage for display
    
    print(f"\nLogistic Regression Analysis - Training Sample Size: {train_size}")
    print(f"Accuracy: {accuracy_percent:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=df["Disease"].unique()))


Logistic Regression Analysis - Training Sample Size: 20
Accuracy: 45.45%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.50      1.00      0.67         1
                        Hypertension       0.50      1.00      0.67         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 40
Accuracy: 18.18%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.50      1.00      0.67         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.50      1.00      0.67         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 60
Accuracy: 36.36%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.50      1.00      0.67         1
               Autonomic Dysfunction       0.50      1.00      0.67         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 80
Accuracy: 45.45%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.50      1.00      0.67         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.33      1.00      0.50         1
                             Healthy       

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 200
Accuracy: 45.45%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.50      1.00      0.67         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.50      1.00      0.67         1
                             Healthy      

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 400
Accuracy: 72.73%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.33      1.00      0.50         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy      

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 600
Accuracy: 72.73%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.50      1.00      0.67         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.50      1.00      0.67         1
                             Healthy      

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 800
Accuracy: 63.64%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.50      1.00      0.67         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.33      1.00      0.50         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       0.50      1.00      0.67         1
                             Healthy      

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 2000
Accuracy: 72.73%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.50      1.00      0.67         1
                             Healthy     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 4000
Accuracy: 72.73%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.50      1.00      0.67         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 6000
Accuracy: 54.55%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       0.33      1.00      0.50         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.33      1.00      0.50         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 8000
Accuracy: 90.91%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.50      1.00      0.67         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 16000
Accuracy: 81.82%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       0.50      1.00      0.67         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy    

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 24000
Accuracy: 90.91%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.50      1.00      0.67         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy    

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Logistic Regression Analysis - Training Sample Size: 32000
Accuracy: 72.73%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.50      1.00      0.67         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       0.50      1.00      0.67         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy    

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


<h5 style="color: SkyBlue;">Decision Tree</h5>

In [7]:
decision_tree_model = DecisionTreeClassifier(random_state=42)

min_class_samples = 1 

for train_size in training_sample_sizes:
    test_size = min_class_samples * 11  
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size, test_size=test_size, stratify=y)
    
    decision_tree_model.fit(X_train, y_train)
    
    y_pred = decision_tree_model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_percent = accuracy * 100  # Convert to percentage for display
    
    print(f"\nDecision Tree Analysis - Training Sample Size: {train_size}")
    print(f"Accuracy: {accuracy_percent:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=df["Disease"].unique()))


Decision Tree Analysis - Training Sample Size: 20
Accuracy: 45.45%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.50      1.00      0.67         1
            Stress-related Disorders       0.50      1.00      0.67         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       0.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Decision Tree Analysis - Training Sample Size: 40
Accuracy: 36.36%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.50      1.00      0.67         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       1.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Decision Tree Analysis - Training Sample Size: 60
Accuracy: 63.64%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.50      1.00      0.67         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       0.50  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Decision Tree Analysis - Training Sample Size: 80
Accuracy: 45.45%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.33      1.00      0.50         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       0.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Decision Tree Analysis - Training Sample Size: 200
Accuracy: 81.82%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.50      1.00      0.67         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       0.50      1.00      0.67         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Decision Tree Analysis - Training Sample Size: 400
Accuracy: 81.82%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.50      1.00      0.67         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       1.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Decision Tree Analysis - Training Sample Size: 600
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00

<h5 style="color: SkyBlue;">Random Forest</h5>

In [8]:
random_forest_model = RandomForestClassifier(random_state=42)

min_class_samples = 1 

for train_size in training_sample_sizes:
    test_size = min_class_samples * 11  
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size, test_size=test_size, stratify=y)
    
    random_forest_model.fit(X_train, y_train)
    
    y_pred = random_forest_model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_percent = accuracy * 100
    
    print(f"\nRandom Forest Analysis - Training Sample Size: {train_size}")
    print(f"Accuracy: {accuracy_percent:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=df["Disease"].unique()))


Random Forest Analysis - Training Sample Size: 20
Accuracy: 27.27%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.50      1.00      0.67         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.33      1.00      0.50         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       1.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Random Forest Analysis - Training Sample Size: 40
Accuracy: 54.55%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.33      1.00      0.50         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.50      1.00      0.67         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.33      1.00      0.50         1
                             Healthy       1.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Random Forest Analysis - Training Sample Size: 60
Accuracy: 81.82%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Random Forest Analysis - Training Sample Size: 200
Accuracy: 90.91%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.50      1.00      0.67         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Random Forest Analysis - Training Sample Size: 400
Accuracy: 90.91%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.50      1.00      0.67         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Random Forest Analysis - Training Sample Size: 600
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00

<h5 style="color: SkyBlue;">Gradient Boosting (GBM)</h5>

In [9]:
gradient_boosting_model = GradientBoostingClassifier(random_state=42)

min_class_samples = 1 

for train_size in training_sample_sizes:
    test_size = min_class_samples * 11  
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size, test_size=test_size, stratify=y)
    
    gradient_boosting_model.fit(X_train, y_train)
    
    y_pred = gradient_boosting_model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_percent = accuracy * 100
    
    print(f"\nGradient Boosting (GBM) Analysis - Training Sample Size: {train_size}")
    print(f"Accuracy: {accuracy_percent:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=df["Disease"].unique()))


Gradient Boosting (GBM) Analysis - Training Sample Size: 20
Accuracy: 18.18%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.33      1.00      0.50         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Gradient Boosting (GBM) Analysis - Training Sample Size: 40
Accuracy: 9.09%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.25      1.00      0.40         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Gradient Boosting (GBM) Analysis - Training Sample Size: 60
Accuracy: 63.64%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.50      1.00      0.67         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Gradient Boosting (GBM) Analysis - Training Sample Size: 80
Accuracy: 72.73%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       0.33      1.00      0.50         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.50      1.00      0.67         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Gradient Boosting (GBM) Analysis - Training Sample Size: 200
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy 

<h5 style="color: SkyBlue;">K-Nearest Neighbors (KNN)</h5>

In [10]:
knn_model = KNeighborsClassifier()

min_class_samples = 1 

for train_size in training_sample_sizes:
    test_size = min_class_samples * 11  
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size, test_size=test_size, stratify=y)
    
    knn_model.fit(X_train, y_train)
    
    y_pred = knn_model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_percent = accuracy * 100
    
    print(f"\nK-Nearest Neighbors (KNN) Analysis - Training Sample Size: {train_size}")
    print(f"Accuracy: {accuracy_percent:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=df["Disease"].unique()))


K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 20
Accuracy: 27.27%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.25      1.00      0.40         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.33      1.00      0.50         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 40
Accuracy: 18.18%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.33      1.00      0.50         1
                     Atherosclerosis       0.50      1.00      0.67         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 60
Accuracy: 18.18%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.50      1.00      0.67         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 80
Accuracy: 18.18%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 200
Accuracy: 45.45%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.33      1.00      0.50         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 400
Accuracy: 36.36%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       0.25      1.00      0.40         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 600
Accuracy: 36.36%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       0.50      1.00      0.67         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.33      1.00      0.50         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.50      1.00      0.67         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 800
Accuracy: 45.45%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.50      1.00      0.67         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 2000
Accuracy: 54.55%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.50      1.00      0.67         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       0.33      1.00      0.50         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Health

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 4000
Accuracy: 63.64%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.33      1.00      0.50         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Health

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 6000
Accuracy: 72.73%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.50      1.00      0.67         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       0.00      0.00      0.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Health

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 8000
Accuracy: 81.82%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Health

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 16000
Accuracy: 63.64%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.50      1.00      0.67         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healt

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 24000
Accuracy: 81.82%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.50      1.00      0.67         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healt

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



K-Nearest Neighbors (KNN) Analysis - Training Sample Size: 32000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Heal

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


<h5 style="color: SkyBlue;">XGBoost</h5>

In [11]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)

min_class_samples = 1 

for train_size in training_sample_sizes:
    test_size = min_class_samples * 11  
    
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, train_size=train_size, test_size=test_size, stratify=y_encoded)
    
    xgb_model.fit(X_train, y_train)
    
    y_pred = xgb_model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_percent = accuracy * 100
    
    print(f"\nXGBoost Analysis - Training Sample Size: {train_size}")
    print(f"Accuracy: {accuracy_percent:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=df["Disease"].unique()))

Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 20
Accuracy: 54.55%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.50      1.00      0.67         1
                     Atherosclerosis       0.50      1.00      0.67         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.50      1.00      0.67         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       0.00      0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 40
Accuracy: 45.45%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.50      1.00      0.67         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.33      1.00      0.50         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       1.00      1.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 60
Accuracy: 54.55%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.50      1.00      0.67         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.50      1.00      0.67         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       0.50      1.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 80
Accuracy: 81.82%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.50      1.00      0.67         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       1.00      1.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 200
Accuracy: 90.91%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.50      1.00      0.67         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00      1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 400
Accuracy: 90.91%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.50      1.00      0.67         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       1.00      1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 600
Accuracy: 90.91%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.50      1.00      0.67         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00      1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 800
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00      

Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 2000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00     

Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 4000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00     

Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 6000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00     

Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 8000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00     

Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 16000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00    

Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 24000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00    

Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 32000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00    

Parameters: { "use_label_encoder" } are not used.




XGBoost Analysis - Training Sample Size: 40000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00    

<h5 style="color: SkyBlue;">Extra Trees Classifier</h5>

In [12]:
extra_trees_model = ExtraTreesClassifier(random_state=42)

min_class_samples = 1 

for train_size in training_sample_sizes:
    test_size = min_class_samples * 11  
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size, test_size=test_size, stratify=y)
    
    extra_trees_model.fit(X_train, y_train)
    
    y_pred = extra_trees_model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_percent = accuracy * 100
    
    print(f"\nExtra Trees Analysis - Training Sample Size: {train_size}")
    print(f"Accuracy: {accuracy_percent:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=df["Disease"].unique()))


Extra Trees Analysis - Training Sample Size: 20
Accuracy: 54.55%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.50      1.00      0.67         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.33      1.00      0.50         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Extra Trees Analysis - Training Sample Size: 40
Accuracy: 36.36%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       0.50      1.00      0.67         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Extra Trees Analysis - Training Sample Size: 60
Accuracy: 45.45%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       0.00      0.00      0.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       0.00      0.00      0.00         1
            Stress-related Disorders       0.50      1.00      0.67         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Extra Trees Analysis - Training Sample Size: 80
Accuracy: 36.36%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       0.00      0.00      0.00         1
                     Atherosclerosis       0.00      0.00      0.00         1
                        Hypertension       0.50      1.00      0.67         1
        Cardiovascular Disease (CVD)       0.50      1.00      0.67         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       0.50      1.00      0.67         1
                         Arrhythmias       0.00      0.00      0.00         1
                             Anaemia       0.50      1.00      0.67         1
            Stress-related Disorders       0.00      0.00      0.00         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Extra Trees Analysis - Training Sample Size: 200
Accuracy: 72.73%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       0.00      0.00      0.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       0.00      0.00      0.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       0.50      1.00      0.67         1
                            Diabetes       0.00      0.00      0.00         1
                             Healthy       1.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Extra Trees Analysis - Training Sample Size: 400
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Extra Trees Analysis - Training Sample Size: 2000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Extra Trees Analysis - Training Sample Size: 40000
Accuracy: 100.00%

Classification Report:
                                      precision    recall  f1-score   support

      Chronic Fatigue Syndrome (CFS)       1.00      1.00      1.00         1
                     Atherosclerosis       1.00      1.00      1.00         1
                        Hypertension       1.00      1.00      1.00         1
        Cardiovascular Disease (CVD)       1.00      1.00      1.00         1
Respiratory Disease (COPD or Asthma)       1.00      1.00      1.00         1
               Autonomic Dysfunction       1.00      1.00      1.00         1
                         Arrhythmias       1.00      1.00      1.00         1
                             Anaemia       1.00      1.00      1.00         1
            Stress-related Disorders       1.00      1.00      1.00         1
                            Diabetes       1.00      1.00      1.00         1
                             Healthy       1.00