In [1]:
import os

import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model, metrics


# Linear Regression


In [2]:
HYPER_max_pad = 10

os.makedirs("./results/stats", exist_ok=True)
categorical_vars = []
for i in range(HYPER_max_pad):
    categorical_vars.extend(
        [
            f"{x}_{i}"
            for x in (
                "Glasgow Coma Score - Verbal Response",
                "Glasgow Coma Score - Motor Response",
                "Glasgow Coma Score - Eye Opening",
                "Glasgow Coma Score - Total",
                "Circadian rhythm",
                "Richmond agitation-sedation scale",
                "Ventilator Airway Code",
            )
        ]
    )

for cl, cl_name in ((1, "Normal"), (2, "Mild"), (3, "Severe")):
    print(f"Class: {cl_name}")
    X_train = pd.read_csv(
        f"./results/splits/X_train_{cl}_{cl_name}_padded_translated.csv",
        dtype={k: "category" for k in categorical_vars},
    )
    y_train = pd.read_csv(f"./results/splits/y_train_{cl}_{cl_name}.csv")
    X_test = pd.read_csv(
        f"./results/splits/X_test_{cl}_{cl_name}_padded_translated.csv",
        dtype={k: "category" for k in categorical_vars},
    )
    y_test = pd.read_csv(f"./results/splits/y_test_{cl}_{cl_name}.csv")

    # drop useless columns (both X_train and X_test have the same columns)
    cols_to_drop = list()
    for col in X_train.columns:
        if (
            "Patient ID" in col  # remove all "Patient ID_N"
            or "Sequential ID" in col  # remove all "Sequential ID_N"
        ):
            cols_to_drop.append(col)

    X_train.drop(cols_to_drop, axis=1, inplace=True)
    X_test.drop(cols_to_drop, axis=1, inplace=True)
    model = linear_model.LogisticRegression(penalty="none", n_jobs=-1, random_state=666)
    model.fit(X_train, y_train.values.ravel())

    y_predicted = model.predict(X_test)
    print(
        metrics.classification_report(
            y_test, y_predicted, labels=[0, 1], target_names=["Positive", "Negative"]
        )
    )
    print(f"ROC AUC Score: {metrics.roc_auc_score(y_test, y_predicted)}")
    metrics.plot_roc_curve(model, X_test, y_test, pos_label=0)
    plt.tight_layout()
    plt.savefig(f"./results/stats/logistic_regression_{cl}_{cl_name}.png")
    plt.close()


Class: Normal


  return f(*args, **kwargs)


              precision    recall  f1-score   support

    Positive       0.91      1.00      0.95     13031
    Negative       0.00      0.00      0.00      1242

    accuracy                           0.91     14273
   macro avg       0.46      0.50      0.48     14273
weighted avg       0.83      0.91      0.87     14273

ROC AUC Score: 0.4999232599186555


  return f(*args, **kwargs)


Class: Mild


  return f(*args, **kwargs)
  return f(*args, **kwargs)


              precision    recall  f1-score   support

    Positive       0.58      0.30      0.39      1982
    Negative       0.69      0.88      0.77      3532

    accuracy                           0.67      5514
   macro avg       0.63      0.59      0.58      5514
weighted avg       0.65      0.67      0.64      5514

ROC AUC Score: 0.5876226925683359
Class: Severe


  return f(*args, **kwargs)
  return f(*args, **kwargs)


              precision    recall  f1-score   support

    Positive       0.60      0.25      0.35       793
    Negative       0.76      0.94      0.84      2049

    accuracy                           0.74      2842
   macro avg       0.68      0.59      0.59      2842
weighted avg       0.72      0.74      0.70      2842

ROC AUC Score: 0.5909840065925802
