In [1]:
import os

import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model, metrics


# Linear Regression


In [2]:
os.makedirs("./results/stats", exist_ok=True)

for cl, cl_name in ((1, "Normal"), (2, "Mild"), (3, "Severe")):
    print(f"Class: {cl_name}")
    X_train = pd.read_csv(
        f"./results/splits/X_train_{cl}_{cl_name}_padded_translated.csv"
    )
    y_train = pd.read_csv(f"./results/splits/y_train_{cl}_{cl_name}.csv")
    X_test = pd.read_csv(
        f"./results/splits/X_test_{cl}_{cl_name}_padded_translated.csv"
    )
    y_test = pd.read_csv(f"./results/splits/y_test_{cl}_{cl_name}.csv")

    # drop useless columns (both X_train and X_test have the same columns)
    cols_to_drop = list()
    for col in X_train.columns:
        if (
            "Patient ID" in col  # remove all "Patient ID_N"
            or "Sequential ID" in col  # remove all "Sequential ID_N"
        ):
            cols_to_drop.append(col)

    X_train.drop(cols_to_drop, axis=1, inplace=True)
    X_test.drop(cols_to_drop, axis=1, inplace=True)
    model = linear_model.LogisticRegression(penalty="none", n_jobs=-1, random_state=666)
    model.fit(X_train, y_train.values.ravel())

    y_predicted = model.predict(X_test)
    print(
        metrics.classification_report(
            y_test, y_predicted, labels=[0, 1], target_names=["Positive", "Negative"]
        )
    )
    print(f"ROC AUC Score: {metrics.roc_auc_score(y_test, y_predicted)}")
    metrics.plot_roc_curve(model, X_test, y_test, pos_label=0)
    plt.tight_layout()
    plt.savefig(f"./results/stats/logistic_regression_{cl}_{cl_name}.jpg")
    plt.close()


Class: Normal
              precision    recall  f1-score   support

    Positive       0.91      1.00      0.96     13041
    Negative       0.00      0.00      0.00      1220

    accuracy                           0.91     14261
   macro avg       0.46      0.50      0.48     14261
weighted avg       0.84      0.91      0.87     14261

ROC AUC Score: 0.4998849781458477
Class: Mild
              precision    recall  f1-score   support

    Positive       0.61      0.30      0.40      2016
    Negative       0.69      0.89      0.77      3494

    accuracy                           0.67      5510
   macro avg       0.65      0.59      0.59      5510
weighted avg       0.66      0.67      0.64      5510

ROC AUC Score: 0.5944112810168907
Class: Severe
              precision    recall  f1-score   support

    Positive       0.58      0.23      0.33       822
    Negative       0.75      0.93      0.83      2018

    accuracy                           0.73      2840
   macro avg       0