## model_eicu_compact


## Prepare


In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from catboost import CatBoostClassifier
from sklearn.model_selection import cross_validate
from common_eicu import KEY_IDENTITY, KEY_OFFSET, \
    KEY_FLAG, CATEGORICAL_COLUMNS
from common_model import SEED, SCORING, cv


In [7]:
# Column subset used by the compact model: the KEY_OFFSET column first,
# followed by the named feature columns. The list order is the column order
# of the frame selected with df_data[COMPACT_COLUMNS].
COMPACT_COLUMNS = [KEY_OFFSET] + [
    'albumin',
    'lymph',
    'heart rate',
    'respiration rate',
    'total protein',
    'pH',
    'creatinine',
    'Hct',
    'ROX index',
    'calcium',
    'AST',
    'WBC',
    'platelet',
    'potassium',
]


In [8]:
# Load the dataset from the gzip-compressed CSV file.
df_data = pd.read_csv('./data/data_eicu_full.csv.gz')

# Clamp infinite values. Only positive infinity (np.inf) is replaced, with
# the sentinel 9999; negative infinity, if present, is left untouched.
df_data.replace(np.inf, 9999, inplace=True)

# Give every categorical column the pandas 'category' dtype.
for column_name in CATEGORICAL_COLUMNS:
    df_data[column_name] = df_data[column_name].astype('category')

# Feature frame restricted to the compact column subset.
X_raw = df_data[COMPACT_COLUMNS]

# Labels and group identifiers as NumPy arrays. Series.ravel() is deprecated
# since pandas 2.2 (it emits a FutureWarning); Series.to_numpy() is the
# documented replacement.
y = df_data[KEY_FLAG].to_numpy()
groups = df_data[KEY_IDENTITY].to_numpy()


In [15]:
def test_model(model, X):
    """Cross-validate ``model`` on ``X`` and print a short score summary.

    The labels ``y``, the ``groups`` array, the splitter ``cv`` and the
    ``SCORING`` specification are taken from module scope. The summary
    reports the mean and standard deviation of the ``'test_accuracy'`` and
    ``'test_roc_auc'`` entries returned by ``cross_validate``.

    Args:
        model: Estimator passed to ``cross_validate``.
        X: Feature data passed to ``cross_validate``.

    Returns:
        None. The summary is written to standard output.
    """
    results = cross_validate(
        model, X, y, cv=cv, scoring=SCORING, groups=groups
    )

    # Per-fold scores for the two reported metrics.
    accuracy_folds = results['test_accuracy']
    auc_folds = results['test_roc_auc']

    report_lines = [
        '>>> CV Result',
        f'accuracy_mean: {accuracy_folds.mean():.4f}',
        f'accuracy_std:  {accuracy_folds.std():.4f}',
        f'auc_mean:      {auc_folds.mean():.4f}',
        f'auc_std:       {auc_folds.std():.4f}',
    ]
    print('\n'.join(report_lines))


## CatBoost


In [18]:
# CatBoost classifier for the compact feature set. Only the categorical
# columns that are part of COMPACT_COLUMNS are declared as cat_features;
# their order follows CATEGORICAL_COLUMNS.
catboost_model = CatBoostClassifier(
    cat_features=[
        column for column in CATEGORICAL_COLUMNS if column in COMPACT_COLUMNS
    ],
    task_type='GPU',
    devices='0',
    random_state=SEED,
    verbose=False,
)


In [19]:
# Cross-validate the CatBoost model on the compact feature frame.
test_model(model=catboost_model, X=X_raw)


>>> CV Result
accuracy_mean: 0.9602
accuracy_std:  0.0056
auc_mean:      0.9003
auc_std:       0.0113
