In [None]:
import os

import numpy as np
import pandas as pd

# Seed NumPy's global RNG so the np.random.permutation shuffles further down are reproducible.
np.random.seed(32)

In [None]:
# Load the complete list file; later cells filter it on its 'y_true' column.
labels_file = pd.read_csv('../data/in-hospital-mortality/listfile.csv')

In [None]:
def split(data: pd.DataFrame, ratio: float) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Cut ``data`` into a leading and a trailing part by row position.

    Rows are not shuffled: the first ``floor(len(data) * ratio)`` rows form the
    first part and every remaining row forms the second part.

    Args:
        data: Frame to cut. Rows are selected by position, not by index label.
        ratio: Fraction of the rows (0 to 1 inclusive) that goes into the first part.

    Returns:
        ``(head, tail)``: the first ``floor(len(data) * ratio)`` rows and the rest.

    Raises:
        ValueError: If ``ratio`` lies outside ``[0, 1]`` (this also rejects NaN).
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be between 0 and 1, got {ratio!r}")
    split_index = int(np.floor(len(data) * ratio))
    # .iloc keeps the cut positional regardless of the frame's index type.
    return data.iloc[:split_index], data.iloc[split_index:]

In [None]:
# Partition the list file by label: rows with y_true == 0 and rows with y_true == 1.
negative_samples = labels_file.loc[labels_file['y_true'].eq(0)]
positive_samples = labels_file.loc[labels_file['y_true'].eq(1)]

In [None]:
# Hold out the first 15% of each class; the remaining 85% becomes the training part.
test_negative_samples, train_negative_samples = split(negative_samples, 0.15)
test_positive_samples, train_positive_samples = split(positive_samples, 0.15)

# Divide the held-out rows again: the first 80% stay in test, the rest go to validation.
test_negative_samples, val_negative_samples = split(test_negative_samples, 0.8)
test_positive_samples, val_positive_samples = split(test_positive_samples, 0.8)

# Merge both classes per split, then shuffle the rows with a random permutation of
# their positions. The permutations are drawn in the order test, train, val.
test_samples = pd.concat([test_negative_samples, test_positive_samples]).pipe(
    lambda frame: frame.iloc[np.random.permutation(np.arange(len(frame)))]
)
train_samples = pd.concat([train_negative_samples, train_positive_samples]).pipe(
    lambda frame: frame.iloc[np.random.permutation(np.arange(len(frame)))]
)
val_samples = pd.concat([val_negative_samples, val_positive_samples]).pipe(
    lambda frame: frame.iloc[np.random.permutation(np.arange(len(frame)))]
)



In [None]:
# Report the number of positive and negative rows, and the positive ratio, for
# each split. One loop replaces three copy-pasted print groups; the printed text
# is the same as before for non-empty splits.
for split_name, split_positives, split_negatives in (
    ("TEST", test_positive_samples, test_negative_samples),
    ("TRAIN", train_positive_samples, train_negative_samples),
    ("VAL", val_positive_samples, val_negative_samples),
):
    positive_count = len(split_positives)
    negative_count = len(split_negatives)
    total_count = positive_count + negative_count
    print(f"{split_name}: positive samples: ", positive_count)
    print(f"{split_name}: negative samples: ", negative_count)
    # An empty split has no defined ratio; print nan instead of raising ZeroDivisionError.
    print(f"{split_name}: ratio of positive samples: ",
          positive_count / total_count if total_count else float("nan"))

In [None]:

# Persist each shuffled split as its own list file, without the index column.
test_samples.to_csv(
    path_or_buf='../data/in-hospital-mortality/test_listfile.csv',
    index=False,
)
train_samples.to_csv(
    path_or_buf='../data/in-hospital-mortality/train_listfile.csv',
    index=False,
)
val_samples.to_csv(
    path_or_buf='../data/in-hospital-mortality/val_listfile.csv',
    index=False,
)

In [None]:
# Read the list file again, this time as an array of strings with the header row
# skipped. Cells below index it as 2-D: column 0 is used as the episode file name
# and column 1 is parsed as the integer label.
listfile = np.loadtxt(
    "../data/in-hospital-mortality/listfile.csv",
    dtype=str,
    delimiter=",",
    skiprows=1,
)

In [None]:
# Copy the first column of the list file (the episode file names) into its own array.
files = np.array(list(listfile[:, 0]))

In [None]:
# Read every episode file named in the first column of `listfile`. The list keeps
# the row order of `listfile`; the sample-building cell relies on that alignment
# when it looks up each episode's label by position.
# (`os` is imported in the notebook's top import cell.)
data = [
    pd.read_csv(os.path.join("../data/in-hospital-mortality", episode_file))
    for episode_file in listfile[:, 0]
]

In [None]:
# Stack all episodes into one frame and collect the distinct values of every column.
combined_dataframe = pd.concat(data)
unique_values = combined_dataframe.apply(pd.Series.unique)

In [None]:
# Names of the columns treated as continuous measurements.
# (The variable name keeps its original spelling because other cells may refer to it.)
continuos_column = [
    'Hours',
    'Diastolic blood pressure',
    'Fraction inspired oxygen',
    'Glucose',
    'Heart Rate',
    'Height',
    'Mean blood pressure',
    'Oxygen saturation',
    'Respiratory rate',
    'Systolic blood pressure',
    'Temperature',
    'Weight',
    'pH',
]
# Show the distinct values recorded for one of those columns.
unique_values["Diastolic blood pressure"]

In [None]:
# Text-to-score lookup tables for the three coma-scale columns. Each table lists
# its responses in ascending score order, and scores start at 1.
# NOTE(review): these tables are applied with Series.map, which turns any value
# that is not a key here into NaN. Confirm the episode files only contain these
# exact spellings.
gcs_eye_mapping = {
    response: score
    for score, response in enumerate(
        ['No Response', 'To Pain', 'To Speech', 'Spontaneously'],
        start=1,
    )
}
gcs_verbal_mapping = {
    response: score
    for score, response in enumerate(
        ['No Response', 'Incomprehensible sounds', 'Inappropriate Words', 'Confused', 'Oriented'],
        start=1,
    )
}
gcs_motor_mapping = {
    response: score
    for score, response in enumerate(
        ['No Response', 'Abnormal extension', 'Abnormal Flexion', 'Flex-withdraws',
         'Localizes Pain', 'Obeys Commands'],
        start=1,
    )
}


def sliding_window_iter(series, size):
    """Yield consecutive windows of ``size`` items that together cover ``series``.

    Windows start at positions 0, ``size``, ``2 * size``, ... and do not overlap,
    except for the last one: it is aligned to the end of ``series`` so that it
    still holds ``size`` items and includes the final item.

    If ``series`` holds fewer than ``size`` items, a single shorter window with
    all of its items is yielded. An empty ``series`` yields nothing.

    Args:
        series: Any object supporting ``len()`` and slice indexing, for example
            a list, or a pandas Series / DataFrame (sliced by row).
        size: Window length; must be a positive integer.

    Yields:
        Slices of ``series`` with at most ``size`` items each.

    Raises:
        ValueError: If ``size`` is not positive (raised on first iteration).
    """
    if size <= 0:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    total = len(series)
    for start_row in range(0, total, size):
        if start_row + size >= total:
            # Last window: align it to the end so it includes the final item.
            # max() keeps the start at 0 when the series is shorter than `size`;
            # a negative start would be read as an offset from the end and
            # silently drop leading rows.
            left_index = max(total - size, 0)
            yield series[left_index:total]
            return
        yield series[start_row:start_row + size]


# Number of rows in every sample window; shorter windows are padded up to it.
WINDOW_SIZE = 48

# Per-episode results are collected in lists and joined once after the loop,
# rather than re-copying the growing arrays with np.concatenate on every iteration.
sample_blocks = []
label_blocks = []
for ep_index, episode in enumerate(data):
    # NOTE: these assignments modify the frames stored in `data` in place.
    # Series.map turns every value that is not a key of the mapping into NaN, so
    # running this cell a second time without reloading `data` replaces the
    # already-numeric scores with NaN.
    episode["Glascow coma scale eye opening"] = episode["Glascow coma scale eye opening"].map(gcs_eye_mapping)
    episode["Glascow coma scale motor response"] = episode["Glascow coma scale motor response"].map(gcs_motor_mapping)
    episode["Glascow coma scale verbal response"] = episode["Glascow coma scale verbal response"].map(
        gcs_verbal_mapping)

    # The total is the sum of the three component scores (NaN if any component is NaN).
    episode["Glascow coma scale total"] = episode["Glascow coma scale eye opening"] + episode[
        "Glascow coma scale motor response"] + episode["Glascow coma scale verbal response"]

    # Only the first window of each episode is used, so take just that one
    # instead of materialising every window.
    first_window = next(iter(sliding_window_iter(episode, WINDOW_SIZE)), None)
    if first_window is None:
        # Previously this surfaced as a bare IndexError from `windows[0]`.
        raise ValueError(
            f"episode {listfile[ep_index][0]} (position {ep_index}) produced no window; "
            "it appears to contain no rows")
    windows = [first_window]

    for index, window in enumerate(windows):
        if len(window) < WINDOW_SIZE:
            # Pad short windows at the front with all-NaN rows whose "Hours" is 0,
            # so every sample has exactly WINDOW_SIZE rows.
            empty_measurements = pd.DataFrame(
                np.nan, index=range(WINDOW_SIZE - len(window)), columns=window.columns)
            empty_measurements["Hours"] = 0
            windows[index] = pd.concat([empty_measurements, window])

    sample_blocks.append(np.array([w.values for w in windows]))
    # The label comes from the second column of the listfile row at the same
    # position as the episode; it is repeated once per window.
    label_blocks.append(np.repeat(int(listfile[ep_index][1]), len(windows)))

# As before, both names stay None when `data` is empty.
samples = np.concatenate(sample_blocks) if sample_blocks else None
labels = np.concatenate(label_blocks) if label_blocks else None

In [None]:
# Make sure the label vector has an integer dtype.
labels = labels.astype(int)
# Display the sum of the labels. This equals the number of positive samples
# if every label is 0 or 1 (TODO confirm).
labels.sum()