In [12]:
from sklearn.neighbors import NearestNeighbors as KNN
from sklearn.model_selection import train_test_split
from time import time
import pickle as pkl
import pandas as pd
import numpy as np

In [13]:
# Names of the observation columns, in the order the original list defined them.
observ_cols = [
    'elixhauser', 're_admission', 'SOFA', 'SIRS',
    'Weight_kg', 'GCS', 'HR',
    'SysBP', 'MeanBP', 'DiaBP',
    'RR', 'SpO2', 'Temp_C', 'FiO2_1',
    'Potassium', 'Sodium', 'Chloride', 'Glucose',
    'BUN', 'Creatinine', 'Magnesium', 'Calcium',
    'Ionised_Ca', 'CO2_mEqL', 'SGOT', 'SGPT',
    'Total_bili', 'Albumin', 'Hb',
    'WBC_count', 'Platelets_count',
    'PTT', 'PT', 'INR',
    'Arterial_pH', 'paO2', 'paCO2',
    'Arterial_BE', 'Arterial_lactate', 'HCO3', 'PaO2_FiO2',
    'output_total', 'output_4hourly',
    'sedation', 'mechvent', 'rrt',
]

In [17]:
# Load the pickled embeddings and the matching CSV tables.
# SECURITY: unpickling can execute arbitrary code; only load embedding files
# that come from a trusted source.
# `with` guarantees each file handle is closed as soon as the load finishes.
with open('path/to/train_embeddings', 'rb') as fh:
    train_embeddings = pkl.load(fh)
with open('path/to/test_embeddings', 'rb') as fh:
    test_embeddings = pkl.load(fh)
train_set = pd.read_csv('path/to/trainset')
test_set = pd.read_csv('path/to/testset')

In [15]:
# Fold the two treatment columns into one discrete action id:
#     action = vaso_input + 5 * iv_input
# (assumes both columns already hold bin indices 0..4, giving ids 0..24 — TODO confirm)
train_actions = 5 * train_set['iv_input'].values + train_set['vaso_input'].values
test_actions = 5 * test_set['iv_input'].values + test_set['vaso_input'].values

# Row positions of the records whose died_in_hosp flag is 0.
train_survivors, = np.where(train_set['died_in_hosp'].values == 0)
test_survivors, = np.where(test_set['died_in_hosp'].values == 0)

In [5]:
# Given the actions taken at each state's neighbours, compute a probability for each action.
def actions_2_probs(ind, src_actions, survivors, num_actions=25, expert='kernel'):
    """Turn the actions of each state's neighbours into an empirical action distribution.

    Parameters
    ----------
    ind : integer array, shape (n_states, n_neighbors)
        Row ``i`` holds the positions (into ``src_actions``) of the neighbours
        of state ``i``.
    src_actions : array, shape (n_source,)
        Discrete action id recorded at every source state. Float-typed ids are
        accepted as long as every value is a whole number.
    survivors : array of int
        Positions in ``src_actions`` that belong to survivors. Only consulted
        when ``expert == 'kernel'``.
    num_actions : int, default 25
        Number of distinct action ids; valid ids are ``0 .. num_actions - 1``.
    expert : str, default 'kernel'
        ``'kernel'`` counts only neighbours listed in ``survivors``; any other
        value counts every neighbour.

    Returns
    -------
    numpy.ndarray, shape (n_states, num_actions)
        Row ``i`` is the relative frequency of each action among the counted
        neighbours of state ``i``. A row with no counted neighbour is all zeros.

    Raises
    ------
    ValueError
        If a selected action id is not a whole number or lies outside
        ``[0, num_actions)``.
    """
    ind = np.asarray(ind)
    neighbour_actions = np.asarray(src_actions)[ind]

    # Action ids are used as column positions, so they must be integers.
    # Columns read from CSV are often float64 (0.0, 1.0, ...); cast them and
    # verify nothing was truncated. NaN fails the equality check as well.
    with np.errstate(invalid='ignore'):
        action_ids = neighbour_actions.astype(np.intp)
    if not np.array_equal(action_ids, neighbour_actions):
        raise ValueError('action ids must be whole numbers')
    if action_ids.size and (action_ids.min() < 0 or action_ids.max() >= num_actions):
        raise ValueError('action ids must lie in [0, num_actions)')

    # Boolean mask of the neighbours that contribute to the counts. Using a
    # mask (rather than a sentinel value written into the action array) keeps
    # action 0 distinguishable without modifying any input.
    if expert == 'kernel':
        counted = np.isin(ind, survivors)  # survivors are positions in the source set
    else:
        counted = np.ones(ind.shape, dtype=bool)  # look at all neighbours

    action_probs = np.zeros((action_ids.shape[0], num_actions))
    for row, (row_actions, row_counted) in enumerate(zip(action_ids, counted)):
        counts = np.bincount(row_actions[row_counted], minlength=num_actions)
        n_counted = counts.sum()
        if n_counted:  # leave the row at zero when no neighbour is counted
            action_probs[row] = counts / n_counted

    return action_probs

In [18]:
# Fit a nearest-neighbour index on the training embeddings.
# Distance criterion is Euclidean: the estimator defaults are metric='minkowski' with p=2.
# n_neighbors is passed by keyword because newer scikit-learn releases make the
# constructor arguments keyword-only and reject the positional form KNN(300).
knn = KNN(n_neighbors=300)
knn.fit(train_embeddings)

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=1, n_neighbors=300, p=2, radius=1.0)

In [19]:
# Slow step: for every training embedding, look up its 300 nearest training embeddings.
# NOTE(review): the query set is the same data the index was fitted on, so each
# state is presumably returned as one of its own neighbours — confirm this is intended.
# Neighbours for the test embeddings are queried separately
# (test_from_train_dist / test_from_train_ind).
train_dist, train_ind = knn.kneighbors(X=train_embeddings)

In [20]:
# The kernel policy for a test state is derived from similar patient states in the
# training set, so the test embeddings are queried against the index fitted on the
# training embeddings. The returned indices therefore point into the training set.
test_from_train_dist, test_from_train_ind = knn.kneighbors(X=test_embeddings)

In [21]:
# The physician policy for the test set is estimated from similar patient states
# within the test set itself, so a second index is fitted on the test embeddings.
# n_neighbors is passed by keyword because newer scikit-learn releases make the
# constructor arguments keyword-only and reject the positional form KNN(300).
knn_phy = KNN(n_neighbors=300)
knn_phy.fit(test_embeddings)
# Slow step as well: the returned indices point into the test set.
test_dist_phy, test_ind_phy = knn_phy.kneighbors(test_embeddings)

In [16]:
# Kernel policy: action frequencies among the surviving training-set neighbours.
kernel_policy_train = actions_2_probs(train_ind, train_actions, train_survivors)
# For the test set the neighbours are training-set rows, so the indices must be
# test_from_train_ind (queried from the index fitted on the training embeddings).
# The previous name `test_ind` is never assigned in this notebook and raised NameError.
kernel_policy_test = actions_2_probs(test_from_train_ind, train_actions, train_survivors)

In [21]:
# Physician policy on the training set: with expert='phy' every neighbour is
# counted, so the survivors argument is passed but not consulted.
phy_policy_train = actions_2_probs(
    ind=train_ind,
    src_actions=train_actions,
    survivors=train_survivors,
    expert='phy',
)

In [23]:
# Physician policy on the test set: neighbours and actions both come from the
# test set; with expert='phy' every neighbour is counted.
phy_policy_test = actions_2_probs(
    ind=test_ind_phy,
    src_actions=test_actions,
    survivors=test_survivors,
    expert='phy',
)

In [37]:
# Persist the kernel policies as a (train, test) tuple; `with` closes and
# flushes the file deterministically.
with open('kernel_actions.pkl', 'wb') as fh:
    pkl.dump((kernel_policy_train, kernel_policy_test), fh)

In [38]:
# Persist the physician policies as a (train, test) tuple; `with` closes and
# flushes the file deterministically.
with open('phy_actions.pkl', 'wb') as fh:
    pkl.dump((phy_policy_train, phy_policy_test), fh)

In [23]:
# Persist the (distances, indices) pairs of each neighbour query. Each file is
# opened in a `with` block so it is closed and flushed as soon as it is written.
with open('kernel_knn_train.pkl', 'wb') as fh:
    pkl.dump((train_dist, train_ind), fh)
with open('kernel_knn_test_from_train.pkl', 'wb') as fh:
    pkl.dump((test_from_train_dist, test_from_train_ind), fh)
with open('kernel_knn_test.pkl', 'wb') as fh:
    pkl.dump((test_dist_phy, test_ind_phy), fh)