In [1]:
import os

import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Dataset variant to load: the action-space name and the time-step (in hours,
# kept as a string because it is only ever interpolated into paths).
action_space = 'NormThreshold'
timestep = '8'

# Directory holding the episodic splits with the 128-dimensional encoded state.
base_dir = (f'F:/time_step/OfflineRL_FactoredActions/RL_mimic_sepsis/data'
            f'/data_as{action_space}_dt{timestep}h/episodes+encoded_state_128')

# Load the three splits in train / val / test order.
train_data, val_data, test_data = (
    torch.load(split_path)
    for split_path in (
        f'{base_dir}/train_data.pt',
        f'{base_dir}/val_data.pt',
        f'{base_dir}/test_data.pt',
    )
)

In [3]:
# Show which fields the loaded training split provides.
train_data.keys()

dict_keys(['index', 'icustayids', 'lengths', 'times', 'notdones', 'demographics', 'observations', 'acuities', 'rewards', 'actions', 'actionvecs', 'subactions', 'subactionvecs', 'statevecs'])

In [4]:
# Number of entries in 'icustayids' for the train, val and test splits.
split_sizes = [len(split['icustayids']) for split in (train_data, val_data, test_data)]
print(*split_sizes)

12863 2757 2757


### Make Flattened (S,A) Dataset From Episodic Dataset

In [5]:
# Flatten the episodic training data into one row per (state, action) pair.
# For an episode of length `lng`, the state vectors at steps 0..lng-2 are
# paired with the actions stored at steps 1..lng-1.
# NOTE(review): presumably the action stored at step t+1 is the one taken from
# the state at step t -- confirm against the code that builds the episodes.
train_episode_ids = range(len(train_data['icustayids']))

train_statevecs = np.vstack([
    train_data['statevecs'][ep][:train_data['lengths'][ep] - 1].cpu().numpy()
    for ep in train_episode_ids
])
train_actions = np.concatenate([
    train_data['actions'][ep][1:train_data['lengths'][ep]].cpu().numpy()
    for ep in train_episode_ids
])
print(train_statevecs.shape, train_actions.shape)

(79212, 128) (79212,)


In [6]:
# Flatten the episodic validation data into one row per (state, action) pair.
# For an episode of length `lng`, the state vectors at steps 0..lng-2 are
# paired with the actions stored at steps 1..lng-1.
# NOTE(review): presumably the action stored at step t+1 is the one taken from
# the state at step t -- confirm against the code that builds the episodes.
val_episode_ids = range(len(val_data['icustayids']))

val_statevecs = np.vstack([
    val_data['statevecs'][ep][:val_data['lengths'][ep] - 1].cpu().numpy()
    for ep in val_episode_ids
])
val_actions = np.concatenate([
    val_data['actions'][ep][1:val_data['lengths'][ep]].cpu().numpy()
    for ep in val_episode_ids
])
print(val_statevecs.shape, val_actions.shape)

(17004, 128) (17004,)


In [7]:
# Flatten the episodic test data into one row per (state, action) pair.
# For an episode of length `lng`, the state vectors at steps 0..lng-2 are
# paired with the actions stored at steps 1..lng-1.
# NOTE(review): presumably the action stored at step t+1 is the one taken from
# the state at step t -- confirm against the code that builds the episodes.
test_episode_ids = range(len(test_data['icustayids']))

test_statevecs = np.vstack([
    test_data['statevecs'][ep][:test_data['lengths'][ep] - 1].cpu().numpy()
    for ep in test_episode_ids
])
test_actions = np.concatenate([
    test_data['actions'][ep][1:test_data['lengths'][ep]].cpu().numpy()
    for ep in test_episode_ids
])
print(test_statevecs.shape, test_actions.shape)

(16909, 128) (16909,)


## Analyze

In [8]:
# (number of flattened training rows, state-vector dimension)
train_statevecs.shape

(79212, 128)

In [9]:
# One action label per flattened training row.
train_actions.shape

(79212,)

In [10]:
# Distinct action ids that occur in the flattened training set.
np.unique(train_actions)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24], dtype=int64)

## KNN behavior policy

In [11]:
# Neighbourhood sizes for the KNN classifiers: K_train for the model fitted on
# the training set, K_val for the models fitted on the held-out sets.
# How these values were chosen is not shown in this notebook.
K_train, K_val = 1407, 651

# Passed as `weights=` to KNeighborsClassifier; the alternative is 'distance'.
weight = 'uniform'

In [12]:
# KNN estimate of the behaviour policy, fitted on the flattened training set.
clf_train = KNeighborsClassifier(
    n_neighbors=K_train,
    weights=weight,
    metric='minkowski',
    p=2,  # Minkowski with p=2 is the Euclidean distance
)
clf_train.fit(train_statevecs, train_actions)

# predict_proba returns one column per class seen during fit, ordered as
# clf_train.classes_.  Scatter those columns into a full-width array so that
# column j always refers to action id j, even if some action never occurs in
# the fitted data (such actions keep probability 0).  When every action is
# present this yields exactly the predict_proba output.
# NOTE(review): assumes action ids are the column indices of 'actionvecs' --
# confirm against the code that builds the episodes.
num_actions = train_data['actionvecs'].shape[-1]
train_pibs = np.zeros((len(train_statevecs), num_actions))
train_pibs[:, clf_train.classes_] = clf_train.predict_proba(train_statevecs)

# On the training split the "estimated" policy is the same model, so both
# names refer to the same array.
train_estm_pibs = train_pibs

In [13]:
# KNN classifier fitted on the validation set itself (gives val_pibs); the
# training-set classifier is also evaluated on the validation states (gives
# val_estm_pibs).
clf_val = KNeighborsClassifier(
    n_neighbors=K_val,
    weights=weight,
    metric='minkowski',
    p=2,  # Minkowski with p=2 is the Euclidean distance
)
clf_val.fit(val_statevecs, val_actions)

# predict_proba returns one column per class seen during fit, ordered as the
# classifier's classes_.  A held-out split may lack some actions, which would
# make the matrix narrower than 'actionvecs' and shift the columns away from
# their action ids.  Scatter into a full-width array instead; actions unseen
# at fit time keep probability 0.  When every action is present this yields
# exactly the predict_proba output.
# NOTE(review): assumes action ids are the column indices of 'actionvecs' --
# confirm against the code that builds the episodes.
num_actions = val_data['actionvecs'].shape[-1]

val_pibs = np.zeros((len(val_statevecs), num_actions))
val_pibs[:, clf_val.classes_] = clf_val.predict_proba(val_statevecs)

val_estm_pibs = np.zeros((len(val_statevecs), num_actions))
val_estm_pibs[:, clf_train.classes_] = clf_train.predict_proba(val_statevecs)

In [14]:
# KNN classifier fitted on the test set itself (gives test_pibs); the
# training-set classifier is also evaluated on the test states (gives
# test_estm_pibs).
# NOTE(review): the test classifier reuses K_val; there is no separate K for
# the test split -- confirm this is intended.
clf_test = KNeighborsClassifier(
    n_neighbors=K_val,
    weights=weight,
    metric='minkowski',
    p=2,  # Minkowski with p=2 is the Euclidean distance
)
clf_test.fit(test_statevecs, test_actions)

# predict_proba returns one column per class seen during fit, ordered as the
# classifier's classes_.  A held-out split may lack some actions, which would
# make the matrix narrower than 'actionvecs' and shift the columns away from
# their action ids.  Scatter into a full-width array instead; actions unseen
# at fit time keep probability 0.  When every action is present this yields
# exactly the predict_proba output.
# NOTE(review): assumes action ids are the column indices of 'actionvecs' --
# confirm against the code that builds the episodes.
num_actions = test_data['actionvecs'].shape[-1]

test_pibs = np.zeros((len(test_statevecs), num_actions))
test_pibs[:, clf_test.classes_] = clf_test.predict_proba(test_statevecs)

test_estm_pibs = np.zeros((len(test_statevecs), num_actions))
test_estm_pibs[:, clf_train.classes_] = clf_train.predict_proba(test_statevecs)

In [15]:
# Persist the flattened KNN probabilities for all splits in a single .npz
# archive; the file name records both neighbourhood sizes.
knn_output_path = (f'F:/time_step/OfflineRL_FactoredActions/RL_mimic_sepsis/data'
                   f'/data_as{action_space}_dt{timestep}h/knn_output_ktrain{K_train}_kval{K_val}_latent128.npz')

knn_outputs = dict(
    train_pibs=train_pibs,
    train_estm_pibs=train_estm_pibs,
    val_pibs=val_pibs,
    val_estm_pibs=val_estm_pibs,
    test_pibs=test_pibs,
    test_estm_pibs=test_estm_pibs,
)

with open(knn_output_path, 'wb') as npz_file:
    np.savez(npz_file, **knn_outputs)


## Assign KNN results back to episodes

In [16]:
# Write the flattened KNN probabilities back into per-episode tensors shaped
# like 'actionvecs'.  Each episode consumes the next (length - 1) flattened
# rows; every position after that stays zero.
# NOTE(review): the new tensors inherit the dtype of 'actionvecs'; if that is
# an integer dtype the probabilities would be truncated -- confirm it is a
# floating-point tensor.
train_data['pibs'] = torch.zeros_like(train_data['actionvecs'])
train_data['estm_pibs'] = torch.zeros_like(train_data['actionvecs'])

row_start = 0
for ep in range(len(train_data['icustayids'])):
    n_steps = int(train_data['lengths'][ep]) - 1
    row_stop = row_start + n_steps
    train_data['pibs'][ep, :n_steps, :] = torch.tensor(train_pibs[row_start:row_stop])
    train_data['estm_pibs'][ep, :n_steps, :] = torch.tensor(train_estm_pibs[row_start:row_stop])
    row_start = row_stop

In [17]:
# Write the flattened KNN probabilities back into per-episode tensors shaped
# like 'actionvecs'.  Each episode consumes the next (length - 1) flattened
# rows; every position after that stays zero.
# NOTE(review): the new tensors inherit the dtype of 'actionvecs'; if that is
# an integer dtype the probabilities would be truncated -- confirm it is a
# floating-point tensor.
val_data['pibs'] = torch.zeros_like(val_data['actionvecs'])
val_data['estm_pibs'] = torch.zeros_like(val_data['actionvecs'])

row_start = 0
for ep in range(len(val_data['icustayids'])):
    n_steps = int(val_data['lengths'][ep]) - 1
    row_stop = row_start + n_steps
    val_data['pibs'][ep, :n_steps, :] = torch.tensor(val_pibs[row_start:row_stop])
    val_data['estm_pibs'][ep, :n_steps, :] = torch.tensor(val_estm_pibs[row_start:row_stop])
    row_start = row_stop

In [18]:
# Write the flattened KNN probabilities back into per-episode tensors shaped
# like 'actionvecs'.  Each episode consumes the next (length - 1) flattened
# rows; every position after that stays zero.
# NOTE(review): the new tensors inherit the dtype of 'actionvecs'; if that is
# an integer dtype the probabilities would be truncated -- confirm it is a
# floating-point tensor.
test_data['pibs'] = torch.zeros_like(test_data['actionvecs'])
test_data['estm_pibs'] = torch.zeros_like(test_data['actionvecs'])

row_start = 0
for ep in range(len(test_data['icustayids'])):
    n_steps = int(test_data['lengths'][ep]) - 1
    row_stop = row_start + n_steps
    test_data['pibs'][ep, :n_steps, :] = torch.tensor(test_pibs[row_start:row_stop])
    test_data['estm_pibs'][ep, :n_steps, :] = torch.tensor(test_estm_pibs[row_start:row_stop])
    row_start = row_stop

In [19]:
# Every field of every split, including the newly added 'pibs' and
# 'estm_pibs', must hold exactly one entry per episode.
for split in (train_data, val_data, test_data):
    expected_len = len(split['index'])
    for field in split.values():
        assert len(field) == expected_len

In [20]:
# Write the splits, now carrying 'pibs' and 'estm_pibs', to a sibling
# directory so the input files are left untouched.
save_dir = (f'F:/time_step/OfflineRL_FactoredActions/RL_mimic_sepsis/data'
            f'/data_as{action_space}_dt{timestep}h/episodes+encoded_state_128+knn_pibs')
os.makedirs(save_dir, exist_ok=True)

# Saved in train / val / test order.
for split_data, target_path in (
    (train_data, f'{save_dir}/train_data.pt'),
    (val_data, f'{save_dir}/val_data.pt'),
    (test_data, f'{save_dir}/test_data.pt'),
):
    torch.save(split_data, target_path)

In [21]:
# Check consistency of episode length and feature vector
for i in range(100):
    lng = train_data['lengths'][i]
    assert (train_data['observations'][i][lng:] == 0).all()
    assert (train_data['pibs'][i][lng-1:] == 0).all()
    assert (train_data['estm_pibs'][i][lng-1:] == 0).all()