In [1]:
""" Calculate behavior cloning performance.
To use this file, you should:
- change timestep
"""
import sys

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

sys.path.append('F:/time_step/OfflineRL_FactoredActions')
from RL_mimic_sepsis.utils.timestep_util import (
    get_state_dim,
    get_horizon
    )


In [2]:
timestep = 8
dataset_type = 'val'  # 'train' or 'test'
num_actions = 25
horizon = get_horizon(timestep)
state_dim = get_state_dim(timestep, 'NormThreshold')

In [3]:

class EpisodicBufferO(Dataset):
    def __init__(self, state_dim, num_actions, horizon, buffer_size=0):
        self.max_size = int(buffer_size)
        self.horizon = horizon
        self.state = torch.zeros((self.max_size, horizon, state_dim))
        self.action = torch.zeros((self.max_size, horizon, 1), dtype=torch.long)
        self.reward = torch.zeros((self.max_size, horizon, 1))
        self.not_done = torch.zeros((self.max_size, horizon, 1))
        self.pibs = torch.zeros((self.max_size, horizon, num_actions))
        self.estm_pibs = torch.zeros((self.max_size, horizon, num_actions))
    
    def __len__(self):
        return len(self.state)
    
    def __getitem__(self, idx):
        return (
            self.state[idx],
            self.action[idx],
            self.reward[idx],
            self.not_done[idx],
            self.pibs[idx],
            self.estm_pibs[idx],
        )
    
    def load(self, filename):
        data = torch.load(filename)
        self.state = data['statevecs'][:, :-1, :]
        self.action = data['actions'][:, 1:].unsqueeze(-1)  # Need to offset by 1 so that we predict actions that have not yet occurred
        self.reward = data['rewards'][:, 1:].unsqueeze(-1)  # Need to offset by 1
        self.not_done = data['notdones'][:, 1:].unsqueeze(-1)
        self.pibs = data['pibs'][:, :-1, :]
        self.estm_pibs = data['estm_pibs'][:, :-1, :]
        print(f"Episodic Buffer loaded with {len(self)} episides.")


In [4]:
from types import SimpleNamespace

def remap_rewards(R, args):
    R = np.select([R == 0, R == -1, R == 1], [args.R_immed, args.R_death, args.R_disch,], R)
    return torch.tensor(R)

In [5]:
test_episodes_O = EpisodicBufferO(state_dim, num_actions, horizon)
test_episodes_O.load((rf'F:\time_step\OfflineRL_FactoredActions\RL_mimic_sepsis\data'
                      rf'\data_asNormThreshold_dt{timestep}h\episodes+encoded_state+knn_pibs_final/{dataset_type}_data.pt'))
test_episodes_O.reward = remap_rewards(test_episodes_O.reward, SimpleNamespace(**{'R_immed': 0.0, 'R_death': 0.0, 'R_disch': 100.0}))

tmp_test_episodes_loader_O = DataLoader(test_episodes_O, batch_size=len(test_episodes_O), shuffle=False)
test_batch_O = next(iter(tmp_test_episodes_loader_O))

Episodic Buffer loaded with 2757 episides.


In [6]:
# get knn highest probability action index, check agreement with 

In [7]:
states, actions, rewards, not_dones, pibs, estm_pibs = test_batch_O
rewards = rewards[:, :, 0].cpu().numpy()
n, horizon, _ = states.shape
discounted_rewards = rewards * (1.0 ** np.arange(horizon))

In [8]:
estm_pibs.argmax(dim=2).shape

torch.Size([2757, 10])

In [9]:
actions.shape

torch.Size([2757, 10, 1])

In [10]:
# rough estimate
(estm_pibs.argmax(dim=2) == actions.squeeze()).to(float).mean()

tensor(0.7103, dtype=torch.float64)

In [11]:
# top 1 knn action
cnt_match, cnt_all = 0.0, 0.0
for idx in range(n):
    lng = (not_dones[idx, :, 0].sum() + 1).item()  # all but the final transition has notdone==1
    a_obs = actions[idx, :lng, 0]
    a_prd = estm_pibs[idx, :lng].argmax(dim=-1)
    cnt_all += lng
    cnt_match += (a_obs == a_prd).to(float).sum()

print(cnt_match/cnt_all)

tensor(0.5302, dtype=torch.float64)


In [12]:
# top 2 knn actions
cnt_match, cnt_all = 0.0, 0.0
for idx in range(n):
    lng = (not_dones[idx, :, 0].sum() + 1).item()  # all but the final transition has notdone==1
    a_obs = actions[idx, :lng, 0]
    a_prd = torch.argsort(estm_pibs[idx, :lng], descending=True, dim=-1)
    cnt_all += lng
    cnt_match += (a_obs.unsqueeze(1) == a_prd[:, :2]).to(float).sum(dim=1).sum()

print(cnt_match/cnt_all)

tensor(0.7500, dtype=torch.float64)


In [13]:
# top 5 knn actions
cnt_match, cnt_all = 0.0, 0.0
for idx in range(n):
    lng = (not_dones[idx, :, 0].sum() + 1).item()  # all but the final transition has notdone==1
    a_obs = actions[idx, :lng, 0]
    a_prd = torch.argsort(estm_pibs[idx, :lng], descending=True, dim=-1)
    cnt_all += lng
    cnt_match += (a_obs.unsqueeze(1) == a_prd[:, :5]).to(float).sum(dim=1).sum()

print(cnt_match/cnt_all)

tensor(0.8974, dtype=torch.float64)


In [14]:
# top 10 knn actions
cnt_match, cnt_all = 0.0, 0.0
for idx in range(n):
    lng = (not_dones[idx, :, 0].sum() + 1).item()  # all but the final transition has notdone==1
    a_obs = actions[idx, :lng, 0]
    a_prd = torch.argsort(estm_pibs[idx, :lng], descending=True, dim=-1)
    cnt_all += lng
    cnt_match += (a_obs.unsqueeze(1) == a_prd[:, :10]).to(float).sum(dim=1).sum()

print(cnt_match/cnt_all)

tensor(0.9601, dtype=torch.float64)


In [15]:
cnt_match, cnt_all = 0.0, 0.0
for idx in range(n):
    lng = (not_dones[idx, :, 0].sum() + 1).item()  # all but the final transition has notdone==1
    a_obs = actions[idx, :lng, 0]
    a_prd = pibs[idx, :lng].argmax(dim=-1)
    cnt_all += lng
    cnt_match += (a_obs == a_prd).to(float).sum()

print(cnt_match/cnt_all)

tensor(0.5276, dtype=torch.float64)


In [16]:
import numpy as np
from sklearn.metrics import roc_auc_score

def compute_bootstrapped_auroc(y_true, y_score, n_bootstraps=1000, seed=42, avg=['macro']):
    rng = np.random.RandomState(seed)
    bootstrapped_scores = []

    for _ in range(n_bootstraps):
        indices = rng.randint(0, len(y_true), len(y_true))
        try:
            score = roc_auc_score(y_true[indices], y_score[indices], multi_class='ovr', average=avg)
            bootstrapped_scores.append(score)
        except ValueError:
            continue  

    scores = np.array(bootstrapped_scores)
    mean = np.mean(scores)
    lower = np.percentile(scores, 2.5)
    upper = np.percentile(scores, 97.5)
    return mean, lower, upper

In [17]:
y_true = []
y_score = []

for idx in range(n):
    # length of this traj (all but final have notdone==1, plus the terminal step)
    lng = (not_dones[idx, :, 0].sum() + 1).item()

    # observed actions and predicted "probabilities" for this traj
    a_obs = actions[idx, :lng, 0]               
    pibs  = estm_pibs[idx, :lng, :]             

    # extend our global lists
    y_true.extend(a_obs.cpu().tolist())
    y_score.extend(pibs.cpu().tolist())

# convert to numpy arrays
y_true  = np.array(y_true)                       # shape [total_steps]
y_score = np.vstack(y_score)                     # shape [total_steps, num_actions]

# compute multi‐class AUROC (macro‐avg)
macro_auroc = roc_auc_score(y_true, y_score, multi_class='ovr', average='macro')
weighted_auroc = roc_auc_score(y_true, y_score, multi_class='ovr', average='weighted')
micro_auroc = roc_auc_score(y_true, y_score, multi_class='ovr', average='micro')

print(f"Macro-averaged AUROC: {macro_auroc:.4f}")
print(f"Weighted-averaged AUROC: {weighted_auroc:.4f}")
print(f"Micro-averaged AUROC: {micro_auroc:.4f}")


Macro-averaged AUROC: 0.7748
Weighted-averaged AUROC: 0.7168
Micro-averaged AUROC: 0.9484


In [18]:
pibs

tensor([[0.2459, 0.0028, 0.0036, 0.0007, 0.0007, 0.4940, 0.0334, 0.0220, 0.0192,
         0.0107, 0.0931, 0.0092, 0.0071, 0.0036, 0.0050, 0.0320, 0.0007, 0.0036,
         0.0028, 0.0028, 0.0028, 0.0007, 0.0021, 0.0007, 0.0007],
        [0.2210, 0.0014, 0.0014, 0.0014, 0.0028, 0.6105, 0.0156, 0.0156, 0.0092,
         0.0135, 0.0668, 0.0050, 0.0028, 0.0021, 0.0036, 0.0178, 0.0014, 0.0021,
         0.0021, 0.0021, 0.0014, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.2786, 0.0078, 0.0028, 0.0014, 0.0028, 0.5529, 0.0128, 0.0220, 0.0213,
         0.0178, 0.0362, 0.0050, 0.0036, 0.0036, 0.0071, 0.0121, 0.0007, 0.0036,
         0.0021, 0.0050, 0.0007, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.2921, 0.0114, 0.0036, 0.0021, 0.0036, 0.5259, 0.0235, 0.0263, 0.0284,
         0.0220, 0.0320, 0.0036, 0.0028, 0.0021, 0.0064, 0.0071, 0.0007, 0.0021,
         0.0028, 0.0014, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.3369, 0.0135, 0.0007, 0.0036, 0.0021, 0.4968, 0.0263, 0.0142, 0.0249,
       

In [19]:
estm_pibs

tensor([[[0.2587, 0.0028, 0.0036,  ..., 0.0000, 0.0007, 0.0014],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.2090, 0.0043, 0.0021,  ..., 0.0028, 0.0028, 0.0036],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.2402, 0.0021, 0.0014,  ..., 0.0014, 0.0092, 0.0114],
         [0.2566, 0.0021, 0.0014,  ..., 0.0021, 0.0036, 0.0028],
         [0.2168, 0.0014, 0.0000,  ..., 0.0014, 0.0007, 0.

In [20]:
mean, lower, upper = compute_bootstrapped_auroc(y_true, y_score, avg='macro')
print(f"Macro AUROC over all steps: {mean:.4f} [95% CI: {lower:.4f}, {upper:.4f}]")

Macro AUROC over all steps: 0.7754 [95% CI: 0.7601, 0.7883]


In [21]:
mean, lower, upper = compute_bootstrapped_auroc(y_true, y_score, avg='weighted')
print(f"Weighted AUROC over all steps: {mean:.4f} [95% CI: {lower:.4f}, {upper:.4f}]")

Weighted AUROC over all steps: 0.7169 [95% CI: 0.7111, 0.7223]


In [22]:
mean, lower, upper = compute_bootstrapped_auroc(y_true, y_score, avg='micro')
print(f"Micro AUROC over all steps: {mean:.4f} [95% CI: {lower:.4f}, {upper:.4f}]")

Micro AUROC over all steps: 0.9483 [95% CI: 0.9466, 0.9501]


In [23]:
'''  Example Output For Timestep 1:
Test:
Macro-averaged AUROC: 0.7811
Weighted-averaged AUROC: 0.7860
Micro-averaged AUROC: 0.9477

Val:
Macro-averaged AUROC: 0.7714
Weighted-averaged AUROC: 0.7752
Micro-averaged AUROC: 0.9449

Train:
Macro-averaged AUROC: 1.0000
Weighted-averaged AUROC: 1.0000
Micro-averaged AUROC: 1.0000

'''

'  Example Output For Timestep 1:\nTest:\nMacro-averaged AUROC: 0.7811\nWeighted-averaged AUROC: 0.7860\nMicro-averaged AUROC: 0.9477\n\nVal:\nMacro-averaged AUROC: 0.7714\nWeighted-averaged AUROC: 0.7752\nMicro-averaged AUROC: 0.9449\n\nTrain:\nMacro-averaged AUROC: 1.0000\nWeighted-averaged AUROC: 1.0000\nMicro-averaged AUROC: 1.0000\n\n'