# Off-Policy Evaluation for DrinkLess Dataset

This notebook demonstrates off-policy evaluation (OPE) using importance sampling for various bandit agents on the DrinkLess dataset.

In [1]:
import warnings
# Suppress every warning issued after this point, for the whole session.
# NOTE(review): this also hides warnings from the libraries imported below —
# confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from typing import Dict, List, Tuple

# Set random seed for reproducibility
# (seeds NumPy's legacy global RNG, which the bootstrap loop draws from via np.random.choice)
np.random.seed(42)

from bart_playground.bandit.ope import evaluate_agents, instantiate_agents
from drinkless import get_DrinkLess

## Import Agents

In [2]:
from bart_playground.bandit.basic_agents import SillyAgent, LinearTSAgent
from bart_playground.bandit.TEagents import TEAgent
from bart_playground.bandit.bart_agent import DefaultBARTAgent, LogisticBARTAgent
from compare_agents import AgentSpec

def get_agent_specs() -> List[AgentSpec]:
    """Build the list of agents compared in this notebook (cf. gen_agents in R).

    Returns:
        A list of ``(display_name, agent_class, kwargs)`` tuples. The two BART
        agent classes each appear twice, once per encoding: names ending in
        ``o`` use ``'one-hot'`` and names ending in ``m`` use ``'multi'``.
        Presumably the kwargs are forwarded to the agent constructor by
        ``instantiate_agents`` — verify against that helper.
    """
    def bart_kwargs(encoding):
        # A fresh dict per spec, so no two specs share a mutable kwargs object.
        return dict(ndpost=50, nskip=20, nadd=1, n_trees=25,
                    encoding=encoding, random_state=0)

    specs = [
        ("Random", SillyAgent, dict(random_state=0)),
        ("LinearTS", LinearTSAgent, dict(v=1, random_state=0)),
        ("XGBoostTS", TEAgent, dict(agent_type='xgboost', random_state=0)),
        ("RFTS", TEAgent, dict(agent_type='random_forest', random_state=0)),
    ]

    # Order matters: both one-hot variants come first, then both multi variants.
    bart_classes = (("DefaultBART", DefaultBARTAgent), ("LogisticBART", LogisticBARTAgent))
    for suffix, encoding in (("o", 'one-hot'), ("m", 'multi')):
        for prefix, agent_cls in bart_classes:
            specs.append((prefix + suffix, agent_cls, bart_kwargs(encoding)))

    return specs

## Load DrinkLess Dataset

In [3]:
# Fetch the DrinkLess data; the code below reads its 'context', 'action' and 'reward' entries
dt = get_DrinkLess()

print("Dataset info:")
print(f"Context shape: {dt['context'].shape}")
# Distinct logged actions, reused below to count the arms
observed_actions = np.unique(dt['action'])
print(f"Actions: {observed_actions}")
print(f"Rewards range: [{dt['reward'].min():.2f}, {dt['reward'].max():.2f}]")

# Remove the 'ID' column when present.
# Note: the shape printed above is reported BEFORE this drop, n_features below AFTER it.
if 'ID' in dt['context']:
    dt['context'] = dt['context'].drop(columns=['ID'])

# Problem dimensions used by the later cells
n_draw = len(dt['action'])
n_arms = len(observed_actions)
n_features = dt['context'].shape[1]

print(f"\nParameters: n_features={n_features}, n_arms={n_arms}, n_draw={n_draw}")

No mismatches in Dataset A or C.
Dataset A and C align.
Loaded DrinkLess data for outcome simulation.
Dataset info:
Context shape: (10470, 4)
Actions: [1 2]
Rewards range: [0.00, 2.00]

Parameters: n_features=3, n_arms=2, n_draw=10470


In [4]:
# Share of observations whose reward exceeds 1
above_one = dt['reward'] > 1
pct_above_one = np.mean(above_one) * 100
print(f"Percentage of data with reward > 1: {pct_above_one:.2f}%")

# Cap those rewards at 1; all other values pass through unchanged
dt['reward'] = np.where(above_one, 1, dt['reward'])

Percentage of data with reward > 1: 0.11%


## Bootstrap OPE Evaluation

Following the R code pattern with bootstrap resampling and reduced sample size.

In [5]:
# Bootstrap configuration
B = 8                  # how many bootstrap samples to run
sample_fraction = 0.1  # share of the n_draw rows drawn in each bootstrap sample

# Agent specifications and their display names (the first element of every spec)
agent_specs = get_agent_specs()
agent_names = [name for name, *_ in agent_specs]

per_bootstrap_size = int(n_draw * sample_fraction)
print(f"Evaluating {len(agent_specs)} agents: {agent_names}")
print(f"Bootstrap samples: {B}, Sample size per bootstrap: {per_bootstrap_size}")

Evaluating 8 agents: ['Random', 'LinearTS', 'XGBoostTS', 'RFTS', 'DefaultBARTo', 'LogisticBARTo', 'DefaultBARTm', 'LogisticBARTm']
Bootstrap samples: 8, Sample size per bootstrap: 1047


In [6]:
# Run the off-policy evaluation once per bootstrap sample and collect the results
ev_total = []

for b in range(B):
    print(f"\nBootstrap sample {b+1}/{B}")

    # Draw row positions with replacement
    # (R equivalent: sample(1:n_draw, n_draw/10, replace=TRUE))
    resampled_size = int(n_draw * sample_fraction)
    resampled_ind = np.random.choice(n_draw, size=resampled_size, replace=True)

    # Slice the context by position: an object exposing .iloc is sliced through it
    # and converted with .values; anything else is indexed directly
    if hasattr(dt['context'], 'iloc'):
        context_re = dt['context'].iloc[resampled_ind].values
    else:
        context_re = dt['context'][resampled_ind]

    # Resampled dataset handed to the evaluator
    dt_re = dict(
        context=context_re,
        action=np.array(dt['action'])[resampled_ind],
        reward=np.array(dt['reward'])[resampled_ind],
    )

    # New agent instances for every bootstrap sample; the sample index is passed as `sim`
    agents = instantiate_agents(agent_specs, n_arms, n_features, sim=b)

    # DrinkLess propensity scores are [0.4, 0.6] for all data: one row per resampled observation
    propensity_scores = np.array([[0.4, 0.6]] * len(resampled_ind))

    # Off-policy evaluation of every agent on this resample
    evres = evaluate_agents(
        dt_re,
        agents,
        n_arms,
        agent_names,
        propensity_scores=propensity_scores,
        show_progress=False,
    )

    # Keep the per-agent estimates for the summary cells
    ev_total.append(evres)
    print(f"Bootstrap {b+1} results: {evres}")

INFO:root:t = 10 - re-trained model



Bootstrap sample 1/8
Random Forest parameters - exploration_variance: 1.0 n_estimators: 100
Evaluating agent: Random
Estimated policy value for Random: 0.0701
Evaluating agent: LinearTS
Estimated policy value for LinearTS: 0.0948
Evaluating agent: XGBoostTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for XGBoostTS: 0.0821
Evaluating agent: RFTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for RFTS: 0.1123
Evaluating agent: DefaultBARTo
Fitting initial BART model with first 6 observation(s)... Done.
Estimated policy value for DefaultBARTo: 0.1203
Evaluating agent: LogisticBARTo
Fitting initial BART model with first 6 observation(s)... Done.
Estimated policy value for LogisticBARTo: 0.1213
Evaluating agent: DefaultBARTm
Fitting initial BART model with first 6 observation(s)... Done.
Estimated policy value for DefaultBARTm: 0.1090
Evaluating agent: LogisticBARTm
Fitting initial BART model with first 6 observation(s)... Done.


INFO:root:t = 10 - re-trained model


Estimated policy value for LogisticBARTm: 0.1123
Bootstrap 1 results: {'Random': np.float64(0.07013574660633502), 'LinearTS': np.float64(0.09477911646586407), 'XGBoostTS': np.float64(0.08212927756654018), 'RFTS': np.float64(0.11230388109000931), 'DefaultBARTo': np.float64(0.12033195020747005), 'LogisticBARTo': np.float64(0.12133550488599472), 'DefaultBARTm': np.float64(0.10895883777239804), 'LogisticBARTm': np.float64(0.11229508196721417)}

Bootstrap sample 2/8
Random Forest parameters - exploration_variance: 1.0 n_estimators: 100
Evaluating agent: Random
Estimated policy value for Random: 0.0776
Evaluating agent: LinearTS
Estimated policy value for LinearTS: 0.0901
Evaluating agent: XGBoostTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for XGBoostTS: 0.0893
Evaluating agent: RFTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for RFTS: 0.1136
Evaluating agent: DefaultBARTo
Fitting initial BART model with first 7 observation(s)... Done.
Estimated policy value for DefaultBARTo: 0.1157
Evaluating agent: LogisticBARTo
Fitting initial BART model with first 7 observation(s)... Done.
Estimated policy value for LogisticBARTo: 0.1074
Evaluating agent: DefaultBARTm
Fitting initial BART model with first 7 observation(s)... Done.
Estimated policy value for DefaultBARTm: 0.1114
Evaluating agent: LogisticBARTm
Fitting initial BART model with first 7 observation(s)... Done.


INFO:root:t = 10 - re-trained model


Estimated policy value for LogisticBARTm: 0.1164
Bootstrap 2 results: {'Random': np.float64(0.07759303246239141), 'LinearTS': np.float64(0.09009009009009068), 'XGBoostTS': np.float64(0.08928571428571465), 'RFTS': np.float64(0.11363636363636455), 'DefaultBARTo': np.float64(0.11566265060241065), 'LogisticBARTo': np.float64(0.10743134087237581), 'DefaultBARTm': np.float64(0.11138014527845136), 'LogisticBARTm': np.float64(0.11642743221690688)}

Bootstrap sample 3/8
Random Forest parameters - exploration_variance: 1.0 n_estimators: 100
Evaluating agent: Random
Estimated policy value for Random: 0.0702
Evaluating agent: LinearTS
Estimated policy value for LinearTS: 0.1049
Evaluating agent: XGBoostTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for XGBoostTS: 0.0874
Evaluating agent: RFTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for RFTS: 0.1107
Evaluating agent: DefaultBARTo
Fitting initial BART model with first 4 observation(s)... Done.
Estimated policy value for DefaultBARTo: 0.1145
Evaluating agent: LogisticBARTo
Fitting initial BART model with first 4 observation(s)... Done.
Estimated policy value for LogisticBARTo: 0.1205
Evaluating agent: DefaultBARTm
Fitting initial BART model with first 4 observation(s)... Done.
Estimated policy value for DefaultBARTm: 0.1109
Evaluating agent: LogisticBARTm
Fitting initial BART model with first 4 observation(s)... Done.


INFO:root:t = 10 - re-trained model


Estimated policy value for LogisticBARTm: 0.1108
Bootstrap 3 results: {'Random': np.float64(0.07021604938271626), 'LinearTS': np.float64(0.10485736314572144), 'XGBoostTS': np.float64(0.0874363327674029), 'RFTS': np.float64(0.11066235864297351), 'DefaultBARTo': np.float64(0.11451612903225908), 'LogisticBARTo': np.float64(0.12052117263843772), 'DefaultBARTm': np.float64(0.11093502377179167), 'LogisticBARTm': np.float64(0.11075441412520162)}

Bootstrap sample 4/8
Random Forest parameters - exploration_variance: 1.0 n_estimators: 100
Evaluating agent: Random
Estimated policy value for Random: 0.0665
Evaluating agent: LinearTS
Estimated policy value for LinearTS: 0.0826
Evaluating agent: XGBoostTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for XGBoostTS: 0.0804
Evaluating agent: RFTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for RFTS: 0.1103
Evaluating agent: DefaultBARTo
Fitting initial BART model with first 7 observation(s)... Done.
Estimated policy value for DefaultBARTo: 0.1005
Evaluating agent: LogisticBARTo
Fitting initial BART model with first 7 observation(s)... Done.
Estimated policy value for LogisticBARTo: 0.0997
Evaluating agent: DefaultBARTm
Fitting initial BART model with first 7 observation(s)... Done.
Estimated policy value for DefaultBARTm: 0.0968
Evaluating agent: LogisticBARTm
Fitting initial BART model with first 7 observation(s)... Done.


INFO:root:t = 10 - re-trained model


Estimated policy value for LogisticBARTm: 0.0975
Bootstrap 4 results: {'Random': np.float64(0.06651198762567695), 'LinearTS': np.float64(0.08263836239575478), 'XGBoostTS': np.float64(0.0804140127388539), 'RFTS': np.float64(0.11026033690658577), 'DefaultBARTo': np.float64(0.1005291005291011), 'LogisticBARTo': np.float64(0.09969788519637524), 'DefaultBARTm': np.float64(0.09682416731216177), 'LogisticBARTm': np.float64(0.09752438109527437)}

Bootstrap sample 5/8
Random Forest parameters - exploration_variance: 1.0 n_estimators: 100
Evaluating agent: Random
Estimated policy value for Random: 0.0934
Evaluating agent: LinearTS
Estimated policy value for LinearTS: 0.1056
Evaluating agent: XGBoostTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for XGBoostTS: 0.0988
Evaluating agent: RFTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for RFTS: 0.1191
Evaluating agent: DefaultBARTo
Fitting initial BART model with first 17 observation(s)... Done.
Estimated policy value for DefaultBARTo: 0.1244
Evaluating agent: LogisticBARTo
Fitting initial BART model with first 17 observation(s)... Done.
Estimated policy value for LogisticBARTo: 0.1206
Evaluating agent: DefaultBARTm
Fitting initial BART model with first 17 observation(s)... Done.
Estimated policy value for DefaultBARTm: 0.1154
Evaluating agent: LogisticBARTm
Fitting initial BART model with first 17 observation(s)... Done.


INFO:root:t = 10 - re-trained model


Estimated policy value for LogisticBARTm: 0.1247
Bootstrap 5 results: {'Random': np.float64(0.09339774557165907), 'LinearTS': np.float64(0.10559495665878717), 'XGBoostTS': np.float64(0.09877488514548287), 'RFTS': np.float64(0.11908646003262753), 'DefaultBARTo': np.float64(0.12439418416801414), 'LogisticBARTo': np.float64(0.12057877813504939), 'DefaultBARTm': np.float64(0.11538461538461647), 'LogisticBARTm': np.float64(0.12470023980815453)}

Bootstrap sample 6/8
Random Forest parameters - exploration_variance: 1.0 n_estimators: 100
Evaluating agent: Random
Estimated policy value for Random: 0.0932
Evaluating agent: LinearTS
Estimated policy value for LinearTS: 0.1190
Evaluating agent: XGBoostTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for XGBoostTS: 0.1012
Evaluating agent: RFTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for RFTS: 0.1364
Evaluating agent: DefaultBARTo
Fitting initial BART model with first 6 observation(s)... Done.
Estimated policy value for DefaultBARTo: 0.1281
Evaluating agent: LogisticBARTo
Fitting initial BART model with first 6 observation(s)... Done.
Estimated policy value for LogisticBARTo: 0.1310
Evaluating agent: DefaultBARTm
Fitting initial BART model with first 6 observation(s)... Done.
Estimated policy value for DefaultBARTm: 0.1169
Evaluating agent: LogisticBARTm
Fitting initial BART model with first 6 observation(s)... Done.


INFO:root:t = 10 - re-trained model


Estimated policy value for LogisticBARTm: 0.1259
Bootstrap 6 results: {'Random': np.float64(0.0931989924433254), 'LinearTS': np.float64(0.11895321173671783), 'XGBoostTS': np.float64(0.10118577075098865), 'RFTS': np.float64(0.136400322841002), 'DefaultBARTo': np.float64(0.12812500000000102), 'LogisticBARTo': np.float64(0.13098039215686375), 'DefaultBARTm': np.float64(0.11694510739856895), 'LogisticBARTm': np.float64(0.12594268476621487)}

Bootstrap sample 7/8
Random Forest parameters - exploration_variance: 1.0 n_estimators: 100
Evaluating agent: Random
Estimated policy value for Random: 0.0890
Evaluating agent: LinearTS
Estimated policy value for LinearTS: 0.1082
Evaluating agent: XGBoostTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for XGBoostTS: 0.0948
Evaluating agent: RFTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for RFTS: 0.1195
Evaluating agent: DefaultBARTo
Fitting initial BART model with first 20 observation(s)... Done.
Estimated policy value for DefaultBARTo: 0.1289
Evaluating agent: LogisticBARTo
Fitting initial BART model with first 20 observation(s)... Done.
Estimated policy value for LogisticBARTo: 0.1343
Evaluating agent: DefaultBARTm
Fitting initial BART model with first 20 observation(s)... Done.
Estimated policy value for DefaultBARTm: 0.1245
Evaluating agent: LogisticBARTm
Fitting initial BART model with first 20 observation(s)... Done.


INFO:root:t = 10 - re-trained model


Estimated policy value for LogisticBARTm: 0.1273
Bootstrap 7 results: {'Random': np.float64(0.08897637795275622), 'LinearTS': np.float64(0.10819411296738338), 'XGBoostTS': np.float64(0.09479409479409519), 'RFTS': np.float64(0.11951219512195241), 'DefaultBARTo': np.float64(0.12887828162291282), 'LogisticBARTo': np.float64(0.1343042071197424), 'DefaultBARTm': np.float64(0.1244813278008311), 'LogisticBARTm': np.float64(0.12727272727272854)}

Bootstrap sample 8/8
Random Forest parameters - exploration_variance: 1.0 n_estimators: 100
Evaluating agent: Random
Estimated policy value for Random: 0.0789
Evaluating agent: LinearTS
Estimated policy value for LinearTS: 0.1014
Evaluating agent: XGBoostTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for XGBoostTS: 0.0792
Evaluating agent: RFTS


INFO:root:t = 11 - re-trained model
INFO:root:t = 13 - re-trained model
INFO:root:t = 14 - re-trained model
INFO:root:t = 16 - re-trained model
INFO:root:t = 18 - re-trained model
INFO:root:t = 21 - re-trained model
INFO:root:t = 23 - re-trained model
INFO:root:t = 26 - re-trained model
INFO:root:t = 30 - re-trained model
INFO:root:t = 34 - re-trained model
INFO:root:t = 38 - re-trained model
INFO:root:t = 43 - re-trained model
INFO:root:t = 49 - re-trained model
INFO:root:t = 55 - re-trained model
INFO:root:t = 62 - re-trained model
INFO:root:t = 71 - re-trained model
INFO:root:t = 80 - re-trained model
INFO:root:t = 91 - re-trained model
INFO:root:t = 103 - re-trained model
INFO:root:t = 116 - re-trained model
INFO:root:t = 131 - re-trained model
INFO:root:t = 149 - re-trained model
INFO:root:t = 169 - re-trained model
INFO:root:t = 191 - re-trained model
INFO:root:t = 216 - re-trained model
INFO:root:t = 245 - re-trained model
INFO:root:t = 278 - re-trained model
INFO:root:t = 315 -

Estimated policy value for RFTS: 0.1145
Evaluating agent: DefaultBARTo
Fitting initial BART model with first 22 observation(s)... Done.
Estimated policy value for DefaultBARTo: 0.1111
Evaluating agent: LogisticBARTo
Fitting initial BART model with first 22 observation(s)... Done.
Estimated policy value for LogisticBARTo: 0.1111
Evaluating agent: DefaultBARTm
Fitting initial BART model with first 22 observation(s)... Done.
Estimated policy value for DefaultBARTm: 0.1078
Evaluating agent: LogisticBARTm
Fitting initial BART model with first 22 observation(s)... Done.
Estimated policy value for LogisticBARTm: 0.1119
Bootstrap 8 results: {'Random': np.float64(0.07888631090487266), 'LinearTS': np.float64(0.10137875101378821), 'XGBoostTS': np.float64(0.0792000000000004), 'RFTS': np.float64(0.11449752883031403), 'DefaultBARTo': np.float64(0.1111111111111121), 'LogisticBARTo': np.float64(0.11111111111111209), 'DefaultBARTm': np.float64(0.1077551020408172), 'LogisticBARTm': np.float64(0.11192810

## Results Summary

Display mean performance across bootstrap samples, matching the R output format.

In [7]:
# Convert results to a DataFrame for easier analysis:
# one row per bootstrap sample, one column per agent
results_df = pd.DataFrame(ev_total)

# Right-align names to the longest agent name so the value column lines up.
# A fixed width of 12 is too narrow for 13-character names such as
# 'LogisticBARTo'; 12 is kept as the minimum so shorter name sets print as before.
name_width = max([12, *map(len, agent_names)])

print("Bootstrap Results Summary:")
print("=" * 50)
print("\nMean policy values across bootstrap samples:")
mean_results = results_df.mean()
for agent_name in agent_names:
    print(f"{agent_name:>{name_width}}: {mean_results[agent_name]:.4f}")

print("\nStandard errors:")
# Sample standard deviation (pandas default, ddof=1) of the per-bootstrap
# estimates, reported here as the bootstrap standard error
std_results = results_df.std()
for agent_name in agent_names:
    print(f"{agent_name:>{name_width}}: {std_results[agent_name]:.4f}")

Bootstrap Results Summary:

Mean policy values across bootstrap samples:
      Random: 0.0799
    LinearTS: 0.1008
   XGBoostTS: 0.0892
        RFTS: 0.1170
DefaultBARTo: 0.1179
LogisticBARTo: 0.1182
DefaultBARTm: 0.1116
LogisticBARTm: 0.1159

Standard errors:
      Random: 0.0108
    LinearTS: 0.0114
   XGBoostTS: 0.0084
        RFTS: 0.0086
DefaultBARTo: 0.0095
LogisticBARTo: 0.0117
DefaultBARTm: 0.0080
LogisticBARTm: 0.0100


In [8]:
# Additional analysis: percentile confidence intervals over the bootstrap estimates
# NOTE(review): with only a handful of bootstrap samples the 2.5/97.5 percentiles
# are interpolated between the most extreme estimates, so the intervals are coarse.

# Right-align names to the longest agent name so the value column lines up.
# A fixed width of 12 is too narrow for 13-character names such as
# 'LogisticBARTo'; 12 is kept as the minimum so shorter name sets print as before.
name_width = max([12, *map(len, agent_names)])

print("\n95% Confidence Intervals:")
print("=" * 30)
for agent_name in agent_names:
    values = results_df[agent_name].values
    ci_lower = np.percentile(values, 2.5)
    ci_upper = np.percentile(values, 97.5)
    mean_val = np.mean(values)
    print(f"{agent_name:>{name_width}}: {mean_val:.4f} [{ci_lower:.4f}, {ci_upper:.4f}]")


95% Confidence Intervals:
      Random: 0.0799 [0.0671, 0.0934]
    LinearTS: 0.1008 [0.0839, 0.1171]
   XGBoostTS: 0.0892 [0.0794, 0.1008]
        RFTS: 0.1170 [0.1103, 0.1334]
DefaultBARTo: 0.1179 [0.1024, 0.1287]
LogisticBARTo: 0.1182 [0.1011, 0.1337]
DefaultBARTm: 0.1116 [0.0987, 0.1232]
LogisticBARTm: 0.1159 [0.0998, 0.1270]


## Further analysis

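Estimated reward improvement of each agent over the Random baseline, computed per bootstrap sample and summarized by its mean and a 95% percentile interval.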

In [9]:
# Improvement of every agent over the Random baseline. The difference is taken
# row by row, i.e. within each bootstrap sample, before it is summarized.
if 'Random' in agent_names:
    # Right-align names to the longest agent name so both output lines line up.
    # A fixed width of 12 is too narrow for 13-character names such as
    # 'LogisticBARTo'; 12 is kept as the minimum so shorter name sets print as before.
    name_width = max([12, *map(len, agent_names)])

    print("\nRandom agent baseline:")
    for agent_name in agent_names:
        if agent_name == 'Random':
            # The baseline is not compared with itself
            continue
        improvement = results_df[agent_name] - results_df['Random']
        print(f"{agent_name:>{name_width}} improvement over Random: {np.mean(improvement):.4f}")
        # 95% percentile interval of the per-bootstrap differences
        ci_lower = np.percentile(improvement, 2.5)
        ci_upper = np.percentile(improvement, 97.5)
        print(f"{agent_name:>{name_width}} CI: [{ci_lower:.4f}, {ci_upper:.4f}]")


Random agent baseline:
    LinearTS improvement over Random: 0.0209
    LinearTS CI: [0.0122, 0.0331]
   XGBoostTS improvement over Random: 0.0093
   XGBoostTS CI: [0.0012, 0.0166]
        RFTS improvement over Random: 0.0372
        RFTS CI: [0.0265, 0.0437]
DefaultBARTo improvement over Random: 0.0381
DefaultBARTo CI: [0.0312, 0.0492]
LogisticBARTo improvement over Random: 0.0384
LogisticBARTo CI: [0.0276, 0.0510]
DefaultBARTm improvement over Random: 0.0317
DefaultBARTm CI: [0.0223, 0.0404]
LogisticBARTm improvement over Random: 0.0360
LogisticBARTm CI: [0.0311, 0.0419]


In [10]:
# Persist the bootstrap results for later analysis
import os
import pickle

# Output directory; created when missing, left alone when it already exists
results_dir = "./results/ope"
os.makedirs(results_dir, exist_ok=True)

# Everything needed to reproduce the summary tables.
# Note: agent_specs holds the agent classes themselves; pickle stores classes by
# reference, so loading this file requires those modules to be importable.
results_data = dict(
    bootstrap_results=ev_total,
    mean_results=mean_results.to_dict(),
    std_results=std_results.to_dict(),
    agent_specs=agent_specs,
    n_bootstrap=B,
    sample_fraction=sample_fraction,
)

with open(f"{results_dir}/drinkless_ope_results.pkl", "wb") as out_file:
    pickle.dump(results_data, out_file)

print(f"\nResults saved to {results_dir}/drinkless_ope_results.pkl")


Results saved to ./results/ope/drinkless_ope_results.pkl
