In [1]:
from obp.ope import (
    OffPolicyEvaluation,
    InverseProbabilityWeighting,
    DoublyRobust,
    SwitchDoublyRobust,
    SubGaussianDoublyRobust,
    DoublyRobustTuning,
    RegressionModel,
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
#############################################################
# 5) OPE
#############################################################

def run_ope(fb, action_dist, reward_model, alpha=0.05, random_state=None):
    """Run off-policy evaluation with a fixed set of estimators and time it.

    Builds an ``OffPolicyEvaluation`` object on ``fb`` with IPW, DR,
    Switch-DR, SG-DR and a lambda-tuned DR estimator (registered under the
    name ``"mrdr"``), then computes point estimates and confidence intervals
    for the evaluation policy described by ``action_dist``.

    Parameters
    ----------
    fb
        Logged bandit feedback handed to ``OffPolicyEvaluation``
        (presumably the dict produced by an obp dataset -- confirm with caller).
    action_dist
        Action choice probabilities of the evaluation policy; forwarded
        unchanged to the estimators.
    reward_model
        Rewards predicted by a regression model; forwarded as
        ``estimated_rewards_by_reg_model``.
    alpha
        Significance level of the confidence intervals (default 0.05).
    random_state
        Seed forwarded to the interval estimation; ``None`` (default) keeps
        the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(values, intervals)`` exactly as returned by
        ``OffPolicyEvaluation.estimate_policy_values`` and
        ``OffPolicyEvaluation.estimate_intervals``.
    """
    # Imported locally so the function does not rely on a `time` name that
    # happens to be defined in some other notebook cell.
    from time import perf_counter

    estimators = [
        InverseProbabilityWeighting(),
        DoublyRobust(),
        # NOTE(review): Switch-DR and SG-DR are built with their default
        # hyperparameters -- confirm these defaults are what is intended.
        SwitchDoublyRobust(),
        SubGaussianDoublyRobust(),
        # NOTE(review): registered as "mrdr", but this is DoublyRobustTuning
        # (DR with a hyperparameter picked from `lambdas`). The name is kept
        # because callers may look results up by this key -- confirm the
        # label matches the intended estimator.
        DoublyRobustTuning(
            lambdas=[0.0, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0],
            tuning_method="slope",
            estimator_name="mrdr",
        ),
    ]
    ope = OffPolicyEvaluation(bandit_feedback=fb, ope_estimators=estimators)

    print("[OPE] Estimating policy values ...")
    start = perf_counter()

    values = ope.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=reward_model,
    )
    # The meta-class method is `estimate_intervals` (plural); the singular
    # `estimate_interval` only exists on the individual estimators.
    intervals = ope.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=reward_model,
        alpha=alpha,
        random_state=random_state,
    )

    print(f"[DONE] OPE took {perf_counter() - start:.2f} sec\n")
    return values, intervals
