In [None]:
import sys
sys.path.append("../")
from src.environment.ac_control.env import ACControl
from src.environment.ac_control.agent import Agent
from src.environment.ac_control.interaction import behavior_policy_interaction, estimate_policy_interaction
from src.environment.interaction_buffer import Buffer

from src.ope.data import train_test_split
from src.ope.distribution_evaluation import eval_policy_distance
from src.ope.ope_evaluation import execute_ope
from src.ope.visualize import visualize_error_per_distance



import numpy as np
import pandas as pd

In [None]:
# Collect the logged interaction history: behavior_policy_interaction is run
# against a fresh ACControl environment, with the Agent passed as `model` and
# `b_buffer` as the buffer it is given. The returned history is what the
# train/test split in the next cell consumes.
# NOTE(review): trial_len is presumably the number of interaction steps and
# policy_name an identifier stored with each record -- confirm in
# src.environment.ac_control.interaction.
agent = Agent()
b_buffer = Buffer()

behavior_policy_history = behavior_policy_interaction(
    env=ACControl(),
    buffer=b_buffer,
    policy_name=0,
    columns=['ID', 'State', 'Action', 'Reward', 'Next_state', 'Behavior_Policy'],
    trial_len=30_000,
    model=agent,
)

In [None]:
# Split the logged history into a training part (used to fit the policy
# estimators) and a test part (used for distance and OPE evaluation).
# NOTE(review): 'half' presumably requests a 50/50 split -- confirm in src.ope.data.
(
    train_dataset,
    test_dataset,
    train_X,
    train_Y,
    test_X,
    test_Y,
) = train_test_split(behavior_policy_history, 'half')

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV


# Candidate estimators, each declared right next to the label stored for it in
# `model_name`, so the two lists stay index-aligned by construction.
# Sigmoid-calibrated variants (method="sigmoid", labels '*_sigmoid') are
# currently disabled.
models, model_name = map(list, zip(*[
    (RandomForestClassifier(), 'RF'),
    (KNeighborsClassifier(), 'KN'),
    (LogisticRegression(), 'LR'),
    (CalibratedClassifierCV(RandomForestClassifier(), cv=2, method="isotonic"), 'RF_isotonic'),
    (CalibratedClassifierCV(KNeighborsClassifier(), cv=2, method="isotonic"), 'KN_isotonic'),
    (CalibratedClassifierCV(LogisticRegression(), cv=2, method="isotonic"), 'LR_isotonic'),
]))

In [None]:
# Evaluate every candidate estimator: fit it on the training split, score its
# predicted probabilities on the test split, and store the policy distance
# together with the OPE results in one row of `opes`.

# Passed to execute_ope as n_len and used to size each result row.
# NOTE(review): assumes execute_ope returns n_len values -- confirm in
# src.ope.ope_evaluation.
OPE_N_LEN = 10
HISTORY_COLUMNS = ['ID', 'State', 'Action', 'Reward', 'Next_state', 'Behavior_Policy']

# Row i belongs to models[i]; column 0 holds the policy distance and the
# remaining columns take the values returned by execute_ope.
opes = np.zeros((len(models), 1 + OPE_N_LEN))

for model_idx, model in enumerate(models):
    model.fit(train_X, train_Y)
    action_probs = model.predict_proba(test_X)

    history = estimate_policy_interaction(
        env=ACControl(),
        buffer=Buffer(),
        policy_name=0,
        columns=HISTORY_COLUMNS,
        trial_len=1_000,
        model=model,
    )

    # Probability the fitted model assigns to the action recorded in each test
    # row. Rows are paired positionally with zip, because predict_proba output
    # is positional; a subscript lookup on test_Y would go by index label if
    # test_Y is a pandas Series. The names here also no longer reuse the outer
    # loop index.
    # NOTE(review): indexing a probability row by the raw label assumes the
    # labels are 0..K-1 and all occur in train_Y, i.e. that they coincide with
    # positions in model.classes_ -- confirm.
    logged_action_probs = [
        probs[action] for probs, action in zip(action_probs, test_Y)
    ]

    policy_distance = eval_policy_distance(
        test_dataset=test_dataset,
        estimate_policy=logged_action_probs,
        coef=1,
        distace_func='total',  # (sic) keyword kept exactly as the original call passes it
    )
    ope_list = execute_ope(
        test_dataset=test_dataset,
        estimate_policy_history=history,
        model=model,
        sample_size=3000,
        n_len=OPE_N_LEN,
        model_type='sklearn',
        v_estimator='IPS',
        error_function='relative',
    )

    opes[model_idx, 0] = policy_distance
    opes[model_idx, 1:] = ope_list

In [None]:
# Hand the result matrix (column 0: policy distance, columns 1..: OPE results,
# one row per model) and the matching labels to the project's plotting helper.
visualize_error_per_distance(
    opes,
    model_name,
)
