In [None]:
import sys
sys.path.append("../")
from src.environment.ac_control.env import ACControl
from src.environment.ac_control.agent import Agent
from src.environment.ac_control.interaction import behavior_policy_interaction, estimate_policy_interaction
from src.environment.interaction_buffer import Buffer

from src.ope.data import train_test_split
from src.ope.distribution_evaluation import total_variation_distance_score, brier_score
from src.ope.ope_evaluation import execute_ope
from src.ope.ranking_evaluation import nDCG

from src.ope.visualize import vis_multiclass_calibration_curve

import numpy as np
import pandas as pd

# Pin NumPy's global random state so that a fresh Restart-&-Run-All draws the
# same pseudo-random numbers wherever that global state is consulted.
SEED = 42
np.random.seed(SEED)

In [None]:
# Run the behavior policy (`agent`) in a fresh ACControl environment with
# trial_len=100_000, handing `b_buffer` to the rollout as its buffer.
# The returned history is what the next cell splits into train/test data.
agent = Agent()
b_buffer = Buffer()

behavior_policy_history = behavior_policy_interaction(
    env=ACControl(),
    buffer=b_buffer,
    model=agent,
    policy_name=0,
    trial_len=100_000,
    columns=['ID', 'State', 'Action', 'Reward', 'Next_state', 'Behavior_Policy'],
)

In [None]:
# Split the logged behavior-policy history into train/test datasets together
# with their feature (X) and label (Y) views.
# NOTE(review): 'half' presumably selects a 50/50 split -- confirm in src.ope.data.
(
    train_dataset,
    test_dataset,
    train_X,
    train_Y,
    test_X,
    test_Y,
) = train_test_split(behavior_policy_history, 'half')

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV

def _calibrated(estimator, method):
    """Wrap ``estimator`` in a 2-fold CalibratedClassifierCV using ``method``."""
    return CalibratedClassifierCV(estimator, cv=2, method=method)


# Candidate behavior-policy estimators, keyed by a short label.
# Insertion order matters: the evaluation loop assigns result rows in this order.
models = {
    # Uncalibrated baselines.
    'rf_5': RandomForestClassifier(max_depth=5),
    'rf_10': RandomForestClassifier(max_depth=10),
    'knn_30': KNeighborsClassifier(n_neighbors=30),
    'knn_100': KNeighborsClassifier(n_neighbors=100),
    'lr_10': LogisticRegression(C=10),
    'lr_1': LogisticRegression(C=1),
}

# Isotonic-calibrated variants.
models.update({
    'rf_10_iso': _calibrated(RandomForestClassifier(max_depth=10), "isotonic"),
    'knn_100_iso': _calibrated(KNeighborsClassifier(n_neighbors=100), "isotonic"),
    'lr_10_iso': _calibrated(LogisticRegression(C=10), "isotonic"),
})

# Sigmoid-calibrated variants.
# NOTE(review): 'rf_sig' wraps the same depth-10 forest as 'rf_10_iso'; its key
# lacks the '_10' used by its siblings.
models.update({
    'rf_sig': _calibrated(RandomForestClassifier(max_depth=10), "sigmoid"),
    'knn_100_sig': _calibrated(KNeighborsClassifier(n_neighbors=100), "sigmoid"),
    'lr_10_sig': _calibrated(LogisticRegression(C=10), "sigmoid"),
})


# The calibration-curve plot is disabled by being wrapped in a string literal.
'''
vis_multiclass_calibration_curve(
    test_dataset = test_dataset,
    models = models,
    data = (train_X, train_Y, test_X),
)
'''

In [None]:
# Evaluation budget used by the model-comparison loop in this cell.
n_ope_iter = 100  # passed to execute_ope as n_len; also the number of OPE columns in `opes`
n_ope_sample_size = 10_000  # passed to execute_ope as sample_size
n_interaction=30_000  # passed to estimate_policy_interaction as trial_len

# The string literal below is an experiment log (written in Japanese); it is
# never assigned or used. English gloss of its two headings:
#   1st heading: "the proposed method performed well"  (n_ope_iter=50 setting)
#   2nd heading: "the proposed method was inferior"    (n_ope_iter=100 setting)
# Each "n_dcg : ... , coef : ..." line has the format printed by the final cell.
'''
提案手法が良かった

n_ope_iter = 50
n_ope_sample_size = 5_000
n_interaction=10_000

n_dcg :  0.8447762970745545 , coef :  0.49711895766580066
n_dcg :  0.8447762970745545 , coef :  0.4978606579852835
n_dcg :  0.8747248725691102 , coef :  0.34398906419564845
n_dcg :  0.8279003362987507 , coef :  0.9992369165923656

提案手法が劣る

n_ope_iter = 100
n_ope_sample_size = 10_000
n_interaction=30_000

n_dcg :  0.910824379579457 , coef :  0.3323746430513848
n_dcg :  0.9089511138678534 , coef :  0.3471672655984594
n_dcg :  0.8427266995642954 , coef :  0.26709644913265
n_dcg :  0.7323147414714958 , coef :  0.962537869818187
'''

# Result matrix, one row per candidate model:
#   column 0     total_variation_distance_score
#   columns 1-3  brier_score with thresh 0.0 / 0.5 / 1.0
#   columns 4..  the n_ope_iter values returned by execute_ope
opes = np.zeros((len(models), 4 + n_ope_iter))


for idx, (model_key, model) in enumerate(models.items()):
    # Fit on the training half and predict class probabilities for the test half.
    model.fit(train_X, train_Y)
    dist = model.predict_proba(test_X)

    # Roll out the fitted model in a fresh environment with a fresh buffer.
    history = estimate_policy_interaction(
        env=ACControl(),
        buffer=Buffer(),
        policy_name=0,
        columns=['ID', 'State', 'Action', 'Reward', 'Next_state', 'Behavior_Policy'],
        trial_len=n_interaction,
        model=model,
    )

    # Predicted probability picked out by the test label of each row.
    # NOTE(review): assumes test_Y[row] is directly usable as a column index
    # into the predict_proba output -- confirm against the label encoding.
    logged_label_probs = [probs[test_Y[row]] for row, probs in enumerate(dist)]
    total_variation_distance = total_variation_distance_score(
        test_dataset=test_dataset,
        estimate_policy=logged_label_probs,
        coef=1,
    )

    # Same Brier-score call at three thresholds, evaluated in ascending order.
    brier_score_0_0, brier_score_0_5, brier_score_1 = (
        brier_score(test_dataset=test_dataset, predict_probs=dist, thresh=thresh)
        for thresh in (0.0, 0.5, 1.0)
    )

    ope_list = execute_ope(
        test_dataset=test_dataset,
        estimate_policy_history=history,
        model=model,
        sample_size=n_ope_sample_size,
        n_len=n_ope_iter,
        model_type='sklearn',
        v_estimator='IPS',
        error_function='relative',
    )

    # Store the four distribution metrics first, then the OPE values.
    row_metrics = (
        total_variation_distance,
        brier_score_0_0,
        brier_score_0_5,
        brier_score_1,
    )
    for col, score in enumerate(row_metrics):
        opes[idx, col] = score
    opes[idx, 4:] = ope_list

In [None]:
# One row per model, five rank columns: column 0 is filled from the mean OPE
# value, columns 1-4 from columns 0-3 of `opes`.
ranks = np.zeros(shape=(len(models), 5))

def get_rank(arr: np.ndarray) -> np.ndarray:
    """Return the 0-based ascending rank of every element of ``arr``.

    The smallest value gets rank 0, the next smallest rank 1, and so on.
    Ranking is done along the last axis, which is the default axis of
    ``np.argsort``.

    Args:
        arr: Array of scores to rank.

    Returns:
        Integer array of the same shape as ``arr`` holding each element's rank.
        Tied values are ranked in order of appearance.
    """
    # A stable sort makes tie-breaking deterministic: equal scores keep their
    # original relative order instead of depending on the sort implementation.
    order = np.argsort(arr, kind="stable")
    # The argsort of a permutation is its inverse, i.e. the rank of each position.
    return np.argsort(order)

# Column 0: rank of each model's mean over the OPE columns (4 onward) of `opes`.
ranks[:, 0] = get_rank(opes[:, 4:].mean(axis=1))

# Columns 1-4: ranks of `opes` columns 0-3, i.e. rank column k comes from
# opes column k - 1.
for metric_col in range(4):
    ranks[:, metric_col + 1] = get_rank(opes[:, metric_col])



In [None]:
# Per-model mean of the OPE columns (column 4 onward) of `opes`.
mean_ope = np.mean(opes[:, 4:], axis=1)

# For each of the four distribution metrics stored in opes[:, 0..3], report
#   * nDCG between the mean-OPE ranking and that metric's ranking, and
#   * the correlation coefficient between the raw metric and the mean OPE.
# ranks[:, k] (k = 1..4) was computed from opes[:, k - 1], so the rank column
# and the raw-score column of the same metric are offset by one. Using the
# same index for both would pair each nDCG with the next metric's correlation
# and correlate the first OPE sample (opes[:, 4]) with the mean on the last row.
for metric_col in range(4):
    rank_col = metric_col + 1
    # np.corrcoef returns the 2x2 correlation matrix; [0, 1] is the
    # coefficient between the metric and the mean OPE.
    coef = np.corrcoef(opes[:, metric_col], mean_ope)[0, 1]
    print(
        'n_dcg : ', nDCG(ranks[:, 0], ranks[:, rank_col]), ', coef : ', coef
    )