In [None]:
# Make the repository's `src` package importable: the parent directory is
# appended to the module search path before the `from src...` imports below.
import sys
sys.path.append("../")
# Environment, agent and interaction helpers for the AC-control task.
from src.environment.ac_control.env import ACControl
from src.environment.ac_control.agent import Agent
from src.environment.ac_control.interaction import behavior_policy_interaction, estimate_policy_interaction
from src.environment.interaction_buffer import Buffer

# Off-policy-evaluation utilities: data splitting, distribution metrics,
# OPE execution and ranking metrics.
from src.ope.data import train_test_split
from src.ope.distribution_evaluation import total_variation_distance_score, brier_score
from src.ope.ope_evaluation import execute_ope
from src.ope.ranking_evaluation import nDCG

from src.ope.visualize import vis_multiclass_calibration_curve

import numpy as np
import pandas as pd

# Seed NumPy's global random generator so that steps drawing from it are
# repeatable when the notebook is run top to bottom on a fresh kernel.
np.random.seed(42)

In [None]:
# Collect the logged interaction history of the behavior policy.
# A fresh Agent is passed as `model` and a fresh Buffer as `buffer`; the value
# returned by behavior_policy_interaction is kept as `behavior_policy_history`.
# `columns` lists the column names handed to the helper.
# NOTE(review): the meaning of `policy_name = 0` and the unit of `trial_len`
# (steps vs. episodes) are defined inside
# src.environment.ac_control.interaction — confirm there.
agent = Agent()
b_buffer = Buffer()

behavior_policy_history = behavior_policy_interaction(
    env = ACControl(),
    buffer = b_buffer,
    policy_name = 0, 
    columns = ['ID', 'State', 'Action', 'Reward', 'Next_state', 'Behavior_Policy'], 
    trial_len = 100_000, 
    model = agent
)

In [None]:

# Split the logged history using the 'half' mode of the project's
# train_test_split, which returns six objects: train/test datasets and their
# X / Y parts.
# NOTE(review): presumably X holds the model inputs and Y the logged actions
# (the classifiers later call fit(train_X, train_Y)); the exact contents and
# how 'half' divides the rows are decided in src.ope.data — confirm there.
train_dataset, test_dataset, train_X, train_Y, test_X, test_Y = train_test_split(behavior_policy_history, 'half')

In [None]:
def _acc(data: pd.DataFrame):
    return np.sum(data.high_conf_action == data.Action)

def _conf(data: pd.DataFrame):
    return np.sum(data.prob_e_policy_of_high_conf_action)

def ece(data: pd.DataFrame):
    """Expected calibration error over pre-binned predictions.

    For every distinct value of ``data.bins`` the absolute gap between the
    bin's accuracy (``_acc`` divided by the bin size) and its mean confidence
    (``_conf`` divided by the bin size) is weighted by the bin's share of the
    binned rows, and the weighted gaps are summed.

    Parameters
    ----------
    data : pd.DataFrame
        Must provide a ``bins`` column plus the columns read by ``_acc`` and
        ``_conf`` (``high_conf_action``, ``Action``,
        ``prob_e_policy_of_high_conf_action``).

    Returns
    -------
    float
        The weighted sum of per-bin gaps; ``0.0`` when no row has a bin.

    Notes
    -----
    Rows whose ``bins`` value is missing (NaN) are ignored, both in the
    per-bin terms and in the normalising row count. Previously such rows made
    ``get_group`` fail with ``KeyError`` because ``unique()`` reports NaN as a
    value while ``groupby`` drops it.
    """
    binned = data[data.bins.notna()]
    n_data = len(binned)
    if n_data == 0:
        return 0.0

    ece_score = 0.0
    # Group once instead of re-grouping the whole frame for every bin.
    # sort=False keeps the bins in first-appearance order, i.e. the same
    # order in which unique() would have listed them.
    for _, frac_data in binned.groupby('bins', sort=False):
        n_frac_data = len(frac_data)
        if n_frac_data == 0:
            # Can happen for unobserved categories of a categorical `bins`.
            continue

        acc = _acc(frac_data) / n_frac_data
        conf = _conf(frac_data) / n_frac_data

        ece_score += (n_frac_data / n_data) * np.abs(acc - conf)
    return ece_score

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV

# Candidate classifiers, keyed by a short label.
# The dict keeps insertion order, and that order is the one in which the
# models are later iterated, so the entries are listed exactly as before.
# NOTE(review): 'rf_sig' wraps a max_depth=10 forest but, unlike the other
# calibrated entries, its key carries no '_10' — left unchanged on purpose.
models = dict(
    # Random forests with two depth limits.
    rf_5=RandomForestClassifier(max_depth=5),
    rf_10=RandomForestClassifier(max_depth=10),
    # k-nearest-neighbours with two neighbourhood sizes.
    knn_30=KNeighborsClassifier(n_neighbors=30),
    knn_100=KNeighborsClassifier(n_neighbors=100),
    # Logistic regression with two values of C.
    lr_10=LogisticRegression(C=10),
    lr_1=LogisticRegression(C=1),
    # Isotonic calibration (cv=2) wrapped around one model of each family.
    rf_10_iso=CalibratedClassifierCV(
        RandomForestClassifier(max_depth=10), cv=2, method="isotonic"
    ),
    knn_100_iso=CalibratedClassifierCV(
        KNeighborsClassifier(n_neighbors=100), cv=2, method="isotonic"
    ),
    lr_10_iso=CalibratedClassifierCV(
        LogisticRegression(C=10), cv=2, method="isotonic"
    ),
    # Sigmoid calibration (cv=2) wrapped around the same base models.
    rf_sig=CalibratedClassifierCV(
        RandomForestClassifier(max_depth=10), cv=2, method="sigmoid"
    ),
    knn_100_sig=CalibratedClassifierCV(
        KNeighborsClassifier(n_neighbors=100), cv=2, method="sigmoid"
    ),
    lr_10_sig=CalibratedClassifierCV(
        LogisticRegression(C=10), cv=2, method="sigmoid"
    ),
)

# Fit every candidate model on the training split and score it on the test
# split with two metrics: expected calibration error and total variation
# distance. Results are appended in the iteration order of `models`.

# Confidence-bin edges 0.0, 0.1, ..., 1.0 (ten equal-width bins).
# The edges have to reach 1.0: with `np.arange(10) / 10` the last edge was
# 0.9, so pd.cut returned NaN for every confidence above 0.9 and those rows
# had no bin when the calibration error was computed.
bins = np.arange(11) / 10

# Positional view of the test targets.
# NOTE(review): the container type of test_Y is decided by train_test_split
# (not visible here); np.asarray makes the lookups below independent of any
# pandas index it may carry.
test_actions = np.asarray(test_Y).ravel()

ece_s, total_v_distance = [], []
for model_key, model in models.items():
    model.fit(train_X, train_Y)

    preds = model.predict(test_X)  # predicted class label per test row
    predict_probs = model.predict_proba(test_X)  # one column per entry of model.classes_

    # Translate class labels into predict_proba column positions through
    # model.classes_ instead of using the label value itself as the column
    # index, which is only correct when the labels are exactly 0..K-1.
    class_index = pd.Index(model.classes_)
    row_ids = np.arange(len(predict_probs))

    # Probability the model assigns to its own predicted class.
    pred_cols = class_index.get_indexer(preds)
    p_e_policy_of_action = predict_probs[row_ids, pred_cols]

    # Probability the model assigns to the action recorded in the test split.
    # get_indexer yields -1 for a label the model never saw during fit; such
    # rows get probability 0.0 rather than another class's column.
    logged_cols = class_index.get_indexer(test_actions)
    p_e_policy_in_b_policy_action = np.where(
        logged_cols >= 0, predict_probs[row_ids, logged_cols], 0.0
    )

    test_dataset_cp = test_dataset.copy()

    test_dataset_cp['prob_e_policy_of_b_policy_action'] = p_e_policy_in_b_policy_action
    test_dataset_cp['prob_e_policy_of_high_conf_action'] = p_e_policy_of_action
    test_dataset_cp['high_conf_action'] = preds
    # labels=False stores the integer bin number; include_lowest=True closes
    # the first interval on the left so a value of exactly 0 is binned too.
    test_dataset_cp["bins"] = pd.cut(
        test_dataset_cp.prob_e_policy_of_high_conf_action,
        bins=bins,
        labels=False,
        include_lowest=True,
    )
    ece_s.append(ece(test_dataset_cp))
    total_v_distance.append(
        total_variation_distance_score(
            test_dataset_cp, test_dataset_cp.prob_e_policy_of_b_policy_action, 1
        )
    )

In [None]:
import matplotlib.pyplot as plt

# Scatter plot with one point per model:
# x = total variation distance, y = expected calibration error.
# Both score lists were filled in the iteration order of `models`, so the
# three sequences can be walked in lockstep; the check below turns a silent
# mismatch (e.g. an evaluation run that stopped early) into a clear error.
assert len(models) == len(total_v_distance) == len(ece_s), \
    "score lists and models are out of sync - re-run the evaluation cell"

fig, ax = plt.subplots()
for label, tvd, ece_value in zip(models, total_v_distance, ece_s):
    ax.scatter(tvd, ece_value, label=label)

ax.set_xlabel('total_variation_distance')
ax.set_ylabel('expected_calibration_error')
ax.set_title('Expected calibration error vs. total variation distance')

ax.legend()
plt.show()
plt.close(fig)
