In [None]:
cd drive/MyDrive/JAIST/Research/RL/task-grouping

In [None]:
from src.env.ac_control import ACControl
from src.env.interaction import Save
from src.evaluation.metrics import KL_divergence, total_variation_distance
import numpy as np

In [None]:
class Agent:
    """Hand-crafted stochastic policy keyed on the observed temperature.

    The temperature axis is split into six half-open bands::

        [0, 10)   -> cold_dist
        [10, 20)  -> quit_cold_dist
        [20, 25)  -> bit_cold_dist
        [25, 30)  -> bit_hot_dist
        [30, 40)  -> quit_hot_dist
        [40, 50)  -> hot_dist

    Every band owns a fixed probability vector over the action indices
    ``0..6``; each ``*_hot_dist`` is the mirror image of the matching
    ``*_cold_dist``.
    """

    def __init__(self):
        # Probability vectors over the 7 action indices; each sums to 1.
        self.cold_dist = [0.05, 0.05, 0.05, 0.05, 0.1, 0.2, 0.5]
        self.quit_cold_dist = [0.05, 0.05, 0.05, 0.1, 0.15, 0.2, 0.4]
        self.bit_cold_dist = [0.05, 0.05, 0.1, 0.1, 0.3, 0.2, 0.2]
        # The "hot" variants are the reversed "cold" ones.
        self.bit_hot_dist = list(reversed(self.bit_cold_dist))
        self.quit_hot_dist = list(reversed(self.quit_cold_dist))
        self.hot_dist = list(reversed(self.cold_dist))

    def get_action(self, tmp):
        """Sample one action index for temperature ``tmp``.

        Returns:
            tuple: ``(action, dist)`` where ``action`` is a NumPy array of
            shape ``(1,)`` holding the sampled index and ``dist`` is the
            probability list it was drawn from.

        Raises:
            ValueError: if ``tmp`` lies outside ``[0, 50)``.
        """
        dist = self._get_dist(tmp)
        action = np.random.choice(len(dist), 1, p=dist)
        return action, dist

    def _get_dist(self, tmp):
        """Return the action distribution of the band that contains ``tmp``.

        Args:
            tmp: temperature as a Python/NumPy scalar or a size-1 array.

        Raises:
            ValueError: if ``tmp`` is not a single number or falls outside
                ``[0, 50)``. Failing here is clearer than returning ``None``
                and crashing later inside the caller.
        """
        try:
            value = float(np.asarray(tmp).item())
        except (TypeError, ValueError) as exc:
            raise ValueError(
                f"temperature must be a single number, got {tmp!r}"
            ) from exc

        # Half-open intervals cover the same integers as the np.arange
        # ranges they replace, and additionally accept values such as 9.5.
        bands = (
            (0, 10, self.cold_dist),
            (10, 20, self.quit_cold_dist),
            (20, 25, self.bit_cold_dist),
            (25, 30, self.bit_hot_dist),
            (30, 40, self.quit_hot_dist),
            (40, 50, self.hot_dist),
        )
        for low, high, dist in bands:
            if low <= value < high:
                return dist

        raise ValueError(f"temperature {tmp!r} is outside the supported range [0, 50)")


In [None]:
# Roll the hand-crafted agent through the environment and log every transition.
env = ACControl()
save = Save()
agent = Agent()

ID = 1                # identifier written in the first field of every logged row
TRIAL_LEN = 5000      # number of environment steps to simulate
# Column names handed to save.get_df; presumably matched positionally with
# the arguments passed to save.add below -- confirm against Save.
COLUMNS = ['ID', 'State', 'Action', 'Reward', 'Next_state', 'Prob']

try:
    observation = env.reset()

    for step in range(TRIAL_LEN):
        action, dist = agent.get_action(observation)
        # get_action returns a length-1 array; unwrap it once and reuse it.
        action_idx = action.item()

        next_observation, reward = env.step(action_idx)
        # The last field is the probability the agent assigned to the chosen action.
        save.add(ID, observation, action_idx, reward, next_observation, dist[action_idx])
        observation = next_observation
finally:
    # Close the environment even when a step raises.
    env.close()

log = save.get_df(COLUMNS)

In [None]:


def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``np.exp`` from overflowing to
    ``inf`` (and the output from becoming ``nan``) for large inputs.

    Args:
        x: array-like of numbers.

    Returns:
        numpy.ndarray: non-negative values with the same shape as ``x`` that
        sum to 1 (an empty array when ``x`` is empty).
    """
    values = np.asarray(x)
    if values.size == 0:
        # np.max has no identity for empty input; nothing to normalise anyway.
        return np.exp(values)

    exps = np.exp(values - np.max(values))
    return exps / np.sum(exps)

In [None]:
def data_split(dataset, train_size, random_state=1):
    """Randomly split ``dataset`` into disjoint train and test parts.

    Args:
        dataset: pandas DataFrame (or Series) to split.
        train_size: fraction of rows, in ``[0, 1]``, that goes to the train
            part; the row count is ``int(len(dataset) * train_size)``.
        random_state: seed forwarded to ``sample``. The default of 1 keeps
            the split that earlier runs of this notebook produced.

    Returns:
        tuple: ``(train, test)``. ``train`` is in sampled order and ``test``
        keeps the original row order.

    Raises:
        ValueError: if ``train_size`` is outside ``[0, 1]``.
    """
    if not 0 <= train_size <= 1:
        raise ValueError(f"train_size must be within [0, 1], got {train_size!r}")

    n_rows = len(dataset)
    n_train = int(n_rows * train_size)

    # Sample row positions rather than index labels. With duplicated labels a
    # label-based complement (`~index.isin(train.index)`) would also discard
    # every test row sharing a label with a train row.
    sampled = dataset.reset_index(drop=True).sample(n=n_train, random_state=random_state)
    train_positions = np.asarray(sampled.index, dtype=np.intp)

    is_test = np.ones(n_rows, dtype=bool)
    is_test[train_positions] = False

    train = dataset.iloc[train_positions]
    test = dataset.iloc[is_test]

    return train, test

In [None]:
def process(model, train_X, train_Y, test_X, test_dataset):
    """Fit ``model`` and score its predicted action distributions per state.

    The model is fitted on ``(train_X, train_Y)`` and ``predict_proba`` is
    evaluated on ``test_X``. Each prediction row is prefixed with its state
    value and duplicate rows are collapsed with ``np.unique``. For every
    remaining row the prediction is compared with the distribution returned
    by the global ``agent`` for that state.

    Args:
        model: classifier exposing ``fit`` and ``predict_proba``.
        train_X, train_Y: training inputs and labels passed to ``fit``.
        test_X: array of test states; it is flattened, so one value per row
            is expected.
        test_dataset: accepted for call compatibility; not used here.

    Returns:
        tuple: ``(unique_rows, metrics)``. ``unique_rows`` holds
        ``[state, p_0, p_1, ...]`` per row. ``metrics`` has three columns:
        state, KL divergence and total variation distance.
    """
    model.fit(train_X, train_Y)
    predicted = model.predict_proba(test_X)

    # Put the state in column 0 so it survives de-duplication with its row.
    state_and_probs = np.insert(predicted, 0, test_X.flatten(), axis=1)
    unique_rows = np.unique(state_and_probs, axis=0)

    metrics = np.zeros((unique_rows.shape[0], 3))
    for row_idx in range(unique_rows.shape[0]):
        state = unique_rows[row_idx, 0]
        # NOTE(review): the predict_proba output is passed through softmax
        # again before comparison -- confirm this re-normalisation is intended.
        estimated = softmax(unique_rows[row_idx, 1:])
        behaviour = agent._get_dist(state)

        metrics[row_idx, 0] = state
        metrics[row_idx, 1] = KL_divergence(behaviour, estimated)
        metrics[row_idx, 2] = total_variation_distance(behaviour, estimated)

    return unique_rows, metrics



In [None]:
from sklearn.utils import gen_batches
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression


# Candidate estimators of the behaviour policy.
# k-NN uses as many neighbours as there are distinct actions in the log.
kn = KNeighborsClassifier(n_neighbors=len(log.Action.unique()))
# Seed the stochastic learners like gb / lr below so repeated fits on the
# same data give the same model.
rf = RandomForestClassifier(random_state=0)
mlp = MLPClassifier(random_state=0)
gb = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
lr = LogisticRegression(random_state=0)

train_dataset, test_dataset = data_split(log, train_size=0.8)

# Features are the logged states as a single column; labels are the integer actions.
train_X = np.stack(list(train_dataset.State)).reshape(-1, 1)
train_Y = np.array(train_dataset.Action).astype('int')
test_X = np.stack(list(test_dataset.State)).reshape(-1, 1)
test_Y = np.array(test_dataset.Action).astype('int')

In [None]:
# Fit logistic regression alone and keep its per-state predictions and metrics.
lr_dist, lrlog = process(lr, train_X, train_Y, test_X, test_dataset)

In [None]:
# Fit every candidate on the same split. Each call returns
# (unique state/probability rows, per-state metric table).
kn_dist, knlog = process(kn, train_X, train_Y, test_X, test_dataset)
rf_dist, rflog = process(rf, train_X, train_Y, test_X, test_dataset)
gb_dist, gblog = process(gb, train_X, train_Y, test_X, test_dataset)
mlp_dist, mlplog = process(mlp, train_X, train_Y, test_X, test_dataset)
# Logistic regression is fitted here again so this cell defines all five results.
lr_dist, lrlog = process(lr, train_X, train_Y, test_X, test_dataset)


In [None]:
import matplotlib.pyplot as plt

# Overlay the per-state KL divergence of every estimator.
# In each metric table column 0 is the state and column 1 the KL divergence.
for algo_label, algo_log in (
    ('lr', lrlog),
    ('rf', rflog),
    ('mlp', mlplog),
    ('gb', gblog),
    ('knn', knlog),
):
    plt.scatter(algo_log[:, 0], algo_log[:, 1], label=algo_label, alpha=0.5)

plt.xlabel('temperature')
plt.ylabel('KL')
plt.title('scatter plot of kl per algorithms')
plt.legend()
plt.show()

In [None]:


# Distribution of the per-state KL divergence (column 1 of each metric table),
# one violin per estimator.
kl_by_algorithm = {
    'LR': lrlog[:, 1],
    'RF': rflog[:, 1],
    'MLP': mlplog[:, 1],
    'GB': gblog[:, 1],
    'KNN': knlog[:, 1],
}

fig = plt.figure()
ax = fig.add_subplot(111)

ax.violinplot(list(kl_by_algorithm.values()), showmedians=True)
# Violins are drawn at positions 1..5, so the ticks are placed there.
ax.set_xticks(list(range(1, len(kl_by_algorithm) + 1)))
ax.set_xticklabels(list(kl_by_algorithm.keys()))
ax.set_xlabel('algorithm')
ax.set_ylabel('KL')
ax.set_ylim(0, 0.8)
ax.set_title('violinplot of kl per algorithms')
plt.show()

In [None]:
def calc_evaluation(probs_per_state, log):
    """Print KL / TVD summaries for each temperature band.

    For every band two views are printed:

    * ``dist_mean`` -- the metric between the global ``agent``'s distribution
      for that band and the mean of the band's rows in ``probs_per_state``
      (averaged as given; no softmax is applied here);
    * ``*_mean`` -- the mean of the per-state metric values stored in ``log``.

    Band limits are half-open ``[low, high)``, matching ``Agent._get_dist``,
    so states exactly at 10, 20, 25, 30 and 40 are counted. The first band
    has no lower limit.

    Args:
        probs_per_state: 2-D array, column 0 = state, remaining columns =
            predicted action probabilities.
        log: 2-D array with columns state, KL divergence, total variation
            distance.
    """
    # (label, behaviour distribution, inclusive lower limit, exclusive upper limit)
    bands = (
        ('cold', agent.cold_dist, -np.inf, 10),
        ('quit_cold', agent.quit_cold_dist, 10, 20),
        ('bit_cold', agent.bit_cold_dist, 20, 25),
        ('bit_hot', agent.bit_hot_dist, 25, 30),
        ('quit_hot', agent.quit_hot_dist, 30, 40),
        ('hot', agent.hot_dist, 40, 50),
    )

    log_states = log[:, 0]
    prob_states = probs_per_state[:, 0]

    for label, b, low, high in bands:
        print(label)

        in_band_log = (log_states >= low) & (log_states < high)
        in_band_probs = (prob_states >= low) & (prob_states < high)
        if not in_band_log.any() or not in_band_probs.any():
            # Averaging an empty selection would only yield NaN and warnings.
            print('no states in this band')
            continue

        tmp_log = log[in_band_log]
        # Drop column 0 (the state) after averaging; the rest is the mean distribution.
        e = np.mean(probs_per_state[in_band_probs], axis=0)[1:]

        print('KL dist_mean : ', KL_divergence(b, e), 'KL_mean : ', np.mean(tmp_log[:, 1]))
        print('TVD dist_mean : ', total_variation_distance(b, e), 'TVD_mean : ', np.mean(tmp_log[:, 2]))

In [None]:
# Print the band summaries for every estimator, separated by a rule.
evaluation_inputs = (
    (lr_dist, lrlog),
    (rf_dist, rflog),
    (mlp_dist, mlplog),
    (gb_dist, gblog),
    (kn_dist, knlog),
)

for position, (dist_rows, metric_rows) in enumerate(evaluation_inputs):
    if position > 0:
        # The separator goes between reports only, not after the last one.
        print('-'*40)
    calc_evaluation(dist_rows, metric_rows)
