In [1]:
import sys, traceback
import random

from abc import ABCMeta, abstractmethod

print(sys.version)

import numpy as np
import matplotlib.pyplot as plt

print("Numpy: " + np.__version__)

3.7.6 (default, Jan  8 2020, 19:59:22) 
[GCC 7.3.0]
Numpy: 1.18.1


# XCS

実装に必要な要素
- 環境(Environment)
    - 入力: Action
    - 出力: State, reward
- 分類子集合（Population: P）
    - 分類子(Classifier)
        - 条件部（Condition: C）
        - 行動部(Action: A)
        - 予測報酬(Prediction: p)
        - 誤差(Error: ε)
        - 適合度(Fitness: F)
        - 重複度(Action set size: a)
        - 重合度(Numerosity: num)
    - 照合集合(Match set: M)
        - 照合
- 強化学習部（Reinforcement Learning component）
    - LCSの学習方針の設定(Behavioral Policy)
    - 環境からの状態(state)の入力に対して、[P]内の分類子から行動を選択し、実行する（Action decision）
    - 行動実施後に環境から得た報酬(reward)に基づいて、分類子の予測報酬pを更新する(Reward propagation)
    - 分類子を評価する(Rule evaluation)
- 進化計算部(Evolutionary Learning component)
    - 分類子の選択(Selection)
    - 新たな分類子の生成（Reproduction）
    - 低評価の分類子を削除(Delete)

## 分類子集合(Population)

In [2]:
class Classifier:
    p_I = np.finfo(np.float32).eps
    e_I = np.finfo(np.float32).eps
    f_I = np.finfo(np.float32).eps
    
    def __init__(self, L: int, n_act: int, time: int, random=False):
        self.condition = np.zeros(L, dtype=np.uint8) if not random else np.random.randint(0, 3, L, dtype=np.uint8)
        self.action = np.zeros(n_act, dtype=bool)
        self.prediction = self.p_I
        self.error = self.e_I
        self.fitness = self.f_I
        self.experience = 0
        self.time_stamp = time
        self.act_size = 1
        self.numeriosity = 1
        
    def initialize(self):
        self.condition = np.zeros(len(self.condition), dtype=np.uint8)
        self.action = np.zeros(len(self.action), dtype=bool)
        
    def __str__(self):
        return str(self.__dict__)

    def __repr__(self):
        return str(self.__dict__)
    
    def __getitem__(self, key):
        return self.__dict__[key]

    def __setitem__(self, key, value):
        self.__dict__[key] = value

class Population:
    def __init__(self, N: int, L: int, n_act: int, theta_del, delta, empty=True):
        self.N = N
        self.L = L
        self.n_act = n_act
        self.theta_del = theta_del
        self.delta = delta
        self.clf_list = [] if empty else [Classifier(L, n_act, 0, random=True) for _ in range(N)]
    
    def __iter__(self):
        self.__idx_current = 0
        return self
    
    def __next__(self):
        if self.__idx_current == len(self):
            raise StopIteration()
            
        idx = self.__idx_current
        self.__idx_current += 1
            
        return self.clf_list[idx]
    
    def __getitem__(self, idx):
        return self.clf_list[idx]
    
    def __len__(self):
        return len(self.clf_list)
    
    def get_list_of_clfattr(self, key):
        tmp = []
        for i in range(len(self)):
            tmp.append(self[i][key])
        return np.array(tmp)
            
    def append(self, clf):
        self.clf_list.append(clf)
        
    def delete_from_population(self):
        def deletion_vote(cl, ave_fitness):
            vote = cl["act_size"] * cl["numeriosity"]
            if (cl["experience"] > self.theta_del) and (cl["fitness"] / cl["numeriosity"] < delta * ave_fitness):
                vote = vote * ave_fitness / (cl["fitness"] / cl["numeriosity"])
            
            return vote
        
        if len(self) <= self.N:
            return
        
        ave_fitness = np.sum(self.get_list_of_clfattr("fitness")) / np.sum(self.get_list_of_clfattr("numeriosity"))
        
        vote_sum = 0
        for cl in self:
            vote_sum = vote_sum + deletion_vote(cl, ave_fitness)
            
        choice_point = np.random.rand() * vote_sum
        vote_sum = 0
        for cl in self:
            vote_sum = vote_sum + deletion_vote(cl, ave_fitness)
            if vote_sum > choice_point:
                if cl["numeriosity"] > 1:
                    cl["numeriosity"] -= 1
                else:
                    del self.clf_list[self.__idx_current - 1]
                
                return

## 照合集合（Match Set）

In [3]:
class MatchSet:
    def __init__(self, population: Population, sigma: np.ndarray, theta_mna: int,
                 P_s: float, time):
        self.M = []
        self.n_act = population.n_act
        while len(self.M) == 0:
            for cl in population:
                if self.__does_match(cl, sigma):
                    self.M.append(cl)
            if self.__unique_act().shape[0] < theta_mna:
                cl_c = self.__gen_covering_clf(sigma, P_s, time)
                population.append(cl_c)
                population.delete_from_population()
                self.M = []
                
    def __iter__(self):
        self.__idx_current = 0
        return self
    
    def __next__(self):
        if self.__idx_current == len(self):
            raise StopIteration()
            
        idx = self.__idx_current
        self.__idx_current += 1
            
        return self.M[idx]
    
    def __getitem__(self, idx):
        return self.M[idx]
    
    def __len__(self):
        return len(self.M)

    def __does_match(self, cl, sigma):
        for x_cl, x_s in zip(cl["condition"], sigma):
            if (x_cl != 2) and (x_cl != x_s):
                return False
            return True
        
    def __gen_covering_clf(self, sigma, P_s, time):
        acts = self.__unique_act()
        cl = Classifier(len(sigma), self.n_act, time)
        
        for i in range(len(cl["condition"])):
            if np.random.rand() < P_s:
                cl["condition"][i] = 2
            else:
                cl["condition"][i] = sigma[i]
                
        while True:
            act_tmp = np.random.randint(0, 2, self.n_act, dtype=bool)
            if len(acts) == 0 or not np.apply_along_axis(lambda x: np.array_equal(x, act_tmp), 1, acts).all():
                break
        
        cl["action"] = act_tmp.copy()
        
        return cl
        
    def __unique_act(self):
        acts = []
        for cl in self.M:
            acts.append(cl["action"])
            
        if len(acts) != 0:
            acts = np.unique(acts, axis=0)
        
        return np.array(acts)
    
    def get_list_of_clfattr(self, key):
        tmp = []
        for i in range(len(self)):
            tmp.append(self[i][key])
        return np.array(tmp)
    
    def action_match(self, act):
        acts = self.get_list_of_clfattr("action")
        idxs = np.arange(len(acts))
        acts = np.apply_along_axis(lambda x: np.allclose(x, act), 1, acts)
        
        idxs = idxs[acts]
        return idxs

In [157]:
hoge = Population(100, 6, 1, theta_del=2, delta=0.1)
sigma = np.random.randint(0, 2, 10, dtype=bool)
fuga = MatchSet(hoge, sigma, theta_mna=2, P_s=0.1, time=1)

In [159]:
fuga[:]

[{'condition': array([0, 1, 0, 1, 0, 1, 0, 0, 1, 0], dtype=uint8), 'action': array([False]), 'prediction': 1.1920929e-07, 'error': 1.1920929e-07, 'fitness': 1.1920929e-07, 'experience': 0, 'time_stamp': 1, 'act_size': 1, 'numeriosity': 1},
 {'condition': array([0, 1, 0, 1, 0, 2, 0, 0, 1, 0], dtype=uint8), 'action': array([ True]), 'prediction': 1.1920929e-07, 'error': 1.1920929e-07, 'fitness': 1.1920929e-07, 'experience': 0, 'time_stamp': 1, 'act_size': 1, 'numeriosity': 1}]

## 予測集合（Predict array）

In [7]:
import itertools

class PredictArray:
    act_bool = np.array([False, True])

    def __init__(self, match_set: MatchSet):
        self.act_all = np.array(list(itertools.product(self.act_bool, repeat=len(match_set[0]["action"]) )))
        self.PA = np.zeros(self.act_all.shape[0])
        self.PA[:] = np.nan
        self.FSA = np.zeros(self.act_all.shape[0])
        
        for cl in match_set:
            act_idx = self.__get_matched_idx(cl["action"])
            if np.isnan(self.PA[act_idx]):
                self.PA[act_idx] = cl["prediction"] * cl["fitness"]
            else:
                self.PA[act_idx] += cl["prediction"] * cl["fitness"]
            self.FSA[act_idx] += cl["fitness"]
        
        for act in range(len(self.act_all)):
            if(self.FSA[act] != 0):
                self.PA[act] = self.PA[act] / self.FSA[act]
   
    def __iter__(self):
        self.__idx_current = 0
        return self
    
    def __next__(self):
        if self.__idx_current == len(self):
            raise StopIteration()
            
        idx = self.__idx_current
        self.__idx_current += 1
            
        return self.PA[idx]
    
    def __getitem__(self, idx):
        return self.PA[idx]
    
    def __len__(self):
        return len(self.PA)
    
    def __get_matched_idx(self, act):
        idx = np.arange(len(self.act_all))[(self.act_all == act).all(axis=1)]
        return idx
    
    def select_action(self, p_explr):
        if np.random.rand() < p_explr:
            idx = np.random.choice(np.arange(len(self.PA))[np.logical_not(np.isnan(self.PA))])
            return self.act_all[idx]
        else:
            return self.act_all[np.nanargmax(self.PA)]

In [160]:
foo = PredictArray(fuga)
bar = foo.select_action(0.01)

In [163]:
foo[:]

array([1.1920929e-07, 1.1920929e-07])

## 行動集合（Action Set）

In [10]:
class ActionSet:
    def __init__(self, match_set: MatchSet, act: np.ndarray):
        self.A = []
        for cl in match_set:
            if (cl["action"] == act).all():
                self.A.append(cl)
                
    def __iter__(self):
        self.__idx_current = 0
        return self
    
    def __next__(self):
        if self.__idx_current == len(self):
            raise StopIteration()
            
        idx = self.__idx_current
        self.__idx_current += 1
            
        return self.A[idx]
    
    def __getitem__(self, idx):
        return self.A[idx]
    
    def __len__(self):
        return len(self.A)

In [11]:
ActionSet(fuga, act=bar)[0]

{'condition': array([1, 0, 2, 0, 1, 1, 1, 0, 0, 2], dtype=uint8), 'action': array([False, False, False]), 'prediction': 1.1920929e-07, 'error': 1.1920929e-07, 'fitness': 1.1920929e-07, 'experience': 0, 'time_stamp': 1, 'act_size': 1, 'numeriosity': 1}

## 環境(Environment)

In [77]:
# 環境の抽象クラス
class Environment(metaclass=ABCMeta):
    @abstractmethod
    def get_situation(self, t):
        pass
    
    @abstractmethod
    def exec_action(self, act, reward):
        pass

    
# マルチプレクサ問題の環境クラス
class MuxProblemEnvironment(Environment):
    def __init__(self, N_addr, max_iter=10000):
        self.k = N_addr
        self.N = self.k + 2 ** self.k
        self.max_iter = max_iter
        self.bit_array = np.random.randint(0, 2, (self.max_iter, self.N), dtype=bool)
        
    def get_situation(self, t):
        return self.bit_array[t, :]
        
    def exec_action(self, t, act, reward=1000):
        idx_true = self.__bits_to_int(self.bit_array[t, 0:self.k])
        res_true = self.bit_array[t, self.k + idx_true]
        
        ret = reward if act == res_true else 0
            
        return ret
    
    def __iter__(self):
        self.__idx_current = 0
        return self
    
    def __next__(self):
        if self.__idx_current == len(self):
            raise StopIteration()
            
        idx = self.__idx_current
        self.__idx_current += 1
            
        return self.bit_array[idx]
    
    def __getitem__(self, idx):
        return self.bit_array[idx]
    
    def __len__(self):
        return len(self.bit_array)
    
    def __bits_to_int(self, bits_list):
        def mypackbits(X, reverse=True):
            p = np.power(2, np.arange(X.shape[-1]))
            if reverse:
                p = p[::-1]
            return np.dot(X, p)

        idx = mypackbits(bits_list)
        
        return idx

## 強化学習部（Reinforcement Learning Component）

In [228]:
import copy

# 強化学習部の抽象クラス
class RLComponent(metaclass=ABCMeta):    
    @abstractmethod
    def parameter_update(self, A: ActionSet, P: float, Pop: Population):
        pass

    
# Q学習ライクな強化学習部 
class QlearnLikeRLComponent(RLComponent):
    def __init__(self, theta_mna, P_s, p_explr, alpha,
                 beta, eps_0, nu, theta_sub, do_actionset_subsumption):
        self.theta_mna = theta_mna
        self.P_s = P_s
        self.p_explr = p_explr
        self.alpha = alpha
        self.beta = beta
        self.eps_0 = eps_0
        self.nu = nu
        self.theta_sub = theta_sub
        self.do_act_subsumption = do_actionset_subsumption
    
    def __update_fitness(self, A: ActionSet):
        sum_ac = 0
        acc_k = np.zeros(len(A))
        
        for i, cl in enumerate(A):
            if(cl["error"] < self.eps_0):
                acc_k[i] = 1
            else:
                acc_k[i] = self.alpha * (cl["error"] / self.eps_0) ** (- self.nu)
            sum_ac += acc_k[i] * cl["numeriosity"]
        for i, cl in enumerate(A):
            cl["fitness"] += self.beta * (acc_k[i] * cl["numeriosity"] / sum_ac - cl["fitness"])
    
    def __could_subsume(self, cl):
        if(cl["experience"] > self.theta_sub):
            if(cl["error"] < self.eps_0):
                return True
        return False
    
    def __is_more_general(self, cl_gen, cl_spec):
        if(np.sum(cl_gen["condition"] == 2) <= np.sum(cl_spec["condition"])):
            return False
        i = 0
        while True:
            if(cl_gen["condition"][i] != 2 and cl_gen["condition"][i] != cl_spec["condition"][i]):
                return False
            i += 1
            
            if(i < len(cl_gen["condition"])):
                break
        return True
    
    def __do_action_subsumption(self, A: ActionSet, P: Population):
        cl = None
        for c in A:
            if(self.__could_subsume(c)):
                if((cl is not None) or (np.sum(c["condition"]) == 2) > np.sum(c["condition"]) == 2) or \
                    ((np.sum(c["condition"]) == 2) == np.sum(c["condition"]) == 2) and (np.random.rand() < 0.5):
                    cl = c
        if(cl is not None):
            for c in A:
                if(self.__is_more_general(cl, c)):
                    cl["numeriosity"] += c["numeriosity"]
                    A.remove(c)
                    P.remove(c)
    
    def parameter_update(self, A: ActionSet, P: float, Pop: Population):
        n_sum = np.sum([c["numeriosity"] for c in A])
        
        for cl in A:
            cl["experience"] += 1
            
            if cl["experience"] < 1 / self.beta:
                cl["prediction"] = cl["prediction"] + (P - cl["prediction"]) / cl["experience"]
                cl["error"] = cl["error"] + (np.abs(P - cl["prediction"]) - cl["error"]) / cl["experience"]
                cl["act_size"] = cl["act_size"] + (n_sum - cl["act_size"]) / cl["experience"]
            else:
                cl["prediction"] = cl["prediction"] + self.beta * (P - cl["prediction"])
                cl["error"] = cl["error"] + self.beta * (np.abs(P - cl["prediction"]) - cl["error"])
                cl["act_size"] = cl["act_size"] + self.beta * (n_sum - cl["act_size"])
        self.__update_fitness(A)
        if(self.do_act_subsumption):
            self.__do_action_subsumptiono(A, Pop)
        


## 進化計算部(Evolutionary Calculation Component)

In [229]:
class GAComponent:
    @abstractmethod
    def run_evolve(self, A: ActionSet, sigma, P: Population):
        pass
    
    @abstractmethod
    def add_time(self):
        self.t += 1
    
class SimpleGAComponent:
    def __init__(self, theta_ga, chi, mu, do_ga_subsumption):
        self.theta_ga = theta_ga
        self.chi = chi
        self.mu = mu
        self.do_ga_subsumption = do_ga_subsumption
        self.t = 0
        
    def __offspring(self, A: ActionSet):
        fitness_sum = 0
        for cl in A:
            fitness += cl["fitness"]
        choice_point = np.random.rand() * fitness_sum
        fitness_sum = 0
        for cl in A:
            fitness_sum += cl["fitness"]
            if(fitness_sum > choice_point):
                return cl
            
    def __apply_crossover(self, cl1, cl2):
        x = np.random.rand() * (len(cl1["condition"]) + 1)
        y = np.random.rand() * (len(cl1["condition"]) + 1)
        
        if(x > y):
            x, y = y, x
        i = 0
        while True:
            if(x <= i and i < y):
                cl1["condition"][i], cl2["condition"][i] = cl2["condition"][i], cl1["condition"][i]
            i += 1
            if(i > y):
                break
                
    def __apply_mutation(self, cl, sigma):
        i = 0
        while True:
            if(np.random.rand() < self.mu):
                if(cl["condition"][i] == 2):
                    cl["condition"][i] = sigma[i]
                else:
                    cl["condition"][i] = 2
                i += 1
            if i > len(cl["condition"]):
                break
        if(np.random.rand() < self.mu):
            possible_act = np.array(list(itertools.product([False, True], repeat=len(cl[0]["action"]))))
            select_idx = np.random.randint(0, len(possible_act))
            cl["action"] = possible_act[select_idx].copy()
    
    def __could_subsume(self, cl):
        if(cl["experience"] > self.theta_sub):
            if(cl["error"] < self.eps_0):
                return True
        return False
    
    def __is_more_general(self, cl_gen, cl_spec):
        if(np.sum(cl_gen["condition"] == 2) <= np.sum(cl_spec["condition"])):
            return False
        i = 0
        while True:
            if(cl_gen["condition"][i] != 2 and cl_gen["condition"][i] != cl_spec["condition"][i]):
                return False
            i += 1
            
            if(i < len(cl_gen["condition"])):
                break
        return True
    
    def __does_subsume(self, cl_sub, cl_tos):
        if (cl_sub["Action"] & cl_tos["Action"]).all():
            if(self.__could_subsume(cl_sub)):
                if(self.__is_more_general(cl_sub, cl_tos)):
                    return True
               
        return False
               
    def __insert_in_population(self, cl, P: Population):
        for c in P:
            if (c["condition"] == cl["condition"]).all() and (c["action"] == cl["action"]).all():
                c["numeriosity"] += 1
                return
        P.append(cl)
    
    def run_evolve(self, A: ActionSet, sigma, P: Population):
        sum_t = np.sum([cl["time_stamp"] * cl["numeriosity"] for cl in A]) / np.sum([cl["numeriosity"] for cl in A])
        if(self.t - sum_t > self.theta_ga):
            for cl in A:
                cl["time_stamp"] = self.t
            parent_1 = self.__offspring(A)
            parent_2 = self.__offspring(A)
            child_1 = copy.deepcopy(parent_1)
            child_2 = copy.deepcopy(parent_2)
            child_1["numeriosity"], child_2["numeriosity"] = 1, 1
            child_1["experience"], child_2["experience"] = 0, 0
            if(np.random.rand() < self.chi):
                self.__apply_crossover(child_1, child_2)
                child_1["prediction"] = (parent_1["prediction"] + parent_2["prediction"]) / 2
                child_1["error"] = (parent_1["error"] + parent_2["error"]) / 2
                child_1["fitness"] = (parent_1["fitness"] + parent_2["fitness"]) / 2
                child_2["prediction"] = child_1["prediction"]
                child_2["error"] = child_1["error"]
                child_2["fitness"] = child_2["fitness"]
            child_1["fitness"] = child_1["fitness"] * 0.1
            child_2["fitness"] = child_2["fitness"] * 0.1
            for child in (child_1, child_2):
                self.__apply_mutation(child, sigma)
                if(self.do_ga_subsumption):
                    if self.__does_subsume(parent_1, child):
                        parent_1["numeriosity"] += 1
                    elif self.__does_subsume(parent_2, child):
                        parent_2["numeriosity"] += 1
                    else:
                        self.__insert_in_population(child, P)
                else:
                    self.__insert_in_population(child, P)
                P.delete_from_population()

## XCS本体(XCS)

In [230]:
class XCS:
    def __init__(self, env: Environment, N=100, beta=0.1, alpha=0.1, eps_0=0.01, nu=5, 
                 gamma=0.71, theta_ga=25, chi=0.5, mu=0.01, theta_del=20,
                 delta=0.1, theta_sub=20, P_s=0.33, p_I=np.finfo(np.float32).eps,
                 e_I=np.finfo(np.float32).eps, f_I=np.finfo(np.float32).eps,
                 p_explr=0.5, theta_mna=1, do_ga_subsumption=False,
                 do_actionset_subsumption=False):
        self.N = N
        self.beta = beta
        self.alpha = alpha
        self.eps_0 = eps_0
        self.nu = nu
        self.gamma = gamma
        self.theta_ga = theta_ga
        self.chi = chi
        self.mu = mu
        self.theta_del = theta_del
        self.delta = delta
        self.theta_sub = theta_sub
        self.P_s = P_s
        self.p_explr = p_explr
        self.theta_mna = theta_mna
        self.do_ga_subsumption = do_ga_subsumption
        self.do_actionset_subsumption = do_actionset_subsumption
        Classifier.p_I = p_I
        Classifier.e_I = e_I
        Classifier.f_I = f_I
        
        self.env = env
        self.rp = QlearnLikeRLComponent(theta_mna, P_s, p_explr, alpha, beta, eps_0,
                                        nu, theta_sub, do_actionset_subsumption)
        self.ga = SimpleGAComponent(theta_ga, chi, mu, do_ga_subsumption)
        
        self.t = 0
        self.t_end = env.max_iter
        
        self.Pop = Population(N, len(env[0]), 1, theta_del, delta, empty=True)
        
    def run_experiment(self):
        before_rho = 0
        before_A = None
        before_sigma = None
        
        while True:
            sigma = self.env.get_situation(self.t)
            M = MatchSet(self.Pop, sigma, self.theta_mna, self.P_s, time=self.t)
            PA = PredictArray(M)
            act = PA.select_action(self.p_explr)
            A = ActionSet(M, act)
            rho = self.env.exec_action(self.t, act)
                        
            if(before_A is not None):
                P = before_rho + self.gamma * max(PA)
                self.rp.parameter_update(before_A, P, self.Pop)
                self.ga.run_evolve(before_A, before_sigma, self.Pop)
            
            if(self.t >= self.t_end - 1):
                P = rho
                self.rp.parameter_update(A, P, self.Pop)
                self.ga.run_evolve(A, sigma, self.Pop)
            else:
                before_A = copy.deepcopy(A)
                before_rho = rho
                before_sigma = sigma
            
            print(f"{self.t}: {self.Pop[:]}")

            if self.t >= self.t_end - 1:
                break
            else:
                self.t += 1

In [231]:
mux_env = MuxProblemEnvironment(N_addr=2, max_iter=1000)

In [232]:
xcs = XCS(mux_env, N=100)

In [233]:
xcs.run_experiment()

0: [{'condition': array([0, 2, 2, 2, 2, 2], dtype=uint8), 'action': array([ True]), 'prediction': 1.1920929e-07, 'error': 1.1920929e-07, 'fitness': 1.1920929e-07, 'experience': 0, 'time_stamp': 0, 'act_size': 1, 'numeriosity': 1}]
1: [{'condition': array([0, 2, 2, 2, 2, 2], dtype=uint8), 'action': array([ True]), 'prediction': 1.1920929e-07, 'error': 1.1920929e-07, 'fitness': 1.1920929e-07, 'experience': 0, 'time_stamp': 0, 'act_size': 1, 'numeriosity': 1}, {'condition': array([1, 1, 1, 0, 1, 2], dtype=uint8), 'action': array([ True]), 'prediction': 1.1920929e-07, 'error': 1.1920929e-07, 'fitness': 1.1920929e-07, 'experience': 0, 'time_stamp': 1, 'act_size': 1, 'numeriosity': 1}]
2: [{'condition': array([0, 2, 2, 2, 2, 2], dtype=uint8), 'action': array([ True]), 'prediction': 1.1920929e-07, 'error': 1.1920929e-07, 'fitness': 1.1920929e-07, 'experience': 0, 'time_stamp': 0, 'act_size': 1, 'numeriosity': 1}, {'condition': array([1, 1, 1, 0, 1, 2], dtype=uint8), 'action': array([ True]), 

In [234]:
xcs.Pop[:]

[{'condition': array([0, 2, 2, 2, 2, 2], dtype=uint8), 'action': array([ True]), 'prediction': 1000.0, 'error': 0.0, 'fitness': 0.1000001072883606, 'experience': 1, 'time_stamp': 0, 'act_size': 1.0, 'numeriosity': 1},
 {'condition': array([1, 1, 1, 0, 1, 2], dtype=uint8), 'action': array([ True]), 'prediction': 1.1920929e-07, 'error': 1.1920929e-07, 'fitness': 1.1920929e-07, 'experience': 0, 'time_stamp': 1, 'act_size': 1, 'numeriosity': 1}]