In [65]:
import numpy as np

# MDPの構築
from typing import NamedTuple ,Optional
np.random.seed(10)

S = 30# 状態数
A = 3 # 行動数
S_set = np.arange(S)
A_set = np.arange(A)
gamma = 0.9 # 割引率

# rew = np.random.uniform(0,1,size=(S,A)) # 報酬
# rew = np.array(rew)

# H = int (1/(1-gamma) + 20)
H = 10
rew = np.ones((H, S, A))

# ベルヌーイ分布からマスクを生成（p=0.5）
zero_mask = np.random.binomial(1, 0.5, size=rew.shape)

# 要素ごとの乗算
rew = rew * zero_mask

# print(rew)
# P = np.random.rand(S,A,S) # 遷移確率
# P = P.reshape(S*A,S)
# P = P/np.sum(P,axis=1,keepdims=True) # 正規化
# P = P.reshape(S,A,S)
# np.testing.assert_allclose(P.sum(axis=-1), 1, atol=1e-6)

P = np.random.dirichlet(alpha=np.array([0.1] * S), size=(H, S * A))

# 配列の形状を変更
P = P.reshape(H, S, A, S)

# 合計が1になることを確認
np.testing.assert_allclose(P.sum(axis=-1), 1, atol=1e-6)

class MDP(NamedTuple):
    S_set: np.ndarray
    A_set: np.ndarray
    rew: np.ndarray
    P: np.ndarray
    gamma: float
    H: int
    K:int


    optimal_V: Optional[np.ndarray] = None

    @property
    def S(self):
        return len(self.S_set)

    @property
    def A(self):
        return len(self.A_set)
mdp = MDP(S_set,A_set,rew,P,gamma,H,K=50)

In [66]:
def sampler(mdp: MDP, policy,s,h):
    """状態sで行動aをとり、次の状態と報酬を返す"""
    a = policy[h,s]
    a = int(a)
    # print(a)
    next_s = np.random.choice(mdp.S_set, p=mdp.P[h,s,a])
    rew = mdp.rew[h,s, a]
    return next_s, rew,a

In [67]:
def feature_func(n:int,delta:float,mdp:MDP):
    if n == 0:
        return 1
    return np.min([1,np.sqrt((0.52/n) * (1.4*np.log(np.log(np.max([np.e,n]))) + np.log(26*mdp.S*mdp.A*(mdp.H+1+mdp.S)/delta)))])

In [68]:
def V_h_max(h:int,mdp:MDP):
    # return (mdp.H-h+1)
    return (mdp.H - h)

In [69]:
def V_std(P,V):
    # return np.sqrt(np.sum(P*V**2,axis=-1) - np.sum(P*V,axis=-1)**2)
    # return np.sqrt(np.sum(P*(V-P*V)**2,axis=-1))
    # std = np.sqrt(np.sum(P*V**2,axis=-1) - np.sum(2*(P**2) * V**2,axis=-1) + np.sum(P**3 * V**2,axis=-1))
    PV = np.sum(P*V,axis=-1)
    std = np.sqrt(np.sum(P * (V-PV)**2,axis=-1))
    return std

In [70]:
def vector_1_norm(V1, V2):
    """
    Calculate the 1-norm of the difference between two vectors V1 and V2.

    :param V1: First vector.
    :param V2: Second vector.
    :return: The 1-norm of the difference between V1 and V2.
    """
    return np.sum(np.abs(V1 - V2))

In [71]:

def gzi_upper_(mdp:MDP,h:int,feature_func,n_k,s,a,delta,P_k,V_upper,V_lower):
    gzi1 = (V_h_max(h+1,mdp) + 1) *feature_func(n_k[s,a],delta,mdp)
    gzi2 = (1 + np.sqrt(12) * np.sqrt(V_std(P_k[s,a],V_upper[h+1]) ** 2 +np.sum(P_k[s,a] * (V_upper[h+1] - V_lower[h+1]) ** 2,axis=-1)) * feature_func(n_k[s,a],delta,mdp)) + 8.13*V_h_max(h+1,mdp) * feature_func(n_k[s,a],delta,mdp) ** 2
    gzi3 = (1 + np.sqrt(12) * V_std(P_k[s,a],V_upper[h+1])) * feature_func(n_k[s,a],delta,mdp) + 1/mdp.H * np.sum(P_k[s,a] * (V_upper[h+1] - V_lower[h+1])) + (20.13 * mdp.H * vector_1_norm(V_upper[h+1],V_lower[h+1]) )* feature_func(n_k[s,a],delta,mdp) ** 2
    gzi = np.min([gzi1,gzi2,gzi3])
    # if h==0:
        
        # print(gzi1)
        # print(gzi2)
        # print(gzi3)
        # print(gzi)
        # # print(V_std(P_k[s,a],V_upper[h+1]))
        # # print((20.13 * mdp.H * vector_1_norm(V_upper[h+1],V_lower[h+1]) * feature_func(n_k[s,a],delta,mdp)) ** 2)?
        # print('---')
    return gzi

In [72]:
def gzi_lower_(mdp:MDP,h:int,feature_func,n_k,s,a,delta,P_k,V_upper,V_lower):
    gzi1 = (2 * np.sqrt(mdp.S)*V_h_max(h+1,mdp) + 1) *feature_func(n_k[s,a],delta,mdp)
    
    gzi2 = (np.sqrt(12)* np.sqrt(V_std(P_k[s,a],V_upper[h+1]) ** 2 +np.sum(P_k[s,a] * (V_upper[h+1] - V_lower[h+1]) ** 2,axis=-1))  + 1 + 2 * np.sum(np.sqrt(P_k[s,a]) * (V_upper[h+1] - V_lower[h+1])))*feature_func(n_k[s,a],delta,mdp) + (8.13*V_h_max(h+1,mdp) + 4.66 * vector_1_norm(V_upper[h+1],V_lower[h+1])) * feature_func(n_k[s,a],delta,mdp) ** 2
    
    gzi3 = (1 + np.sqrt(12) * V_std(P_k[s,a],V_upper[h+1])) * feature_func(n_k[s,a],delta,mdp) + 1/mdp.H * np.sum(P_k[s,a] * (V_upper[h+1] - V_lower[h+1])) + (8.13 * V_h_max(h+1,mdp)+(32*mdp.H + 4.66)* vector_1_norm(V_upper[h+1],V_lower[h+1]))* feature_func(n_k[s,a],delta,mdp) ** 2
    
    gzi4 = (V_h_max(h+1,mdp)+1 + 2 *np.sum(np.sqrt(P_k[s,a]) * (V_upper[h+1] - V_lower[h+1]))) * feature_func(n_k[s,a],delta,mdp) + 4.66 * vector_1_norm(V_upper[h+1],V_lower[h+1]) * feature_func(n_k[s,a],delta,mdp) ** 2
    gzi = np.min([gzi1,gzi2,gzi3,gzi4])
    # print(gzi1)
    # print(gzi2)
    # print(gzi3)
    # print(gzi)
    # print('---')
    return gzi

In [73]:
def ORLC(mdp:MDP,delta:float):
    np.random.seed(10)
    n_k = np.zeros((mdp.S,mdp.A))
    n_k_p = np.zeros((mdp.S,mdp.A,mdp.S))
    r_hat_k = np.zeros((mdp.S,mdp.A))
    r_k = np.zeros((mdp.S,mdp.A))
    P_k = np.ones((mdp.S,mdp.A,mdp.S))/ mdp.S
    V_lower = np.zeros((mdp.H+1,mdp.S))
    Q_lower = np.zeros((mdp.H,mdp.S,mdp.A))
    V_upper = np.zeros((mdp.H+1,mdp.S))
    Q_upper = np.zeros((mdp.H,mdp.S,mdp.A))
    gzi = np.zeros((mdp.H,mdp.S,mdp.A))
    policy = np.zeros((mdp.H,mdp.S))
    epsilon = np.zeros(mdp.K)
    experience = []
    V_lower[mdp.H] = 0
    V_upper[mdp.H] = 0
    for k in range(mdp.K):
        for h in reversed(range(mdp.H)):
            for s in range(mdp.S):
                for a in range(mdp.A):
                    if h + 1 == mdp.H:
                        # gzi[h,s,a] = 45*mdp.S*mdp.H**2 * feature_func(n_k[s,a],delta,mdp) **2+ 1/mdp.H *np.sum(P_k[s,a]*(V_upper[h+1] - V_lower[h+1])) + (1 + np.sqrt(12) * V_std(P_k[s,a],V_upper[h+1]) * feature_func(n_k[s,a],delta,mdp))
                        gzi_upper = gzi_upper_(mdp,h,feature_func,n_k,s,a,delta,P_k,V_upper,V_lower)
                        # print(V_upper[h+1])
                        gzi_lower = gzi_lower_(mdp,h,feature_func,n_k,s,a,delta,P_k,V_upper,V_lower)
                        Q_upper[h,s,a] = np.max([0,r_k[s,a] + np.sum(P_k[s,a,:]*V_upper[h+1,:]) + gzi_upper])
                        Q_upper[h,s,a] = np.min([V_h_max(h,mdp),Q_upper[h,s,a]])
                        Q_lower[h,s,a] = np.max([0,r_k[s,a] + np.sum(P_k[s,a]*V_lower[h+1]) - gzi_lower])
                        Q_lower[h,s,a] = np.min([V_h_max(h,mdp),Q_lower[h,s,a]])
                    else:
                        # gzi[h,s,a] = (1 + np.sqrt(12)*V_std(P_k[s,a],V_upper[h+1]) * feature_func(n_k[s,a],delta,mdp)) + 45*mdp.S*mdp.H**2 * feature_func(n_k[s,a],delta,mdp) **2 + 1/mdp.H *np.sum(P_k[s,a]*(V_upper[h+1] - V_lower[h+1]))
                        gzi_lower = gzi_lower_(mdp,h,feature_func,n_k,s,a,delta,P_k,V_upper,V_lower)
                        gzi_upper = gzi_upper_(mdp,h,feature_func,n_k,s,a,delta,P_k,V_upper,V_lower)
                        Q_upper[h,s,a] = np.max([0,r_k[s,a] + np.sum(P_k[s,a]*V_upper[h+1]) + gzi_upper])
                        Q_upper[h,s,a] = np.min([V_h_max(h,mdp),Q_upper[h,s,a]])
                        Q_lower[h,s,a] = np.max([0,r_k[s,a] + np.sum(P_k[s,a]*V_lower[h+1]) - gzi_lower])
                        Q_lower[h,s,a] = np.min([V_h_max(h,mdp),Q_lower[h,s,a]])
                        if h == 0:
                            print(gzi_lower)
                            print(V_lower[0])

                max_value = np.max(Q_upper[h,s],axis=-1)
                max_indeices = [i for i,value in enumerate(Q_upper[h,s]) if value == max_value]
                a_ = np.random.choice(max_indeices)    
                policy[h,s] = a_
                V_upper[h,s] = Q_upper[h,s,a_]
                V_lower[h,s] = Q_lower[h,s,a_]
        epsilon[k] = np.abs(V_upper[0,0] - V_lower[0,0])
        s = 0
        # experience = []
        for h in range(mdp.H):
            s_dash,rew,a = sampler(mdp,policy,s,h)
            # experience.append([s,int(policy[h,s]),rew,s_dash])
            print('----------------------------')
            n_k[s, a] += 1
            n_k_p[s, a, s_dash] += 1
            r_hat_k[s, a] += rew
            r_k[s,a] = r_hat_k[s,a] / n_k[s,a]
                # P_k[s, a, s_dash] = n_k_p[s, a, s_dash] / n_k[s, a]
            P_k[s,a,:] = n_k_p[s,a,:] / n_k[s,a]
            s=s_dash
    return policy,epsilon,Q_upper,Q_lower,V_upper,V_lower,r_k,P_k

In [74]:
policy,epsilon,Qu,Ql,Vu,Vl,r,P_ = ORLC(mdp,0.1)

99.5900603509299
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
99.5900603509299
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
99.5900603509299
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
99.5900603509299
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
99.5900603509299
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
99.5900603509299
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
99.5900603509299
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
99.5900603509299
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
99.5900603509299
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
99.5900603

In [75]:
epsilon

array([10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10.,
       10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10.,
       10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10.,
       10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10.])

In [76]:
Qu

array([[[10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        

In [77]:
r

array([[0.8       , 0.        , 0.79166667],
       [0.33333333, 1.        , 0.        ],
       [0.66666667, 0.4       , 0.66666667],
       [1.        , 0.75      , 0.33333333],
       [1.        , 0.25      , 0.77777778],
       [0.25      , 0.66666667, 0.28571429],
       [0.22222222, 0.5       , 0.14285714],
       [0.4       , 0.55555556, 0.        ],
       [0.5       , 0.75      , 0.6       ],
       [0.28571429, 0.33333333, 0.        ],
       [0.25      , 0.4       , 0.5       ],
       [0.44444444, 0.5       , 0.6       ],
       [1.        , 0.5       , 1.        ],
       [0.2       , 0.33333333, 1.        ],
       [0.        , 0.66666667, 0.5       ],
       [0.2       , 0.5       , 0.75      ],
       [0.33333333, 0.8       , 0.8       ],
       [0.33333333, 0.5       , 1.        ],
       [0.83333333, 0.5       , 0.        ],
       [0.5       , 0.4       , 0.125     ],
       [0.25      , 0.5       , 0.83333333],
       [0.75      , 0.        , 0.8       ],
       [0.

In [78]:
mdp.P

array([[[[4.81051673e-06, 7.75286312e-05, 2.62129074e-06, ...,
          1.25069000e-04, 1.04250724e-05, 1.16093977e-02],
         [1.71824552e-07, 1.75036749e-02, 5.72568846e-03, ...,
          4.11837743e-06, 1.85444501e-03, 1.88405518e-04],
         [2.31418203e-05, 1.11947894e-07, 4.21689917e-09, ...,
          3.39797717e-03, 1.70315237e-04, 1.23950425e-13]],

        [[1.84899429e-08, 2.38587757e-10, 4.99336883e-01, ...,
          1.69272904e-06, 5.99941851e-05, 3.03745058e-02],
         [7.98479758e-03, 9.18024072e-04, 1.67237983e-07, ...,
          3.97824288e-04, 9.43398671e-02, 1.01587741e-01],
         [1.08144966e-02, 8.96963396e-03, 1.89786791e-04, ...,
          5.63501619e-02, 4.02443119e-05, 5.98995085e-05]],

        [[4.49636107e-04, 1.94099419e-01, 1.85516451e-07, ...,
          4.46434025e-09, 1.89097980e-03, 3.21503766e-10],
         [1.45958633e-02, 1.41149345e-07, 1.86794317e-04, ...,
          8.59362907e-07, 1.89458889e-02, 3.91058484e-05],
         [4.94857558

In [79]:
P_

array([[[0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.10526316, 0.        , 0.05263158, ..., 0.05263158,
         0.        , 0.        ],
        [0.        , 0.        , 0.08333333, ..., 0.        ,
         0.        , 0.04166667]],

       [[0.16666667, 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.03333333, 0.03333333, 0.03333333, ..., 0.03333333,
         0.03333333, 0.03333333]],

       [[0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.16666667,
         0.        , 0.        ]],

       ...,

       [[0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 

In [46]:
policy[-1]

array([0., 1., 0.])

In [80]:
# def Q_iteration(mdp:MDP):
#     Q = np.zeros((mdp.S,mdp.A))
#     for s in range(mdp.S):
#         for a in range(mdp.A):
#             Q[s,a] = mdp.rew[s,a] + mdp.gamma * np.sum(mdp.P[s,a,:] * np.max(Q,axis=-1))
#     return Q
# Q = Q_iteration(mdp)
# a_index = np.argmax(Q,axis=-1)
def Q_iteration(mdp:MDP):
    Q = np.zeros((mdp.H+1,mdp.S,mdp.A))
    for h in reversed(range(mdp.H)):
        for s in range(mdp.S):
            for a in range(mdp.A):
                V = np.max(Q[h+1],axis=-1)
                Q[h,s,a] = mdp.rew[h,s,a] + mdp.gamma * np.sum(mdp.P[h,s,a,:] * V)
                
    return Q

Q = Q_iteration(mdp)
a_index = np.argmax(Q,axis=-1)
a_index


array([[2, 0, 1, 1, 2, 0, 0, 2, 2, 0, 2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1, 0,
        0, 0, 1, 1, 2, 0, 1, 1],
       [1, 0, 0, 2, 2, 0, 1, 1, 1, 1, 1, 0, 2, 2, 2, 1, 1, 1, 1, 1, 2, 0,
        1, 0, 2, 0, 2, 0, 2, 2],
       [1, 1, 2, 2, 2, 2, 2, 0, 2, 1, 1, 1, 2, 2, 2, 2, 1, 0, 0, 1, 2, 2,
        2, 1, 2, 2, 0, 2, 0, 1],
       [1, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0, 2, 0, 0, 1, 2, 2, 2, 0, 0, 0,
        2, 1, 1, 0, 0, 0, 1, 0],
       [2, 0, 0, 2, 1, 0, 1, 1, 0, 1, 1, 2, 1, 2, 0, 2, 1, 2, 0, 0, 1, 2,
        0, 0, 0, 1, 1, 2, 0, 2],
       [0, 1, 1, 2, 1, 2, 1, 1, 2, 0, 0, 2, 2, 0, 1, 1, 0, 0, 1, 1, 1, 0,
        1, 2, 2, 1, 0, 0, 2, 2],
       [1, 2, 0, 2, 2, 1, 0, 0, 1, 2, 2, 1, 0, 2, 1, 2, 1, 0, 1, 0, 2, 1,
        0, 0, 1, 2, 2, 0, 0, 2],
       [0, 1, 0, 0, 2, 0, 0, 0, 1, 2, 1, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, 0,
        1, 2, 1, 2, 0, 1, 1, 0],
       [0, 0, 1, 0, 0, 2, 2, 1, 0, 2, 2, 0, 0, 2, 0, 1, 2, 1, 0, 1, 0, 0,
        0, 0, 0, 1, 1, 2, 2, 0],
       [2, 2, 0, 0, 0, 0, 0, 1, 0, 0,

In [81]:
Q

array([[[6.24945648, 5.24843973, 6.25983738],
        [6.22586948, 5.2383771 , 5.25335256],
        [5.25091101, 6.22052352, 5.19658387],
        [5.24178956, 6.26416057, 6.23546964],
        [5.21375029, 6.21143207, 6.25614809],
        [6.21370885, 6.20153014, 5.25922867],
        [6.23530993, 6.21649403, 6.2168783 ],
        [5.16060671, 5.20051104, 6.23075716],
        [5.1409209 , 5.09471867, 6.25275483],
        [6.26470153, 6.08612418, 5.98192845],
        [5.9522084 , 6.14889305, 6.23816574],
        [5.13211161, 5.09713053, 5.17397226],
        [4.77285872, 6.02087421, 5.18372555],
        [6.19839633, 6.08991794, 6.24178235],
        [4.80948796, 6.26633647, 5.26247668],
        [6.25478713, 4.8230703 , 6.27393761],
        [4.94081918, 5.24221892, 5.2570342 ],
        [5.22504345, 6.17292942, 4.70717549],
        [4.91120123, 6.24879935, 6.24173708],
        [5.25326027, 6.2509271 , 5.27228592],
        [6.25127325, 6.26663771, 6.24096462],
        [6.27952262, 5.19567966, 5

In [82]:
Q

array([[[6.24945648, 5.24843973, 6.25983738],
        [6.22586948, 5.2383771 , 5.25335256],
        [5.25091101, 6.22052352, 5.19658387],
        [5.24178956, 6.26416057, 6.23546964],
        [5.21375029, 6.21143207, 6.25614809],
        [6.21370885, 6.20153014, 5.25922867],
        [6.23530993, 6.21649403, 6.2168783 ],
        [5.16060671, 5.20051104, 6.23075716],
        [5.1409209 , 5.09471867, 6.25275483],
        [6.26470153, 6.08612418, 5.98192845],
        [5.9522084 , 6.14889305, 6.23816574],
        [5.13211161, 5.09713053, 5.17397226],
        [4.77285872, 6.02087421, 5.18372555],
        [6.19839633, 6.08991794, 6.24178235],
        [4.80948796, 6.26633647, 5.26247668],
        [6.25478713, 4.8230703 , 6.27393761],
        [4.94081918, 5.24221892, 5.2570342 ],
        [5.22504345, 6.17292942, 4.70717549],
        [4.91120123, 6.24879935, 6.24173708],
        [5.25326027, 6.2509271 , 5.27228592],
        [6.25127325, 6.26663771, 6.24096462],
        [6.27952262, 5.19567966, 5

In [83]:
Qu

array([[[10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        , 10.        , 10.        ],
        [10.        

In [86]:
Qu[9]

array([[1.        , 0.64680158, 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 0.92709768],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        ],
       [1.

In [87]:
Ql[9]

array([[0.25582876, 0.        , 0.19304505],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.

In [88]:
# epsilon[49]
Q[9]

array([[0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 1.],
       [1., 1., 0.],
       [1., 1., 1.],
       [0., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 1., 0.],
       [0., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 1.],
       [1., 0., 0.],
       [1., 1., 1.],
       [0., 0., 0.],
       [0., 0., 1.],
       [0., 1., 1.],
       [0., 0., 0.],
       [0., 1., 0.],
       [0., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [0., 0., 0.],
       [0., 0., 0.],
       [1., 1., 0.],
       [1., 1., 0.]])