In [23]:
# import necessary packages
import gym
import numpy as np

env = gym.make("CartPole-v1")

def relu(mat):
    return np.multiply(mat, (mat > 0))

def relu_derivative(mat):
    return (mat > 0) * 1


class NNLayer:
    def __init__(self, input_size, output_size, activation=None, lr=0.001):
        self.input_size = input_size
        self.output_size = output_size

        self.weights = np.random.uniform(low=-0.5, high=0.5, size=(input_size, output_size))  # initialize weights
        self.activation_function = activation

        self.lr = lr

    def forward(self, inputs, remember_for_backprop=True):
        input_with_bias = np.append(inputs, 1)
        unactivated = None
        unactivated = np.dot(input_with_bias, self.weights)
        output = unactivated
        if self.activation_function != None:
            output = self.activation_function(output)
        if remember_for_backprop:
            self.backward_store_in = input_with_bias
            self.backward_store_out = np.copy(unactivated)
        return output

    def update_weights(self, gradient, gamma, cumulated_reward, t):
        self.weights += self.lr * (gamma ** t) * cumulated_reward * gradient


    def backward(self, gradient_from_above, gamma, cumulated_return, t):
        adjusted_mul = gradient_from_above
        if self.activation_function == None:
            adjusted_mul = np.multiply(relu_derivative(self.backward_store_out), gradient_from_above)
        D_i = np.dot(np.transpose(np.reshape(self.backward_store_in, (1, len(self.backward_store_in)))),
                     np.reshape(adjusted_mul, (1, len(adjusted_mul))))
        delta_i = np.dot(adjusted_mul, np.transpose(self.weights))[:-1]
        self.update_weights(D_i, gamma, cumulated_return, t)
        return delta_i


class RLAgent:
    env = None
    def __init__(self, env, num_hidden_layers=2, hidden_size=24, gamma=0.95):
        self.env = env
        self.hidden_size = hidden_size
        self.input_size = env.observation_space.shape[0]
        self.output_size = env.action_space.n
        self.num_hidden_layers = num_hidden_layers
        self.gamma = gamma
        self.layers = [
            NNLayer(self.input_size + 1, self.hidden_size, activation=relu),
            NNLayer(self.hidden_size + 1, self.hidden_size, activation=relu),
            NNLayer(self.hidden_size + 1, self.output_size)
        ]

    def select_action(self, observation):
        values = self.forward(np.asmatrix(observation), False)
        action_prob = np.exp(values - np.max(values)) / np.sum(np.exp(values - np.max(values)))
        random = np.random.random()
        print('random:', random)
        print('action prob:', action_prob[0])
        if  random < action_prob[0]:
            return 0
        return 1

    def get_returns(self, rewards_list):
        returns = []
        G = 0
        for r in reversed(rewards_list):
            G = r + self.gamma * G
            returns.insert(0, G)
        return returns

    def forward(self, observation, remember_for_backprop=True):
        vals = np.copy(observation)
        index = 0
        for layer in self.layers:
            vals = layer.forward(vals, remember_for_backprop)
            index = index + 1
        return vals

    def backward(self, observation, action, cumulated_return, t):
        values = self.forward(np.asmatrix(observation), True)
        delta = np.zeros_like(values)
        delta[action] = 1 / values[action]

        for layer in reversed(self.layers):
            delta = layer.backward(delta, self.gamma, cumulated_return, t)




NUM_EPISODES = 10_000
MAX_TIMESTEPS = 1_000
GAMMA = 0.95
hidden_layer_size = 64
num_hidden_layers = 2

model = RLAgent(env, num_hidden_layers, hidden_layer_size, GAMMA)

for i_episode in range(NUM_EPISODES):
    rewards_list = []
    actions_list = []
    states_list = []
    observation, _ = env.reset()
    
    states_list.append(observation)
    
    for t in range(MAX_TIMESTEPS):
        action = model.select_action(observation)
        print(action)
        prev_obs = observation
        observation, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        rewards_list.append(reward)
        actions_list.append(action)
        states_list.append(observation)
        if done:
            #print('Episode {} ended with total reward of {}'.format(i_episode, t))
            break
        
    G_t = model.get_returns(rewards_list)
    for i in range(len(G_t)):
        prev_obs, observation, action, reward = states_list[i], states_list[i + 1], actions_list[i], G_t[i]
        model.backward(prev_obs, action, reward, i + 1)


observation = env.reset()
for t in range(MAX_TIMESTEPS):
    action = model.select_action(observation)
    prev_obs = observation
    observation, reward, done, info = env.step(action)
    if done:
        break

print("Done")


random: 0.01130711283272523
action prob: 0.15168612797503764
0
random: 0.8859568431775824
action prob: 0.1360436814827645
1
random: 0.012308870268087801
action prob: 0.1492368206910897
0
random: 0.9777168448655091
action prob: 0.13777151376185864
1
random: 0.9241482533093439
action prob: 0.1465839206555217
1
random: 0.7196666972402866
action prob: 0.17101488975664464
0
random: 0.4718892164610998
action prob: 0.14544676653224442
0
random: 0.3758221021520297
action prob: 0.1404045594870398
0
random: 0.8387446930920048
action prob: 0.13526321589155585
0
random: 0.3736454302961961
action prob: 0.11763928087245934
0
random: 0.8780829143518445
action prob: 0.11009578146621543
0
random: 0.28163845272138954
action prob: 0.09910746588345253
0
random: 0.07327225132947324
action prob: 0.09161296395517342
0
random: 0.704557995335697
action prob: 0.08554478761061249
0
random: 0.2335126359674382
action prob: 0.075335804514229
0
random: 0.19586270082797985
action prob: 0.06618336324775914
0
random: 0

random: 0.531330036220612
action prob: 5.533613761416837e-83
0
random: 0.7397211709437105
action prob: 2.1512906951283297e-90
0
random: 0.6122380820909994
action prob: 9.153713831900452e-33
0
random: 0.7362918922034248
action prob: 1.674755836880618e-39
0
random: 0.33667808107842045
action prob: 2.801797597900793e-46
0
random: 0.4227296826689805
action prob: 4.676555201183866e-53
0
random: 0.885016959881514
action prob: 6.235450564651088e-60
0
random: 0.6893633059046581
action prob: 7.336190743668634e-67
0
random: 0.06282629402376061
action prob: 6.895521342074033e-74
0
random: 0.9276510429840774
action prob: 4.759558021536279e-81
0
random: 0.9923118477751226
action prob: 2.2766187519863014e-88
0
random: 0.7103704493067438
action prob: 7.521381084921146e-96
0
random: 0.6880876514956303
action prob: 6.205281334145134e-34
0
random: 0.4911178478951457
action prob: 7.638906378194942e-41
0
random: 0.5618841338241574
action prob: 8.497969824578506e-48
0
random: 0.2119354052624819
action prob

random: 0.32267840326100006
action prob: 5.644815675984411e-82
0
random: 0.43337635153065013
action prob: 3.1333256370355994e-99
0
random: 0.4367727292665017
action prob: 1.2573412736072859e-116
0
random: 0.13683444331864325
action prob: 3.7426629722097196e-134
0
random: 0.5090412675516945
action prob: 7.179067286446469e-152
0
random: 0.11681980470635345
action prob: 7.734245451492846e-170
0
random: 0.5045731685644785
action prob: 4.341944593468028e-188
0
random: 0.08002281804596634
action prob: 1.0704520907529925e-206
0
random: 0.3820684471520154
action prob: 1.0349152294742846e-225
0
random: 0.9040236659659994
action prob: 3.660875413412071e-245
0
random: 0.49237848982749965
action prob: 1.1824718866164429e-77
0
random: 0.8686380871195636
action prob: 9.845368132462453e-95
0
random: 0.5246518776215207
action prob: 6.421517158897918e-112
0
random: 0.06553832053415454
action prob: 3.119683258678732e-129
0
random: 0.5933224617853026
action prob: 9.996194131603487e-147
0
random: 0.273871

random: 0.2446075951604889
action prob: 6.231419802825703e-212
0
random: 0.8867358975014081
action prob: 1.4159161345073958e-244
0
random: 0.18427635169370815
action prob: 1.4656151333981368e-277
0
random: 0.05674535579440931
action prob: 4.958550568427e-311
0
random: 0.531923186862891
action prob: 0.0
0
random: 0.7362064143167232
action prob: 0.0
0
random: 0.4464289657347127
action prob: 0.0
0
random: 0.09843823946801844
action prob: 0.0
0
random: 0.28324501296044713
action prob: 1.2865372957188166e-140
0
random: 0.709434636637556
action prob: 2.6038744591386587e-174
0
random: 0.1322796935157352
action prob: 2.9281456480580937e-208
0
random: 0.9566612947134071
action prob: 1.965086477472483e-242
0
random: 0.004563570261306715
action prob: 6.561173273233821e-277
0
random: 0.18848722706433352
action prob: 7.823295848124e-312
0
random: 0.18858731769206338
action prob: 0.0
0
random: 0.1813274948108068
action prob: 0.0
0
random: 0.4922520151918435
action prob: 0.0
0
random: 0.5828623596081

random: 0.5349213854500816
action prob: 1.6399733977252557e-218
0
random: 0.2353296084170059
action prob: 8.35213420660951e-271
0
random: 0.4245573762103283
action prob: 2e-323
0
random: 0.6465419513205827
action prob: 0.0
0
random: 0.1268039653923051
action prob: 0.0
0
random: 0.39118108515343253
action prob: 0.0
0
random: 0.6783014163919981
action prob: 0.0
0
random: 0.9174952257605804
action prob: 0.0
0
random: 0.6511630979382157
action prob: 1.734837684427384e-218
0
random: 0.41362629394621775
action prob: 5.879320066216291e-269
0
random: 0.8959565087348166
action prob: 9.3843e-320
0
random: 0.021345254248749557
action prob: 0.0
0
random: 0.35896555924476503
action prob: 0.0
0
random: 0.43418320099652685
action prob: 0.0
0
random: 0.31022617397768393
action prob: 0.0
0
random: 0.47233068322127936
action prob: 0.0
0
random: 0.3738121025786605
action prob: 0.0
0
random: 0.17372493147313817
action prob: 0.0
0
random: 0.6470269591931359
action prob: 2.0014627298331695e-224
0
random: 0.

random: 0.21706504205688326
action prob: 6.248365247687e-312
0
random: 0.47576696111233874
action prob: 0.0
0
random: 0.21295707279717757
action prob: 0.0
0
random: 0.9458015146770656
action prob: 0.0
0
random: 0.46500830567107865
action prob: 0.0
0
random: 0.5739715351273121
action prob: 0.0
0
random: 0.6628278413322937
action prob: 0.0
0
random: 0.025739148198255513
action prob: 0.0
0
random: 0.31983305390557915
action prob: 0.0
0
random: 0.21894849960771157
action prob: 2.6087489896e-314
0
random: 0.939632940029506
action prob: 0.0
0
random: 0.6885772570609601
action prob: 0.0
0
random: 0.7214235655080763
action prob: 0.0
0
random: 0.7466692584564731
action prob: 0.0
0
random: 0.41837495843569084
action prob: 0.0
0
random: 0.37023094962617376
action prob: 0.0
0
random: 0.6852429242012266
action prob: 0.0
0
random: 0.9613542507081914
action prob: 0.0
0
random: 0.6469628258613347
action prob: 1.0937354606985695e-306
0
random: 0.7354472236465649
action prob: 0.0
0
random: 0.39594903671

random: 0.7325284753077315
action prob: 0.0
0
random: 0.3405928863868576
action prob: 0.0
0
random: 0.9557980531730483
action prob: 0.0
0
random: 0.718649523526271
action prob: 0.0
0
random: 0.7066016039619615
action prob: 0.0
0
random: 0.6373205131247719
action prob: 0.0
0
random: 0.9015067720888676
action prob: 0.0
0
random: 0.5357933962651176
action prob: 0.0
0
random: 0.300787664889855
action prob: 0.0
0
random: 0.4280148526434717
action prob: 0.0
0
random: 0.6674957761518354
action prob: 0.0
0
random: 0.5440783767298852
action prob: 0.0
0
random: 0.47411349214610476
action prob: 0.0
0
random: 0.7275703860865048
action prob: 0.0
0
random: 0.1721433019451768
action prob: 0.0
0
random: 0.35110174376897063
action prob: 0.0
0
random: 0.3303153207777084
action prob: 0.0
0
random: 0.36085465324716426
action prob: 0.0
0
random: 0.7173852724186653
action prob: 0.0
0
random: 0.526308717120747
action prob: 0.0
0
random: 0.7319581841765269
action prob: 0.0
0
random: 0.42643947272319704
action

random: 0.37000042734465566
action prob: 0.0
0
random: 0.9777992577519791
action prob: 0.0
0
random: 0.6065991828276399
action prob: 0.0
0
random: 0.538320294913436
action prob: 0.0
0
random: 0.6254651107228201
action prob: 0.0
0
random: 0.9810205486831215
action prob: 0.0
0
random: 0.06907518756735975
action prob: 0.0
0
random: 0.22343055233979559
action prob: 0.0
0
random: 0.8860469659546827
action prob: 0.0
0
random: 0.5858015130136474
action prob: 0.0
0
random: 0.8178554112496246
action prob: 0.0
0
random: 0.9437508256116911
action prob: 0.0
0
random: 0.8335132839441086
action prob: 0.0
0
random: 0.6957385253432402
action prob: 0.0
0
random: 0.13851087173695653
action prob: 0.0
0
random: 0.9957072552041241
action prob: 0.0
0
random: 0.1758837720921409
action prob: 0.0
0
random: 0.7729609381553644
action prob: 0.0
0
random: 0.7025391456570128
action prob: 0.0
0
random: 0.8173499109418138
action prob: 0.0
0
random: 0.8515365511900053
action prob: 0.0
0
random: 0.8055009571185091
acti

random: 0.2182044397229581
action prob: 0.0
0
random: 0.8639551279028694
action prob: 0.0
0
random: 0.5433710327393763
action prob: 0.0
0
random: 0.5310379276559123
action prob: 0.0
0
random: 0.5831111523684602
action prob: 0.0
0
random: 0.16352944059605512
action prob: 0.0
0
random: 0.7749893484326839
action prob: 0.0
0
random: 0.06716457145903365
action prob: 0.0
0
random: 0.8535459577198832
action prob: 0.0
0
random: 0.5654523320028851
action prob: 0.0
0
random: 0.48678339631160705
action prob: 0.0
0
random: 0.6969583637828957
action prob: 0.0
0
random: 0.6858321975042039
action prob: 0.0
0
random: 0.8476096589311212
action prob: 0.0
0
random: 0.26374280635869085
action prob: 0.0
0
random: 0.9816349255159672
action prob: 0.0
0
random: 0.469168609991802
action prob: 0.0
0
random: 0.8279521096213529
action prob: 0.0
0
random: 0.8013860660086277
action prob: 0.0
0
random: 0.24461660480173053
action prob: 0.0
0
random: 0.34952866312122477
action prob: 0.0
0
random: 0.0586027823529357
ac

random: 0.08327328493862829
action prob: 0.0
0
random: 0.06811188571385052
action prob: 0.0
0
random: 0.4893187741886553
action prob: 0.0
0
random: 0.7669812195364648
action prob: 0.0
0
random: 0.7370103150129674
action prob: 0.0
0
random: 0.34470578423842313
action prob: 0.0
0
random: 0.7434964957798347
action prob: 0.0
0
random: 0.6175426894472842
action prob: 0.0
0
random: 0.551007944055443
action prob: 0.0
0
random: 0.5554663882524081
action prob: 0.0
0
random: 0.7455831144970688
action prob: 0.0
0
random: 0.736688324156519
action prob: 0.0
0
random: 0.17395015789223822
action prob: 0.0
0
random: 0.8757349007749805
action prob: 0.0
0
random: 0.40897366484488484
action prob: 0.0
0
random: 0.724657245921332
action prob: 0.0
0
random: 0.5946280251798738
action prob: 0.0
0
random: 0.09069808732220841
action prob: 0.0
0
random: 0.09070955502655242
action prob: 0.0
0
random: 0.55581037510633
action prob: 0.0
0
random: 0.012144465276116678
action prob: 0.0
0
random: 0.3628094745211805
act

random: 0.5645445174659133
action prob: 0.0
0
random: 0.7015139240527438
action prob: 0.0
0
random: 0.7085653563493028
action prob: 0.0
0
random: 0.18899392948545257
action prob: 0.0
0
random: 0.2993238346971938
action prob: 0.0
0
random: 0.110092094450716
action prob: 0.0
0
random: 0.25501440259322783
action prob: 0.0
0
random: 0.4543934939430999
action prob: 0.0
0
random: 0.7096131665832065
action prob: 0.0
0
random: 0.5128974811571744
action prob: 0.0
0
random: 0.5948045533680623
action prob: 0.0
0
random: 0.024821259143414487
action prob: 0.0
0
random: 0.892081710548487
action prob: 0.0
0
random: 0.5503081348732761
action prob: 0.0
0
random: 0.05545307979789438
action prob: 0.0
0
random: 0.548308523194938
action prob: 0.0
0
random: 0.875112334810107
action prob: 0.0
0
random: 0.6148667624929987
action prob: 0.0
0
random: 0.5861801584373645
action prob: 0.0
0
random: 0.9220839079046241
action prob: 0.0
0
random: 0.506468495696635
action prob: 0.0
0
random: 0.362264297647645
action p

random: 0.5617437875801142
action prob: 0.0
0
random: 0.9103087001467052
action prob: 0.0
0
random: 0.2230881545073854
action prob: 0.0
0
random: 0.7691660130281187
action prob: 0.0
0
random: 0.30473613294240853
action prob: 0.0
0
random: 0.7243946882120073
action prob: 0.0
0
random: 0.3312606041128344
action prob: 0.0
0
random: 0.7542785672107016
action prob: 0.0
0
random: 0.20433020218394837
action prob: 0.0
0
random: 0.07665343385284829
action prob: 0.0
0
random: 0.15516685008743747
action prob: 0.0
0
random: 0.6609755609903984
action prob: 0.0
0
random: 0.7154754959439428
action prob: 0.0
0
random: 0.7928712783887064
action prob: 0.0
0
random: 0.9930508283829734
action prob: 0.0
0
random: 0.7987650891127656
action prob: 0.0
0
random: 0.05446875921548844
action prob: 0.0
0
random: 0.01983344121230801
action prob: 0.0
0
random: 0.9758022748223922
action prob: 0.0
0
random: 0.2000913878345979
action prob: 0.0
0
random: 0.37838600381719123
action prob: 0.0
0
random: 0.5985988627700422


random: 0.3378995451617556
action prob: 0.0
0
random: 0.9362745507966818
action prob: 0.0
0
random: 0.3586902962254055
action prob: 0.0
0
random: 0.4776390943937806
action prob: 0.0
0
random: 0.1828878826333078
action prob: 0.0
0
random: 0.18441671719069486
action prob: 0.0
0
random: 0.08044010549391867
action prob: 0.0
0
random: 0.33393323990726864
action prob: 0.0
0
random: 0.7640313182666574
action prob: 0.0
0
random: 0.02587012418450607
action prob: 0.0
0
random: 0.8278383716787793
action prob: 0.0
0
random: 0.02181147422461982
action prob: 0.0
0
random: 0.007036833490202343
action prob: 0.0
0
random: 0.43585118872571593
action prob: 0.0
0
random: 0.2707009328594949
action prob: 0.0
0
random: 0.030772571343120947
action prob: 0.0
0
random: 0.8679225220021631
action prob: 0.0
0
random: 0.15905051885491817
action prob: 0.0
0
random: 0.7447714515842121
action prob: 0.0
0
random: 0.5962131305392459
action prob: 0.0
0
random: 0.8393625832059947
action prob: 0.0
0
random: 0.8457065319161

random: 0.8625529322003502
action prob: 0.0
0
random: 0.9396140959300404
action prob: 0.0
0
random: 0.3498343980467713
action prob: 0.0
0
random: 0.24347337373939293
action prob: 0.0
0
random: 0.7514846643346415
action prob: 0.0
0
random: 0.8665562429044995
action prob: 0.0
0
random: 0.19584528484403796
action prob: 0.0
0
random: 0.5780586327559127
action prob: 0.0
0
random: 0.46137355080399745
action prob: 0.0
0
random: 0.805925771115647
action prob: 0.0
0
random: 0.5216857327768789
action prob: 0.0
0
random: 0.6633669982090887
action prob: 0.0
0
random: 0.11405397061139
action prob: 0.0
0
random: 0.4272023509626547
action prob: 0.0
0
random: 0.711280448696709
action prob: 0.0
0
random: 0.42408409593420315
action prob: 0.0
0
random: 0.22918058862245771
action prob: 0.0
0
random: 0.17921842300560187
action prob: 0.0
0
random: 0.7393382311430894
action prob: 0.0
0
random: 0.005016438929631106
action prob: 0.0
0
random: 0.031217923330079556
action prob: 0.0
0
random: 0.4415766182717308
a

random: 0.9185390996721916
action prob: 0.0
0
random: 0.3493863017925798
action prob: 0.0
0
random: 0.9287855687948539
action prob: 0.0
0
random: 0.7201618555207658
action prob: 0.0
0
random: 0.18136159299822974
action prob: 0.0
0
random: 0.772593856593534
action prob: 0.0
0
random: 0.37196364783963687
action prob: 0.0
0
random: 0.08489087269583118
action prob: 0.0
0
random: 0.8117328370296755
action prob: 0.0
0
random: 0.360784130341377
action prob: 0.0
0
random: 0.15265985705779594
action prob: 0.0
0
random: 0.9314827350786368
action prob: 0.0
0
random: 0.5270828184408914
action prob: 0.0
0
random: 0.87527570844921
action prob: 0.0
0
random: 0.5487004574527911
action prob: 0.0
0
random: 0.7377574948209454
action prob: 0.0
0
random: 0.3458071102712218
action prob: 0.0
0
random: 0.2913685763511238
action prob: 0.0
0
random: 0.6761755534789604
action prob: 0.0
0
random: 0.6182759603634135
action prob: 0.0
0
random: 0.3998860615227404
action prob: 0.0
0
random: 0.7455643542712722
action 

random: 0.1702786516848307
action prob: 0.0
0
random: 0.20704284021747665
action prob: 0.0
0
random: 0.7648973130415688
action prob: 0.0
0
random: 0.34071036881858674
action prob: 0.0
0
random: 0.3355058101804449
action prob: 0.0
0
random: 0.2283226448731699
action prob: 0.0
0
random: 0.46120646643815955
action prob: 0.0
0
random: 0.5613813368174877
action prob: 0.0
0
random: 0.682242775839084
action prob: 0.0
0
random: 0.11115081122590953
action prob: 0.0
0
random: 0.25895246907554004
action prob: 0.0
0
random: 0.790354741638529
action prob: 0.0
0
random: 0.13975446909877443
action prob: 0.0
0
random: 0.8472285182983822
action prob: 0.0
0
random: 0.006901541035242098
action prob: 0.0
0
random: 0.381068772327269
action prob: 0.0
0
random: 0.056670430826222806
action prob: 0.0
0
random: 0.10464067003871691
action prob: 0.0
0
random: 0.009533056533519324
action prob: 0.0
0
random: 0.24406802646643733
action prob: 0.0
0
random: 0.14978178396611586
action prob: 0.0
0
random: 0.817283491148

random: 0.1644635252903448
action prob: 0.0
0
random: 0.06607985117648596
action prob: 0.0
0
random: 0.05371857612764841
action prob: 0.0
0
random: 0.18595423683053725
action prob: 0.0
0
random: 0.24761496205516254
action prob: 0.0
0
random: 0.9609461823988227
action prob: 0.0
0
random: 0.38874755372613423
action prob: 0.0
0
random: 0.901987782840599
action prob: 0.0
0
random: 0.9951394846298396
action prob: 0.0
0
random: 0.49898181155145427
action prob: 0.0
0
random: 0.5196671023384658
action prob: 0.0
0
random: 0.5980292420857972
action prob: 0.0
0
random: 0.29987280597309895
action prob: 0.0
0
random: 0.7387021861636243
action prob: 0.0
0
random: 0.7385568388550977
action prob: 0.0
0
random: 0.6419730665194048
action prob: 0.0
0
random: 0.25651840643458035
action prob: 0.0
0
random: 0.9009837329176248
action prob: 0.0
0
random: 0.3967148688391209
action prob: 0.0
0
random: 0.23713331973644303
action prob: 0.0
0
random: 0.19030450831055534
action prob: 0.0
0
random: 0.625918196159307

random: 0.46341171911339707
action prob: 0.0
0
random: 0.48349022445910006
action prob: 0.0
0
random: 0.679101651224398
action prob: 0.0
0
random: 0.45839881499624135
action prob: 0.0
0
random: 0.5024409638968047
action prob: 0.0
0
random: 0.9487416027264371
action prob: 0.0
0
random: 0.056189823860530064
action prob: 0.0
0
random: 0.5056632830057362
action prob: 0.0
0
random: 0.1398418945860147
action prob: 0.0
0
random: 0.8528594588918332
action prob: 0.0
0
random: 0.010118501019095727
action prob: 0.0
0
random: 0.6826964779047764
action prob: 0.0
0
random: 0.055048448521273996
action prob: 0.0
0
random: 0.5542071588268567
action prob: 0.0
0
random: 0.1649704034819086
action prob: 0.0
0
random: 0.837466633387727
action prob: 0.0
0
random: 0.09431380291908364
action prob: 0.0
0
random: 0.5459703477627791
action prob: 0.0
0
random: 0.8834233502260318
action prob: 0.0
0
random: 0.2826259033903513
action prob: 0.0
0
random: 0.3605487709441797
action prob: 0.0
0
random: 0.8784418828353819

random: 0.4638469963651265
action prob: 0.0
0
random: 0.7380757508963096
action prob: 0.0
0
random: 0.40686996709321044
action prob: 0.0
0
random: 0.8570281888636172
action prob: 0.0
0
random: 0.2135171883349536
action prob: 0.0
0
random: 0.03621278110074844
action prob: 0.0
0
random: 0.7827117166761267
action prob: 0.0
0
random: 0.6857184591778828
action prob: 0.0
0
random: 0.3563003716983856
action prob: 0.0
0
random: 0.5158470925682554
action prob: 0.0
0
random: 0.4274946924027898
action prob: 0.0
0
random: 0.40353962603586135
action prob: 0.0
0
random: 0.6202738870079149
action prob: 0.0
0
random: 0.7481018275227079
action prob: 0.0
0
random: 0.3017994069148855
action prob: 0.0
0
random: 0.8157258583633813
action prob: 0.0
0
random: 0.48508874029148197
action prob: 0.0
0
random: 0.733075490140638
action prob: 0.0
0
random: 0.925003036207737
action prob: 0.0
0
random: 0.7209083108639635
action prob: 0.0
0
random: 0.8114395262427198
action prob: 0.0
0
random: 0.8880336039040279
actio

random: 0.5347139504198177
action prob: 0.0
0
random: 0.5296321052015021
action prob: 0.0
0
random: 0.8313179192724827
action prob: 0.0
0
random: 0.664613168361964
action prob: 0.0
0
random: 0.451726428955151
action prob: 0.0
0
random: 0.5150047213230459
action prob: 0.0
0
random: 0.7236644929778815
action prob: 0.0
0
random: 0.9728118876575125
action prob: 0.0
0
random: 0.2839653003543654
action prob: 0.0
0
random: 0.9354717049873071
action prob: 0.0
0
random: 0.9980907980312824
action prob: 0.0
0
random: 0.797738250808072
action prob: 0.0
0
random: 0.02803526162133052
action prob: 0.0
0
random: 0.5427522642160908
action prob: 0.0
0
random: 0.19521085078205325
action prob: 0.0
0
random: 0.9852858381598766
action prob: 0.0
0
random: 0.7670586040905368
action prob: 0.0
0
random: 0.7782947534493219
action prob: 0.0
0
random: 0.6331789122431192
action prob: 0.0
0
random: 0.5173588289821761
action prob: 0.0
0
random: 0.027693858044310793
action prob: 0.0
0
random: 0.348295079084585
action 

random: 0.28447083110868254
action prob: 0.0
0
random: 0.9174838621077599
action prob: 0.0
0
random: 0.6941912669917218
action prob: 0.0
0
random: 0.40888471421489614
action prob: 0.0
0
random: 0.6505716486879933
action prob: 0.0
0
random: 0.824917170523242
action prob: 0.0
0
random: 0.06619268428379554
action prob: 0.0
0
random: 0.717384046631782
action prob: 0.0
0
random: 0.36412483438658927
action prob: 0.0
0
random: 0.058376065032368474
action prob: 0.0
0
random: 0.4240806803308692
action prob: 0.0
0
random: 0.7864606915673452
action prob: 0.0
0
random: 0.025799033990089648
action prob: 0.0
0
random: 0.048360957021002404
action prob: 0.0
0
random: 0.5050586747916646
action prob: 0.0
0
random: 0.1505894068307545
action prob: 0.0
0
random: 0.31094052539232353
action prob: 0.0
0
random: 0.13188382688810363
action prob: 0.0
0
random: 0.21394466905904808
action prob: 0.0
0
random: 0.8561387305305526
action prob: 0.0
0
random: 0.7734771350444668
action prob: 0.0
0
random: 0.5457305370277

random: 0.53067074545213
action prob: 0.0
0
random: 0.8640844661783096
action prob: 0.0
0
random: 0.7653854218185661
action prob: 0.0
0
random: 0.06016761703169338
action prob: 0.0
0
random: 0.7190881736405872
action prob: 0.0
0
random: 0.7253436872898498
action prob: 0.0
0
random: 0.9801137946289681
action prob: 0.0
0
random: 0.5454236907556765
action prob: 0.0
0
random: 0.0006416720699878864
action prob: 0.0
0
random: 0.46452037562067283
action prob: 0.0
0
random: 0.8572489274802377
action prob: 0.0
0
random: 0.5250084070003177
action prob: 0.0
0
random: 0.42193982488925386
action prob: 0.0
0
random: 0.22279388410084233
action prob: 0.0
0
random: 0.48977501634428255
action prob: 0.0
0
random: 0.7713006142223875
action prob: 0.0
0
random: 0.08304062006792456
action prob: 0.0
0
random: 0.765605811644091
action prob: 0.0
0
random: 0.6695412132490844
action prob: 0.0
0
random: 0.21218098799454033
action prob: 0.0
0
random: 0.5410372914293194
action prob: 0.0
0
random: 0.29389332610766417

random: 0.9199411101135927
action prob: 0.0
0
random: 0.5444604671478513
action prob: 0.0
0
random: 0.909855975465383
action prob: 0.0
0
random: 0.6172118635084922
action prob: 0.0
0
random: 0.9917999172852957
action prob: 0.0
0
random: 0.3596784744639394
action prob: 0.0
0
random: 0.5304717999481684
action prob: 0.0
0
random: 0.1895962532768546
action prob: 0.0
0
random: 0.44538513566222193
action prob: 0.0
0
random: 0.9392512775844315
action prob: 0.0
0
random: 0.982613821082819
action prob: 0.0
0
random: 0.9120098836545388
action prob: 0.0
0
random: 0.6470280533856815
action prob: 0.0
0
random: 0.6349951801965626
action prob: 0.0
0
random: 0.05636686052299067
action prob: 0.0
0
random: 0.7390728091904726
action prob: 0.0
0
random: 0.6023741119833753
action prob: 0.0
0
random: 0.5955427603438695
action prob: 0.0
0
random: 0.9139817737200491
action prob: 0.0
0
random: 0.4228699247086234
action prob: 0.0
0
random: 0.46044042853064393
action prob: 0.0
0
random: 0.12589223365509383
actio

random: 0.1781544879658986
action prob: 0.0
0
random: 0.49687336278303684
action prob: 0.0
0
random: 0.09612202849011375
action prob: 0.0
0
random: 0.41869218958648013
action prob: 0.0
0
random: 0.1882258523589162
action prob: 0.0
0
random: 0.012378474485198132
action prob: 0.0
0
random: 0.5205775690522606
action prob: 0.0
0
random: 0.42257703025199744
action prob: 0.0
0
random: 0.2807836342403567
action prob: 0.0
0
random: 0.5551414961196951
action prob: 0.0
0
random: 0.4424703948237202
action prob: 0.0
0
random: 0.00524105108309747
action prob: 0.0
0
random: 0.16499619861016224
action prob: 0.0
0
random: 0.6536256197219786
action prob: 0.0
0
random: 0.6345700072389211
action prob: 0.0
0
random: 0.5857395534707087
action prob: 0.0
0
random: 0.009440102116586524
action prob: 0.0
0
random: 0.7476198053028262
action prob: 0.0
0
random: 0.3418154155695807
action prob: 0.0
0
random: 0.7030332786836236
action prob: 0.0
0
random: 0.4703202948126439
action prob: 0.0
0
random: 0.82439980910442

random: 0.7261292761793569
action prob: 0.0
0
random: 0.8374096076755554
action prob: 0.0
0
random: 0.027467323790169806
action prob: 0.0
0
random: 0.5461986392277204
action prob: 0.0
0
random: 0.11639553528398772
action prob: 0.0
0
random: 0.7612446852948948
action prob: 0.0
0
random: 0.06265267897957272
action prob: 0.0
0
random: 0.8818056344537126
action prob: 0.0
0
random: 0.8452152062890979
action prob: 0.0
0
random: 0.7226211226112806
action prob: 0.0
0
random: 0.043036472151104066
action prob: 0.0
0
random: 0.19411206962064442
action prob: 0.0
0
random: 0.9675364444727255
action prob: 0.0
0
random: 0.44963602180716267
action prob: 0.0
0
random: 0.969494880406108
action prob: 0.0
0
random: 0.577715368287144
action prob: 0.0
0
random: 0.822923413047055
action prob: 0.0
0
random: 0.8322395940291767
action prob: 0.0
0
random: 0.8907732146583939
action prob: 0.0
0
random: 0.3833372267179237
action prob: 0.0
0
random: 0.8923534097393954
action prob: 0.0
0
random: 0.011288907281772342


random: 0.27729970287651107
action prob: 0.0
0
random: 0.08105355025027172
action prob: 0.0
0
random: 0.712441034165823
action prob: 0.0
0
random: 0.2951762711507052
action prob: 0.0
0
random: 0.10135292440632104
action prob: 0.0
0
random: 0.7234713422754121
action prob: 0.0
0
random: 0.9645925724933688
action prob: 0.0
0
random: 0.43787611156638273
action prob: 0.0
0
random: 0.25969529454059226
action prob: 0.0
0
random: 0.23184293262052014
action prob: 0.0
0
random: 0.5414169074270327
action prob: 0.0
0
random: 0.276670995086272
action prob: 0.0
0
random: 0.5641335220939089
action prob: 0.0
0
random: 0.39075788047897386
action prob: 0.0
0
random: 0.46458880789019996
action prob: 0.0
0
random: 0.027698946670031366
action prob: 0.0
0
random: 0.08308241801476668
action prob: 0.0
0
random: 0.6873271513356264
action prob: 0.0
0
random: 0.5046860093623198
action prob: 0.0
0
random: 0.09562697065130499
action prob: 0.0
0
random: 0.3872990498254897
action prob: 0.0
0
random: 0.64478793916650

random: 0.08088968867175128
action prob: 0.0
0
random: 0.12174348442256777
action prob: 0.0
0
random: 0.37680984620067415
action prob: 0.0
0
random: 0.12207109586328968
action prob: 0.0
0
random: 0.05275296770973137
action prob: 0.0
0
random: 0.931321306322749
action prob: 0.0
0
random: 0.9875629820119287
action prob: 0.0
0
random: 0.8407108091119725
action prob: 0.0
0
random: 0.03588779970605194
action prob: 0.0
0
random: 0.7681984343714026
action prob: 0.0
0
random: 0.7737322293528516
action prob: 0.0
0
random: 0.889319041598721
action prob: 0.0
0
random: 0.10176431145033282
action prob: 0.0
0
random: 0.2077854714291517
action prob: 0.0
0
random: 0.9095616818779136
action prob: 0.0
0
random: 0.8383790219936846
action prob: 0.0
0
random: 0.6331652306588377
action prob: 0.0
0
random: 0.826920074003323
action prob: 0.0
0
random: 0.8350271816081096
action prob: 0.0
0
random: 0.2752556386769248
action prob: 0.0
0
random: 0.12935098634376607
action prob: 0.0
0
random: 0.6300363086917196
ac

random: 0.5808521832745476
action prob: 0.0
0
random: 0.9880991189314559
action prob: 0.0
0
random: 0.5711577633588204
action prob: 0.0
0
random: 0.15198089334517673
action prob: 0.0
0
random: 0.7028246724515852
action prob: 0.0
0
random: 0.8961444347784464
action prob: 0.0
0
random: 0.6480626341494892
action prob: 0.0
0
random: 0.674100767215465
action prob: 0.0
0
random: 0.19182562602265085
action prob: 0.0
0
random: 0.14535329996320856
action prob: 0.0
0
random: 0.653423482940059
action prob: 0.0
0
random: 0.3766311582320878
action prob: 0.0
0
random: 0.8809380531344954
action prob: 0.0
0
random: 0.7573341091608188
action prob: 0.0
0
random: 0.6150097764233432
action prob: 0.0
0
random: 0.43011990055428717
action prob: 0.0
0
random: 0.7692740795525616
action prob: 0.0
0
random: 0.6292581381371712
action prob: 0.0
0
random: 0.3142819065906861
action prob: 0.0
0
random: 0.7667346190750203
action prob: 0.0
0
random: 0.07326258349233716
action prob: 0.0
0
random: 0.8845592076829079
acti

random: 0.247498996414121
action prob: 0.0
0
random: 0.42914030699602634
action prob: 0.0
0
random: 0.8339299563266326
action prob: 0.0
0
random: 0.15033606330831883
action prob: 0.0
0
random: 0.4078455281521698
action prob: 0.0
0
random: 0.1685948411142416
action prob: 0.0
0
random: 0.5965049760502898
action prob: 0.0
0
random: 0.22924037118453167
action prob: 0.0
0
random: 0.6595396117543336
action prob: 0.0
0
random: 0.7327990175890636
action prob: 0.0
0
random: 0.5459761513549387
action prob: 0.0
0
random: 0.8223667745403265
action prob: 0.0
0
random: 0.13575698089415744
action prob: 0.0
0
random: 0.5460322671146993
action prob: 0.0
0
random: 0.2670770436210522
action prob: 0.0
0
random: 0.8132579280354688
action prob: 0.0
0
random: 0.7141501283221433
action prob: 0.0
0
random: 0.522379367257739
action prob: 0.0
0
random: 0.7230108108702105
action prob: 0.0
0
random: 0.33065871743994035
action prob: 0.0
0
random: 0.2465857666660234
action prob: 0.0
0
random: 0.9654932431765588
acti

random: 0.421153366063695
action prob: 0.0
0
random: 0.2634740623688706
action prob: 0.0
0
random: 0.07992537494566265
action prob: 0.0
0
random: 0.14459195902921407
action prob: 0.0
0
random: 0.6571534737452029
action prob: 0.0
0
random: 0.8674493938530645
action prob: 0.0
0
random: 0.04494909435377448
action prob: 0.0
0
random: 0.20576440909452387
action prob: 0.0
0
random: 0.42338106235382733
action prob: 0.0
0
random: 0.16816682039059327
action prob: 0.0
0
random: 0.11912739256686533
action prob: 0.0
0
random: 0.8899836944105105
action prob: 0.0
0
random: 0.37201881531353265
action prob: 0.0
0
random: 0.7620103966461055
action prob: 0.0
0
random: 0.8284369147769812
action prob: 0.0
0
random: 0.32705969232463783
action prob: 0.0
0
random: 0.41990386265317303
action prob: 0.0
0
random: 0.34176339249318477
action prob: 0.0
0
random: 0.8712645126864736
action prob: 0.0
0
random: 0.7905421282854268
action prob: 0.0
0
random: 0.09735626712666579
action prob: 0.0
0
random: 0.2831428660819

random: 0.3494464089802941
action prob: 0.0
0
random: 0.8235125223241627
action prob: 0.0
0
random: 0.7343763725986905
action prob: 0.0
0
random: 0.989287147134845
action prob: 0.0
0
random: 0.013830719490851684
action prob: 0.0
0
random: 0.5230566207401024
action prob: 0.0
0
random: 0.4692398724859168
action prob: 0.0
0
random: 0.06355025153593008
action prob: 0.0
0
random: 0.03655479632634051
action prob: 0.0
0
random: 0.6246976818416793
action prob: 0.0
0
random: 0.4623166573072668
action prob: 0.0
0
random: 0.3396912556093873
action prob: 0.0
0
random: 0.14542048649472128
action prob: 0.0
0
random: 0.3549009118190979
action prob: 0.0
0
random: 0.31739893582840595
action prob: 0.0
0
random: 0.2950473511595647
action prob: 0.0
0
random: 0.09593129822251001
action prob: 0.0
0
random: 0.6483520512481805
action prob: 0.0
0
random: 0.06582363018669879
action prob: 0.0
0
random: 0.38158180680108855
action prob: 0.0
0
random: 0.5303622330825338
action prob: 0.0
0
random: 0.4209431694927741

random: 0.22260630229418865
action prob: 0.0
0
random: 0.8433942835199043
action prob: 0.0
0
random: 0.9176204047423026
action prob: 0.0
0
random: 0.4096848177888833
action prob: 0.0
0
random: 0.9162725608759122
action prob: 0.0
0
random: 0.3386721545630059
action prob: 0.0
0
random: 0.30317425654799035
action prob: 0.0
0
random: 0.9987539598365984
action prob: 0.0
0
random: 0.7503735275701404
action prob: 0.0
0
random: 0.8042035778033826
action prob: 0.0
0
random: 0.7483549706953009
action prob: 0.0
0
random: 0.9775917031747264
action prob: 0.0
0
random: 0.18038318872321701
action prob: 0.0
0
random: 0.5134867335627488
action prob: 0.0
0
random: 0.9357812683743533
action prob: 0.0
0
random: 0.5771006781014192
action prob: 0.0
0
random: 0.3525302662151477
action prob: 0.0
0
random: 0.17082606546323442
action prob: 0.0
0
random: 0.924234909447903
action prob: 0.0
0
random: 0.5501931318926963
action prob: 0.0
0
random: 0.3250452064159608
action prob: 0.0
0
random: 0.5229884031048667
acti

random: 0.6520141911937133
action prob: 0.0
0
random: 0.2838044992837584
action prob: 0.0
0
random: 0.3097893677776563
action prob: 0.0
0
random: 0.1348021773402276
action prob: 0.0
0
random: 0.19908708848792545
action prob: 0.0
0
random: 0.1150399491313494
action prob: 0.0
0
random: 0.621943716687904
action prob: 0.0
0
random: 0.8824110611225584
action prob: 0.0
0
random: 0.5563147317530708
action prob: 0.0
0
random: 0.5675978515397497
action prob: 0.0
0
random: 0.48189621842778163
action prob: 0.0
0
random: 0.041282229180628405
action prob: 0.0
0
random: 0.8870486932589919
action prob: 0.0
0
random: 0.4170327918777812
action prob: 0.0
0
random: 0.9739231247214843
action prob: 0.0
0
random: 0.8215603122581565
action prob: 0.0
0
random: 0.01598809029641224
action prob: 0.0
0
random: 0.8272919099945953
action prob: 0.0
0
random: 0.057090290329097626
action prob: 0.0
0
random: 0.9942219848987193
action prob: 0.0
0
random: 0.4383086523823033
action prob: 0.0
0
random: 0.7344956567073831
a

random: 0.9411437547807825
action prob: 0.0
0
random: 0.6390090835673662
action prob: 0.0
0
random: 0.2735737584110467
action prob: 0.0
0
random: 0.28189970304911005
action prob: 0.0
0
random: 0.6828603279877696
action prob: 0.0
0
random: 0.2592641066380573
action prob: 0.0
0
random: 0.3297657548626429
action prob: 0.0
0
random: 0.8628296193880874
action prob: 0.0
0
random: 0.6790693174705474
action prob: 0.0
0
random: 0.60591684645975
action prob: 0.0
0
random: 0.3343606356865356
action prob: 0.0
0
random: 0.9678875006411745
action prob: 0.0
0
random: 0.7152532806912933
action prob: 0.0
0
random: 0.3774554759269565
action prob: 0.0
0
random: 0.8545707156905882
action prob: 0.0
0
random: 0.21622968249551255
action prob: 0.0
0
random: 0.15993578945669806
action prob: 0.0
0
random: 0.4587018724120656
action prob: 0.0
0
random: 0.4706373784503044
action prob: 0.0
0
random: 0.6227768939319477
action prob: 0.0
0
random: 0.5656757848475449
action prob: 0.0
0
random: 0.05404868198670876
actio

random: 0.12678753661313313
action prob: 0.0
0
random: 0.6681229176550502
action prob: 0.0
0
random: 0.8579486016351875
action prob: 0.0
0
random: 0.9072612066312283
action prob: 0.0
0
random: 0.9620661952690917
action prob: 0.0
0
random: 0.5818116011558503
action prob: 0.0
0
random: 0.9433543125160944
action prob: 0.0
0
random: 0.9918988673981649
action prob: 0.0
0
random: 0.19316427691415738
action prob: 0.0
0
random: 0.4214648197776032
action prob: 0.0
0
random: 0.6318302798229429
action prob: 0.0
0
random: 0.38007412329747325
action prob: 0.0
0
random: 0.7482956036976697
action prob: 0.0
0
random: 0.3833626917015073
action prob: 0.0
0
random: 0.8073113491204773
action prob: 0.0
0
random: 0.18463027826373302
action prob: 0.0
0
random: 0.23733485752154104
action prob: 0.0
0
random: 0.41897766630895084
action prob: 0.0
0
random: 0.59416486250165
action prob: 0.0
0
random: 0.4819495032397598
action prob: 0.0
0
random: 0.11204407790441928
action prob: 0.0
0
random: 0.31293778124984495
a

random: 0.9100461601213514
action prob: 0.0
0
random: 0.24721197548935148
action prob: 0.0
0
random: 0.1812903693326655
action prob: 0.0
0
random: 0.7136052877129493
action prob: 0.0
0
random: 0.2880536691126895
action prob: 0.0
0
random: 0.9427061189942668
action prob: 0.0
0
random: 0.6607658674793566
action prob: 0.0
0
random: 0.468245077950597
action prob: 0.0
0
random: 0.9265997801865072
action prob: 0.0
0
random: 0.83541305978528
action prob: 0.0
0
random: 0.04763237116537544
action prob: 0.0
0
random: 0.7145029697770131
action prob: 0.0
0
random: 0.44899347946209467
action prob: 0.0
0
random: 0.2519852671424295
action prob: 0.0
0
random: 0.5435473461964481
action prob: 0.0
0
random: 0.15625101131583474
action prob: 0.0
0
random: 0.011330349069499035
action prob: 0.0
0
random: 0.13143711046422546
action prob: 0.0
0
random: 0.9686769032106758
action prob: 0.0
0
random: 0.5889188268466771
action prob: 0.0
0
random: 0.37246533482407573
action prob: 0.0
0
random: 0.6910693102176036
ac

random: 0.41854102332084264
action prob: 0.0
0
random: 0.008293887031247227
action prob: 0.0
0
random: 0.041985700749503474
action prob: 0.0
0
random: 0.30137500550600294
action prob: 0.0
0
random: 0.558806522938628
action prob: 0.0
0
random: 0.09668392966460226
action prob: 0.0
0
random: 0.50346320663793
action prob: 0.0
0
random: 0.4117734633264628
action prob: 0.0
0
random: 0.6184458725790278
action prob: 0.0
0
random: 0.8177888258786813
action prob: 0.0
0
random: 0.48559609662657366
action prob: 0.0
0
random: 0.1237544385089755
action prob: 0.0
0
random: 0.21641531945301484
action prob: 0.0
0
random: 0.06524277002330059
action prob: 0.0
0
random: 0.4902838252999845
action prob: 0.0
0
random: 0.18135152019167144
action prob: 0.0
0
random: 0.23395496555366224
action prob: 0.0
0
random: 0.32416459492115957
action prob: 0.0
0
random: 0.15881497969475633
action prob: 0.0
0
random: 0.7628440996947671
action prob: 0.0
0
random: 0.44564442706837715
action prob: 0.0
0
random: 0.871694485290

random: 0.8982886785779958
action prob: 0.0
0
random: 0.17859135556486494
action prob: 0.0
0
random: 0.13750230223547544
action prob: 0.0
0
random: 0.9402961435572871
action prob: 0.0
0
random: 0.5322250329159914
action prob: 0.0
0
random: 0.22531930802553435
action prob: 0.0
0
random: 0.867217464621827
action prob: 0.0
0
random: 0.933177854295398
action prob: 0.0
0
random: 0.6938736880455502
action prob: 0.0
0
random: 0.12342749655272411
action prob: 0.0
0
random: 0.1537657868625436
action prob: 0.0
0
random: 0.4969491567588983
action prob: 0.0
0
random: 0.2604630400805814
action prob: 0.0
0
random: 0.3788945496412539
action prob: 0.0
0
random: 0.8799661195096145
action prob: 0.0
0
random: 0.34346599564440106
action prob: 0.0
0
random: 0.28114371297625107
action prob: 0.0
0
random: 0.40577168883885373
action prob: 0.0
0
random: 0.2426161038546103
action prob: 0.0
0
random: 0.8705471288501598
action prob: 0.0
0
random: 0.9687401849864996
action prob: 0.0
0
random: 0.8841726617479178
ac

random: 0.9954396375380912
action prob: 0.0
0
random: 0.697802779948526
action prob: 0.0
0
random: 0.41508710405413274
action prob: 0.0
0
random: 0.8032059768328539
action prob: 0.0
0
random: 0.151200022914582
action prob: 0.0
0
random: 0.510054768095406
action prob: 0.0
0
random: 0.6786003150223954
action prob: 0.0
0
random: 0.542904114926113
action prob: 0.0
0
random: 0.33933925878996696
action prob: 0.0
0
random: 0.26303218815756957
action prob: 0.0
0
random: 0.3803292271902038
action prob: 0.0
0
random: 0.7239144145942982
action prob: 0.0
0
random: 0.17189009293141333
action prob: 0.0
0
random: 0.9923784305990512
action prob: 0.0
0
random: 0.6096702673445342
action prob: 0.0
0
random: 0.775208097434086
action prob: 0.0
0
random: 0.4614848368888994
action prob: 0.0
0
random: 0.8392526003454641
action prob: 0.0
0
random: 0.5870619675838873
action prob: 0.0
0
random: 0.9090743227969685
action prob: 0.0
0
random: 0.23528254397385895
action prob: 0.0
0
random: 0.3181244526134922
action 

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.