In [39]:
import numpy as np
from scipy.stats import poisson
from functools import lru_cache
from tqdm import tqdm

In [51]:
ACTIONS = np.arange(-5, 6)
MAX_CARS_LOC = 20
N_CARS_PER_LOC = (MAX_CARS_LOC + 1)    #[0, 20]
N_STATES = N_CARS_PER_LOC * N_CARS_PER_LOC
CAR_RENT_COST = 10
CAR_MOVE_COST = -2

LAMBDA_RET_LOC1 = 3
LAMBDA_RET_LOC2 = 2
LAMBDA_RNT_LOC1 = 3
LAMBDA_RNT_LOC2 = 4

MAX_POISSON_OUTCOME = 11
GAMMA = 0.9
THETA = 1e-4

#Pre-computing poisson pmf due to high computational cost
poisson_pmf_req1 = [poisson.pmf(i, LAMBDA_RNT_LOC1) for i in range(MAX_POISSON_OUTCOME)]
poisson_pmf_req2 = [poisson.pmf(i, LAMBDA_RNT_LOC2) for i in range(MAX_POISSON_OUTCOME)]
poisson_pmf_ret1 = [poisson.pmf(i, LAMBDA_RET_LOC1) for i in range(MAX_POISSON_OUTCOME)]
poisson_pmf_ret2 = [poisson.pmf(i, LAMBDA_RET_LOC2) for i in range(MAX_POISSON_OUTCOME)]

@lru_cache(maxsize=None)
def calculate_expected_val(state, action): #R(s, a) | P(s' |s, a)
    #Estado do dia anterior
    n1_end_prev, n2_end_prev = divmod(state, N_CARS_PER_LOC)

    #Inicializa recompensa e executa a ação, que é mover os carros de uma localização para outra
    #Valores positivos loc1 -> loc2
    #Valores negativos loc2 -> loc1
    expected_total_reward = 0.0

    n1_after_move = n1_end_prev - action
    n2_after_move = n2_end_prev + action

    if not (0 <= n1_after_move <= MAX_CARS_LOC) or not (0 <= n2_after_move <= MAX_CARS_LOC):
        return 0.0, np.zeros(N_STATES)

    #Inicializa a probabilidade dos estados seguintes
    next_state_prob = np.zeros(N_STATES)

    #Verifica os limites para n1 e n2 se manterem entre 0 e 20
    n1_start_day = min(max(0, n1_after_move), MAX_CARS_LOC)
    n2_start_day = min(max(0, n2_after_move), MAX_CARS_LOC)

    #Custo por mover cada carro
    move_cost = abs(action) * CAR_MOVE_COST

    #Como o processo é estocástico, é necessário verificar todas as possibilidades para encontrar a probabilidade de cair no próximo estado
    for req1 in range(MAX_POISSON_OUTCOME):
        prob_req1 = poisson_pmf_req1[req1] 
        for req2 in range(MAX_POISSON_OUTCOME):
            prob_req2 = poisson_pmf_req2[req2]
            for ret1 in range(MAX_POISSON_OUTCOME):
                prob_ret1 = poisson_pmf_ret1[ret1]
                for ret2 in range(MAX_POISSON_OUTCOME):
                    prob_ret2 = poisson_pmf_ret2[ret2]
                    

                    prob_joint_outcome = prob_req1 * prob_req2 * prob_ret1 * prob_ret2 #Probabilidade conjunta (São independentes)

                    # Calcula o número de carros alugados. Se houverem mais clientes do que carros, aluga todos os carros restantes.
                    rented_1 = min(n1_start_day, req1)
                    rented_2 = min(n2_start_day, req2)
                    
                    # Calcula a recompensa obtida do aluguel das 2 localizações menos o custo de mover os carros
                    rental_income = (rented_1 + rented_2) * CAR_RENT_COST
                    current_reward = rental_income - move_cost
                    
                    # Esperança da recompensa
                    expected_total_reward += prob_joint_outcome * current_reward

                    # Calcula os carros do próximo dia
                    cars_after_rentals_1 = n1_start_day - rented_1
                    cars_after_rentals_2 = n2_start_day - rented_2

                    # Retornam os carros do "dia anterior"
                    next_n1 = min(MAX_CARS_LOC, cars_after_rentals_1 + ret1)
                    next_n2 = min(MAX_CARS_LOC, cars_after_rentals_2 + ret2)
                    
                    #Atribui probabilidade para o próximo estado obtido
                    next_s_idx = next_n1 * N_CARS_PER_LOC + next_n2
                    next_state_prob[next_s_idx] += prob_joint_outcome

    #Normaliza as probabilidades
    sum_probs = np.sum(next_state_prob)
    if sum_probs > 0:
        next_state_prob /= sum_probs

    #Retorna a recompensa esperada de cada próximo estado R(s,a) e sua probabilidade P(s' |s, a)
    return expected_total_reward, next_state_prob

def get_prob_policy(policy, state, action):
    if action in policy[state]:
        return 1/len(policy[state])

    return 0

def policy_eval(state_values, policy, theta, gamma):
    while True:
        delta = 0
        v_old = state_values.copy()
        for state in tqdm(range(N_STATES), desc="Policy Evaluation"):
            v_new = 0
            for action in ACTIONS:
                expected_r, next_state_prob = calculate_expected_val(state, action) # R(s,a) , p(s' |s, a)
                v_action = expected_r + gamma * np.dot(next_state_prob, state_values)
                v_new += v_action * get_prob_policy(policy, state, action)

            state_values[state] = v_new

            delta = max(delta, abs(v_old[state] - v_new))

        if delta < theta:
            break


def policy_improv(policy, state_values, gamma):
    pol_stable = True
    old_policy = [i.copy() for i in policy]
    for state in tqdm(range(N_STATES), desc="Policy Improvement"):
        best_q = float("-inf")
        best_action = 0

        for action in ACTIONS:
            expected_r, next_state_prob = calculate_expected_val(state, action) # R(s,a) , p(s' |s, a)
            print(expected_r)

            if np.sum(next_state_prob) == 0:
                continue

            q_sa = 0
            for next_s in range(N_STATES):
                q_sa += next_state_prob[next_s] * (expected_r + gamma * state_values[next_s])

            if best_q < q_sa:
                best_q = q_sa
                best_action = action

        policy[state] = [best_action]
        if policy[state] != old_policy[state]:
            pol_stable = False

    return pol_stable

def policy_iter(policy, state_values, gamma):
    iterations = 0
    while True:
        iterations += 1
        print("Política atual: ")
        print(np.array(policy).reshape(21,21))
        policy_eval(state_values, policy, 1e-3, gamma)
        pol_stable = policy_improv(policy, state_values, gamma)
        if pol_stable:
            break
    print("Iterations: ", iterations)

policy = [[0] for s in range(N_STATES)]
state_values = np.zeros(N_STATES)

policy_iter(policy, state_values, 0.9)
print(np.array(policy).reshape(21,21))

Política atual: 
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]


Policy Evaluation: 100%|██████████| 441/441 [02:37<00:00,  2.81it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13762.97it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 10986.70it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13452.96it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12850.41it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 9895.14it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12313.77it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13749.06it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13546.46it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12207.15it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12555.49it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 11922.63it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13219.61it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12262.91it

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
11.462520717719304
9.78264246049182
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
21.436118661752367
21.24516317821156
18.833093733124425
0.0
0.0
0.0
0.0
0.0
0.0
0.0
29.17633244525593
31.218761122244164
30.295614450843704
26.41916263003876
0.0
0.0
0.0
0.0
0.0
0.0
34.683162068230004
38.95897490574734
40.26921239487749
37.881683347757836
32.05272169266171
0.0
0.0
0.0
0.0
0.0
38.51495357080725
44.465804528722316
48.00942617838047
47.85528129179149
43.51524241038184
35.73377092099298
0.0
0.0
0.0
0.0
0.0
48.29759603129981
53.51625580135471
55.59549507529572
53.48884035441369
47.196291638713404
37.85281228189168
0.0
0.0
0.0
0.0
0.0
57.34804730393205
61.10232469826957
61.22905413791843
57.16988958274602
49.3153329996116
38.9305150645017
0.0
0.0
0.0
0.0
0.0
64.93411620084653
66.73588376089279
64.91010336624943
59.28893094364359
50.39303578222172
39.41316723094661
0.0
0.0
0.0
0.0
0.0
70.56767526346867
70.41693298922402
67.02914472714849
60.366633726

Policy Improvement:  36%|███▋      | 160/441 [00:00<00:00, 299.50it/s]

73.11345636885746
70.97432603299954
68.2004507907631
68.35493539116908
66.83438187117852
63.08044419065646
57.093122349606766
49.61687773486927
77.69134508176325
76.76783373622918
75.22235151902537
73.29858322721957
71.02721942110304
68.2004507907631
68.35493539116908
66.83438187117852
63.08044419065646
57.093122349606766
49.61687773486927
78.76904786437368
77.25048590267507
75.40747837738768
73.35147661532353
71.02721942110304
68.2004507907631
68.35493539116908
66.83438187117852
63.08044419065646
57.093122349606766
49.61687773486927
79.25170003081841
77.43561276103725
75.46037176549115
73.35147661532353
71.02721942110304
68.2004507907631
68.35493539116908
66.83438187117852
63.08044419065646
57.093122349606766
49.61687773486927
79.43682688918094
77.48850614914079
75.46037176549115
73.35147661532353
71.02721942110304
68.2004507907631
68.35493539116908
66.83438187117852
63.08044419065646
57.093122349606766
49.61687773486927
79.4897202772844
77.48850614914079
75.46037176549115
73.35147661

Policy Improvement:  50%|█████     | 221/441 [00:00<00:00, 286.97it/s]

72.8137960256518
75.57230372086481
75.69820703027199
74.78277176139055
73.27228587634453
71.45619860656277
69.48095761101727
71.35833856383105
73.02035747259445
74.17986494523727
74.33434954564291
72.8137960256518
77.69134508176325
76.77590981288144
75.26542392783578
73.45741273470705
71.5090919946666
69.48095761101727
71.35833856383105
73.02035747259445
74.17986494523727
74.33434954564291
72.8137960256518
78.76904786437368
77.25856197932679
75.4505507861981
73.51030612281107
71.5090919946666
69.48095761101727
71.35833856383105
73.02035747259445
74.17986494523727
74.33434954564291
72.8137960256518
79.25170003081841
77.44368883768935
75.50344417430162
73.51030612281107
71.5090919946666
69.48095761101727
71.35833856383105
73.02035747259445
74.17986494523727
74.33434954564291
72.8137960256518
79.43682688918094
77.49658222579272
75.50344417430162
73.51030612281107
71.5090919946666
69.48095761101727
71.35833856383105
73.02035747259445
74.17986494523727
74.33434954564291
72.8137960256518
79.

Policy Improvement:  63%|██████▎   | 279/441 [00:00<00:00, 250.47it/s]

73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.50223004615913
75.46037176549115
77.33775271830532
78.99977162706753
79.43682688918094
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.50223004615913
75.46037176549115
77.33775271830532
78.99977162706753
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.50223004615913
75.46037176549115
77.33775271830532
78.99977162706753
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.50223004615913
75.46037176549115
77.33775271830532
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.50223004615913
75.46037176549115
0.0
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716

Policy Improvement:  76%|███████▌  | 336/441 [00:01<00:00, 265.23it/s]

39.83853279987092
47.62803720887216
54.68535043001269
60.27828127543639
63.918702286567786
65.60661346340801
69.7187928757979
72.78963370989963
75.26542392783578
77.44368883768935
79.48164420063242
49.62117526036291
56.67848848150528
62.27141932692714
65.91184033805861
67.59975151489928
67.72565482430642
70.79649565840785
73.27228587634453
75.4505507861981
77.49658222579272
79.48164420063242
58.67162653299595
64.26455737841889
67.90497838955044
69.59288956639094
69.7187928757979
68.80335760691617
71.27914782485263
73.45741273470705
75.50344417430162
77.49658222579272
79.48164420063242
66.25769542991019
69.89811644104174
71.5860276178815
71.71193092728963
70.79649565840785
69.28600977336106
71.46427468321464
73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
71.89125449253338
73.5791656693739
73.70506897878057
72.78963370989963
71.27914782485263
69.47113663172323
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
75.57230372086

Policy Improvement: 100%|██████████| 441/441 [00:01<00:00, 300.65it/s]


0.0
0.0
75.26542392783578
73.45741273470705
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
75.4505507861981
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
0.0
0.0
0.

Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12374.15it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14770.80it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 15088.41it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 15002.13it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 15339.54it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14710.18it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13931.84it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14858.13it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14699.20it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 15004.20it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 11851.35it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 11414.23it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 9897.10it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12580.7

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
11.462520717719304
9.78264246049182
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
21.436118661752367
21.24516317821156
18.833093733124425
0.0
0.0
0.0
0.0
0.0
0.0
0.0
29.17633244525593
31.218761122244164
30.295614450843704
26.41916263003876
0.0
0.0
0.0
0.0
0.0
0.0
34.683162068230004
38.95897490574734
40.26921239487749
37.881683347757836
32.05272169266171
0.0
0.0
0.0
0.0
0.0
38.51495357080725
44.465804528722316
48.00942617838047
47.85528129179149
43.51524241038184
35.73377092099298
0.0
0.0
0.0
0.0
0.0
48.29759603129981
53.51625580135471
55.59549507529572
53.48884035441369
47.196291638713404
37.85281228189168
0.0
0.0
0.0
0.0
0.0
57.34804730393205
61.10232469826957
61.22905413791843
57.16988958274602
49.3153329996116
38.9305150645017
0.0
0.0
0.0
0.0
0.0
64.93411620084653
66.73588376089279
64.91010336624943
59.28893094364359
50.39303578222172
39.41316723094661
0.0
0.0
0.0
0.0
0.0
70.56767526346867
70.41693298922402
67.02914472714849
60.366633726

Policy Improvement:  33%|███▎      | 147/441 [00:00<00:01, 288.46it/s]

54.64227802120238
60.1194517679487
63.42875363635133
64.28303423434335
66.5565601956486
66.1137094582669
62.842423944190216
57.04022896150331
49.61687773486927
49.62117526036291
56.67041240485373
62.22834691811678
65.75301083057154
67.10980286468278
66.40207559524275
67.63426297825748
66.59636162471176
63.02755080255303
57.093122349606766
49.61687773486927
58.67162653299595
64.25648130176697
67.86190598074013
69.43406005890348
69.2288442255814
67.47977837785216
68.11691514470266
66.78148848307478
63.08044419065646
57.093122349606766
49.61687773486927
66.25769542991019
69.89004036438965
71.54295520907161
71.55310141980189
70.30654700819228
67.96243054429756
68.30204200306552
66.83438187117852
63.08044419065646
57.093122349606766
49.61687773486927
71.89125449253338
73.57108959272207
73.66199656996993
72.63080420241164
70.78919917463652
68.14755740265936
68.35493539116908
66.83438187117852
63.08044419065646
57.093122349606766
49.61687773486927
75.57230372086481
75.69013095362044
74.739699

Policy Improvement:  53%|█████▎    | 235/441 [00:00<00:00, 271.58it/s]

69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
67.06672029363943
79.43682688918094
77.49658222579272
75.50344417430162
73.50223004615913
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
67.06672029363943
79.4897202772844
77.49658222579272
75.50344417430162
73.50223004615913
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
67.06672029363943
79.4897202772844
77.49658222579272
75.50344417430162
73.50223004615913
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.50223004615913
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
0.0
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.50223004615913
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
0.

Policy Improvement:  66%|██████▌   | 292/441 [00:01<00:00, 271.72it/s]

0.0
37.84539474837965
45.63489915738079
52.69221237852185
58.28514322394468
61.92556423507602
67.59975151489928
71.7038548506379
74.73969935258027
77.09973247183956
78.9468782389636
39.83853279987092
47.62803720887216
54.68535043001269
60.27828127543639
63.918702286567786
65.60661346340801
69.7187928757979
72.78155763324771
75.22235151902537
77.28485933020178
78.99977162706753
49.62117526036291
56.67848848150528
62.27141932692714
65.91184033805861
67.59975151489928
67.72565482430642
70.79649565840785
73.26420979969288
75.40747837738768
77.33775271830532
78.99977162706753
58.67162653299595
64.26455737841889
67.90497838955044
69.59288956639094
69.7187928757979
68.80335760691617
71.27914782485263
73.44933665805516
75.46037176549115
77.33775271830532
78.99977162706753
66.25769542991019
69.89811644104174
71.5860276178815
71.71193092728963
70.79649565840785
69.28600977336106
71.46427468321464
73.50223004615913
75.46037176549115
77.33775271830532
78.99977162706753
71.89125449253338
73.5791656

Policy Improvement:  85%|████████▍ | 373/441 [00:01<00:00, 242.32it/s]

73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
78.76904786437368
77.25856197932679
75.4505507861981
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
79.25170003081841
77.44368883768935
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
79.43682688918094
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107


Policy Improvement: 100%|██████████| 441/441 [00:01<00:00, 278.16it/s]


0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
0.0
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
0.0
0.0
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
0.0
0.0
0.0
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
0.0
0.0
0.0
0.0
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
29.872842542414297
41.64862305439792
52.69221237852185
62.27141932692714
69.89811644104174
75.57230372086481
0.0
0.0
0.0
0.0
31.86598059390581
39.6554850029061
50.69907432703028
60.27828127543639
67.90497838955044
73.5791656693739
77.69134508176325
0.0
0.0
0.0
33.85911864539706
41.64862305439792
48.70593627553869
58.28514322394468
65.911840338

Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12403.44it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13449.83it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13895.52it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13809.00it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12464.88it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13539.32it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12951.18it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13999.96it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14678.78it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13901.16it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12726.89it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 10387.25it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12533.72it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13905.

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
11.462520717719304
9.78264246049182
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
21.436118661752367
21.24516317821156
18.833093733124425
0.0
0.0
0.0
0.0
0.0
0.0
0.0
29.17633244525593
31.218761122244164
30.295614450843704
26.41916263003876
0.0
0.0
0.0
0.0
0.0
0.0
34.683162068230004
38.95897490574734
40.26921239487749
37.881683347757836
32.05272169266171
0.0
0.0
0.0
0.0
0.0
38.51495357080725
44.465804528722316
48.00942617838047
47.85528129179149
43.51524241038184
35.73377092099298
0.0
0.0
0.0
0.0
0.0
48.29759603129981
53.51625580135471
55.59549507529572
53.48884035441369
47.196291638713404
37.85281228189168
0.0
0.0
0.0
0.0
0.0
57.34804730393205
61.10232469826957
61.22905413791843
57.16988958274602
49.3153329996116
38.9305150645017
0.0
0.0
0.0
0.0
0.0
64.93411620084653
66.73588376089279
64.91010336624943
59.28893094364359
50.39303578222172
39.41316723094661
0.0
0.0
0.0
0.0
0.0
70.56767526346867
70.41693298922402
67.02914472714849
60.366633726

Policy Improvement:  31%|███▏      | 138/441 [00:00<00:01, 279.97it/s]

0.0
79.42875081252882
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
64.84124381968533
61.08730613916535
55.09998429811466
47.623739683378844
0.0
79.48164420063242
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
64.84124381968533
61.08730613916535
55.09998429811466
47.623739683378844
0.0
79.48164420063242
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
64.84124381968533
61.08730613916535
55.09998429811466
47.623739683378844
0.0
79.48164420063242
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
64.84124381968533
61.08730613916535
55.09998429811466
0.0
0.0
79.48164420063242
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
64.84124381968533
61.08730613916535
0.0
0.0
0.0
79.48164420063242
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
64.8412

Policy Improvement:  44%|████▍     | 196/441 [00:00<00:00, 267.73it/s]

67.06672029363943
79.25170003081841
77.44368883768935
75.50344417430162
73.50223004615913
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
67.06672029363943
79.43682688918094
77.49658222579272
75.50344417430162
73.50223004615913
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
67.06672029363943
79.4897202772844
77.49658222579272
75.50344417430162
73.50223004615913
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
67.06672029363943
79.4897202772844
77.49658222579272
75.50344417430162
73.50223004615913
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.50223004615913
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
0.0
0.0
79.4897202772844
77.49658222579272
75

Policy Improvement:  57%|█████▋    | 250/441 [00:00<00:00, 215.63it/s]

79.25170003081841
77.44368883768935
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
78.16614104822088
79.43682688918094
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
78.16614104822088
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
78.16614104822088
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
0.0
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.

Policy Improvement:  77%|███████▋  | 339/441 [00:01<00:00, 262.85it/s]

79.33089076979745
78.76904786437368
77.25856197932679
75.4505507861981
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.4953680976497
77.45350981698245
79.33089076979745
79.25170003081841
77.44368883768935
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.4953680976497
77.45350981698245
79.33089076979745
79.43682688918094
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.4953680976497
77.45350981698245
79.33089076979745
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.4953680976497
77.45350981698245
79.33089076979745
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.4953680976497
77.45350981698245
0.0
79.4897202772844
77

Policy Improvement:  91%|█████████▏| 403/441 [00:01<00:00, 287.43it/s]

0.0
0.0
35.85225669688868
43.641761105889024
50.69907432703028
56.29200517245324
63.918702286567786
69.59288956639094
73.70506897878057
76.77590981288144
79.25170003081841
0.0
37.84539474837965
45.63489915738079
52.69221237852185
58.28514322394468
61.92556423507602
67.59975151489928
71.71193092728963
74.78277176139055
77.25856197932679
79.43682688918094
0.0
47.62803720887216
54.68535043001269
60.27828127543639
63.918702286567786
65.60661346340801
69.7187928757979
72.78963370989963
75.26542392783578
77.44368883768935
79.4897202772844
0.0
56.67848848150528
62.27141932692714
65.91184033805861
67.59975151489928
67.72565482430642
70.79649565840785
73.27228587634453
75.4505507861981
77.49658222579272
79.4897202772844
0.0
64.26455737841889
67.90497838955044
69.59288956639094
69.7187928757979
68.80335760691617
71.27914782485263
73.45741273470705
75.50344417430162
77.49658222579272
79.4897202772844
0.0
69.89811644104174
71.5860276178815
71.71193092728963
70.79649565840785
69.28600977336106
71.4

Policy Improvement: 100%|██████████| 441/441 [00:01<00:00, 280.63it/s]


73.45741273470705
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.28600977336106
71.46427468321464
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.47113663172323
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716

Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 10943.41it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 15079.80it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14646.01it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14928.52it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13873.84it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12233.63it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13720.50it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 15112.82it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14597.58it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14696.28it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14003.03it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13084.02it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14696.16it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 15072.

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
11.462520717719304
9.78264246049182
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
21.436118661752367
21.24516317821156
18.833093733124425
0.0
0.0
0.0
0.0
0.0
0.0
0.0
29.17633244525593
31.218761122244164
30.295614450843704
26.41916263003876
0.0
0.0
0.0
0.0
0.0
0.0
34.683162068230004
38.95897490574734
40.26921239487749
37.881683347757836
32.05272169266171
0.0
0.0
0.0
0.0
0.0
38.51495357080725
44.465804528722316
48.00942617838047
47.85528129179149
43.51524241038184
35.73377092099298
0.0
0.0
0.0
0.0
0.0
48.29759603129981
53.51625580135471
55.59549507529572
53.48884035441369
47.196291638713404
37.85281228189168
0.0
0.0
0.0
0.0
0.0
57.34804730393205
61.10232469826957
61.22905413791843
57.16988958274602
49.3153329996116
38.9305150645017
0.0
0.0
0.0
0.0
0.0
64.93411620084653
66.73588376089279
64.91010336624943
59.28893094364359
50.39303578222172
39.41316723094661
0.0
0.0
0.0
0.0
0.0
70.56767526346867
70.41693298922402
67.02914472714849
60.366633726

Policy Improvement:  29%|██▊       | 126/441 [00:00<00:00, 331.40it/s]

75.01349552408531
72.18672689374702
68.35493539116908
62.848105768195055
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
26.71060986226475
34.972698802764846
40.26921239487749
41.867959450740045
40.02527389862726
0.0
0.0
0.0
0.0
0.0
30.54240136484214
36.493252322756966
44.02315007539868
47.85528129179149
47.501518513363166
43.7063231269592
0.0
0.0
0.0
0.0
33.36916999518074
40.32504382533426
45.54370359538961
51.609218972312284
53.48884035441369
51.18256774169587
45.825364487857364
0.0
0.0
0.0
35.693427189401405
43.15181245567256
49.375495097966684
53.129772492304234
57.24277803493476
57.16988958274602
53.301609102594256
46.90306727046757
0.0
0.0
37.80232233956971
45.47606964989298
52.20226372830527
56.96156399488114
58.76333155492686
60.92382726326685
59.28893094364359
54.379311885203656
47.385719436912446
0.0
39.83045672321905
47.58496480006178
54.52652092252543
59.7883326252199
62.59512305750389
62.444380783258126
63.04286862416487
60.36663372625423
54.86196405164885
47.57084629527527
0.0
49

Policy Improvement:  50%|█████     | 222/441 [00:00<00:00, 279.81it/s]

65.07358224214859
59.086260401097796
79.43682688918094
77.49658222579272
75.4953680976497
73.4672337140007
71.35833856383105
69.0340813696114
70.19358884225431
70.34807344266109
68.82751992266893
65.07358224214859
59.086260401097796
79.4897202772844
77.49658222579272
75.4953680976497
73.4672337140007
71.35833856383105
69.0340813696114
70.19358884225431
70.34807344266109
68.82751992266893
65.07358224214859
59.086260401097796
79.4897202772844
77.49658222579272
75.4953680976497
73.4672337140007
71.35833856383105
69.0340813696114
70.19358884225431
70.34807344266109
68.82751992266893
65.07358224214859
0.0
79.4897202772844
77.49658222579272
75.4953680976497
73.4672337140007
71.35833856383105
69.0340813696114
70.19358884225431
70.34807344266109
68.82751992266893
0.0
0.0
79.4897202772844
77.49658222579272
75.4953680976497
73.4672337140007
71.35833856383105
69.0340813696114
70.19358884225431
70.34807344266109
0.0
0.0
0.0
79.4897202772844
77.49658222579272
75.4953680976497
73.4672337140007
71.35

Policy Improvement:  64%|██████▎   | 281/441 [00:00<00:00, 280.87it/s]

78.76904786437368
77.25856197932679
75.4505507861981
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
78.16614104822088
79.25170003081841
77.44368883768935
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
78.16614104822088
79.43682688918094
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
78.16614104822088
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
78.16614104822088
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
0.0
79.4897202772844
77.49658222579272
75.503444174

Policy Improvement:  84%|████████▍ | 370/441 [00:01<00:00, 288.94it/s]

77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
0.0
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
0.0
0.0
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
0.0
0.0
0.0
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
29.872842542414297
41.648623054

Policy Improvement: 100%|██████████| 441/441 [00:01<00:00, 305.05it/s]


0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
0.0
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
0.0
0.0
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
0.0
0.0
0.0
0.0
0.0
75.50344417430162
73.51030612281107
71.51716807131848
69.52403001

Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 10999.70it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 10842.70it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 13495.95it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14639.05it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 15514.39it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 12751.37it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14112.54it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14760.66it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 14661.21it/s]
Policy Improvement:  18%|█▊        | 78/441 [00:00<00:00, 364.90it/s]

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
11.462520717719304
9.78264246049182
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
21.436118661752367
21.24516317821156
18.833093733124425
0.0
0.0
0.0
0.0
0.0
0.0
0.0
29.17633244525593
31.218761122244164
30.295614450843704
26.41916263003876
0.0
0.0
0.0
0.0
0.0
0.0
34.683162068230004
38.95897490574734
40.26921239487749
37.881683347757836
32.05272169266171
0.0
0.0
0.0
0.0
0.0
38.51495357080725
44.465804528722316
48.00942617838047
47.85528129179149
43.51524241038184
35.73377092099298
0.0
0.0
0.0
0.0
0.0
48.29759603129981
53.51625580135471
55.59549507529572
53.48884035441369
47.196291638713404
37.85281228189168
0.0
0.0
0.0
0.0
0.0
57.34804730393205
61.10232469826957
61.22905413791843
57.16988958274602
49.3153329996116
38.9305150645017
0.0
0.0
0.0
0.0
0.0
64.93411620084653
66.73588376089279
64.91010336624943
59.28893094364359
50.39303578222172
39.41316723094661
0.0
0.0
0.0
0.0
0.0
70.56767526346867
70.41693298922402
67.02914472714849
60.366633726

Policy Improvement:  34%|███▍      | 149/441 [00:00<00:00, 301.83it/s]

0.0
79.48164420063242
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
64.84124381968533
61.08730613916535
55.09998429811466
47.623739683378844
0.0
79.48164420063242
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
64.84124381968533
61.08730613916535
55.09998429811466
0.0
0.0
79.48164420063242
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
64.84124381968533
61.08730613916535
0.0
0.0
0.0
79.48164420063242
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
64.84124381968533
0.0
0.0
0.0
0.0
79.48164420063242
77.45350981698245
75.34461466681422
73.02035747259445
70.19358884225431
66.36179733967798
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
28.54926331335077
38.48639037424875
46.016288126889755
49.84841934328206
49.4946565648557
45.699461178449866
0.0
0.0
0.0
0.0
31.376031943689686
38.33190577384225
47.536841646881015
53.60235702380

Policy Improvement:  48%|████▊     | 210/441 [00:00<00:00, 283.89it/s]

39.83853279987092
47.62803720887216
54.68535043001269
60.27828127543639
63.910626209916046
65.56354105459772
69.55996336831089
72.29968505968328
73.94184469877119
74.28145615753935
72.8137960256518
49.62117526036291
56.67848848150528
62.27141932692714
65.91184033805861
67.59167543824722
67.68258241549648
70.63766615092018
72.78233722612812
74.12697155713386
74.33434954564291
72.8137960256518
58.67162653299595
64.26455737841889
67.90497838955044
69.59288956639094
69.71071679914607
68.76028519810652
71.12031831736526
72.96746408449106
74.17986494523727
74.33434954564291
72.8137960256518
66.25769542991019
69.89811644104174
71.5860276178815
71.71193092728963
70.78841958175589
69.2429373645515
71.30544517572737
73.02035747259445
74.17986494523727
74.33434954564291
72.8137960256518
71.89125449253338
73.5791656693739
73.70506897878057
72.78963370989963
71.2710717482007
69.42806422291379
71.35833856383105
73.02035747259445
74.17986494523727
74.33434954564291
72.8137960256518
75.57230372086481


Policy Improvement:  61%|██████    | 267/441 [00:00<00:00, 229.42it/s]

37.84539474837965
45.63489915738079
52.69221237852185
58.28514322394468
61.92556423507602
67.59975151489928
71.7038548506379
74.73969935258027
77.09973247183956
78.9468782389636
39.83853279987092
47.62803720887216
54.68535043001269
60.27828127543639
63.918702286567786
65.60661346340801
69.7187928757979
72.78155763324771
75.22235151902537
77.28485933020178
78.99977162706753
49.62117526036291
56.67848848150528
62.27141932692714
65.91184033805861
67.59975151489928
67.72565482430642
70.79649565840785
73.26420979969288
75.40747837738768
77.33775271830532
78.99977162706753
58.67162653299595
64.26455737841889
67.90497838955044
69.59288956639094
69.7187928757979
68.80335760691617
71.27914782485263
73.44933665805516
75.46037176549115
77.33775271830532
78.99977162706753
66.25769542991019
69.89811644104174
71.5860276178815
71.71193092728963
70.79649565840785
69.28600977336106
71.46427468321464
73.50223004615913
75.46037176549115
77.33775271830532
78.99977162706753
71.89125449253338
73.57916566937

Policy Improvement:  73%|███████▎  | 322/441 [00:01<00:00, 247.42it/s]

69.89811644104174
71.5860276178815
71.71193092728963
70.79649565840785
69.28600977336106
71.46427468321464
73.51030612281107
75.50344417430162
77.48850614914079
79.44664786847423
71.89125449253338
73.5791656693739
73.70506897878057
72.78963370989963
71.27914782485263
69.47113663172323
71.51716807131848
73.51030612281107
75.50344417430162
77.48850614914079
79.44664786847423
75.57230372086481
75.69820703027199
74.78277176139055
73.27228587634453
71.46427468321464
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.48850614914079
79.44664786847423
77.69134508176325
76.77590981288144
75.26542392783578
73.45741273470705
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.48850614914079
79.44664786847423
78.76904786437368
77.25856197932679
75.4505507861981
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.48850614914079
79.44664786847423
79.25170003081841
77.4436888376893

Policy Improvement:  93%|█████████▎| 412/441 [00:01<00:00, 285.80it/s]

71.27914782485263
73.45741273470705
75.50344417430162
77.49658222579272
79.4897202772844
0.0
69.89811644104174
71.5860276178815
71.71193092728963
70.79649565840785
69.28600977336106
71.46427468321464
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
73.5791656693739
73.70506897878057
72.78963370989963
71.27914782485263
69.47113663172323
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
75.69820703027199
74.78277176139055
73.27228587634453
71.46427468321464
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
76.77590981288144
75.26542392783578
73.45741273470705
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
77.25856197932679
75.4505507861981
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.

Policy Improvement: 100%|██████████| 441/441 [00:01<00:00, 285.67it/s]


0.0
0.0
0.0
0.0
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
0.0
0.0
0.0
0.0
0.0
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
0.0
0.0
0.0
0.0
0.0
0.0
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
0.0
0.0
0.0
0.0
0.0
0.0
0.0
71.51716807131848
69.5240300198269
71.51716807131848
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
71.51716807131848
69.5240300198269
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
29.872842542414297
41.64862305439792
52.69221237852185
62.2714193

Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 9617.61it/s]
Policy Evaluation: 100%|██████████| 441/441 [00:00<00:00, 9451.12it/s]
Policy Improvement:   0%|          | 0/441 [00:00<?, ?it/s]

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
11.462520717719304
9.78264246049182
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
21.436118661752367
21.24516317821156
18.833093733124425
0.0
0.0
0.0
0.0
0.0
0.0
0.0
29.17633244525593
31.218761122244164
30.295614450843704
26.41916263003876
0.0
0.0
0.0
0.0
0.0
0.0
34.683162068230004
38.95897490574734
40.26921239487749
37.881683347757836
32.05272169266171
0.0
0.0
0.0
0.0
0.0
38.51495357080725
44.465804528722316
48.00942617838047
47.85528129179149
43.51524241038184
35.73377092099298
0.0
0.0
0.0
0.0
0.0
48.29759603129981
53.51625580135471
55.59549507529572
53.48884035441369
47.196291638713404
37.85281228189168
0.0
0.0
0.0
0.0
0.0
57.34804730393205
61.10232469826957
61.22905413791843
57.16988958274602
49.3153329996116
38.9305150645017
0.0
0.0
0.0
0.0
0.0
64.93411620084653
66.73588376089279
64.91010336624943
59.28893094364359
50.39303578222172
39.41316723094661
0.0
0.0
0.0
0.0
0.0
70.56767526346867
70.41693298922402
67.02914472714849
60.366633726

Policy Improvement:  21%|██        | 92/441 [00:00<00:00, 361.53it/s]

0.0
37.355446098163476
44.311319928317
49.529979698371534
51.609218972312284
49.50256425143158
47.196291638713404
41.83908838487432
0.0
0.0
0.0
39.679703292383024
47.13808855865572
53.36177120094982
57.116048595286685
57.24277803493476
53.1836134797637
49.3153329996116
42.91679116748445
0.0
0.0
0.0
49.462345752875116
56.18853983128914
60.947840097863235
62.74960765790891
60.92382726326685
55.30265484066182
50.39303578222172
43.39944333392876
0.0
0.0
0.0
58.51279702550778
63.774608728203
66.58139916048616
66.43065688624084
63.04286862416487
56.3803576232717
50.87568794866615
43.58457019229133
0.0
0.0
0.0
66.09886592242374
69.40816779082634
70.26244838881763
68.54969824714031
64.12057140677493
56.86300978971655
51.06081480702847
43.637463580394986
0.0
0.0
0.0
71.73242498504622
73.0892170191596
72.38148974971679
69.62740102974988
64.60322357321932
57.04813664807917
51.11370819513217
43.637463580394986
0.0
0.0
0.0
75.41347421337743
75.20825838005621
73.45919253232577
70.11005319619548
64.7

Policy Improvement:  29%|██▉       | 130/441 [00:00<00:00, 331.70it/s]

66.249619353258
69.85504403223119
71.42719811039372
71.22198227707375
69.47291642934294
66.12377709321235
64.78835043158169
61.08730613916535
55.09998429811466
47.623739683378844
0.0
71.88317841588123
73.5360932605635
73.5462394712931
72.29968505968328
69.95556859578879
66.30890395157469
64.84124381968533
61.08730613916535
55.09998429811466
47.623739683378844
0.0
75.56422764421289
75.65513462146272
74.62394225390244
72.78233722612812
70.14069545415052
66.36179733967798
64.84124381968533
61.08730613916535
55.09998429811466
47.623739683378844
0.0
77.6832690051113
76.73283740407145
75.10659442034805
72.96746408449106
70.19358884225431
66.36179733967798
64.84124381968533
61.08730613916535
55.09998429811466
47.623739683378844
0.0
78.7609717877215
77.2154895705168
75.29172127871044
73.02035747259445
70.19358884225431
66.36179733967798
64.84124381968533
61.08730613916535
55.09998429811466
47.623739683378844
0.0
79.24362395416611
77.40061642887919
75.34461466681422
73.02035747259445
70.1935888

Policy Improvement:  37%|███▋      | 164/441 [00:00<00:00, 299.99it/s]

67.06672029363943
58.67162653299595
64.26455737841889
67.90497838955044
69.58481348973902
69.67572046698734
68.64452809942824
70.78919917463652
72.13383350564288
72.34121149415174
70.82065797416062
67.06672029363943
66.25769542991019
69.89811644104174
71.5860276178815
71.7038548506379
70.75342324959752
69.1271802658734
70.97432603299954
72.18672689374702
72.34121149415174
70.82065797416062
67.06672029363943
71.89125449253338
73.5791656693739
73.70506897878057
72.78155763324771
71.2360754160422
69.31230712423493
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
67.06672029363943
75.57230372086481
75.69820703027199
74.78277176139055
73.26420979969288
71.42120227440437
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
67.06672029363943
77.69134508176325
76.77590981288144
75.26542392783578
73.44933665805516
71.47409566250812
69.36520051233913
71.02721942110304
72.18672689374702
72.34121149415174
70.82065797416062
67.066720293639

Policy Improvement:  44%|████▍     | 195/441 [00:00<00:00, 249.47it/s]

0.0
37.84539474837965
45.63489915738079
52.69221237852185
58.27706714729271
61.88249182626571
67.44092200741102
71.22198227707375
73.45919253232577
74.09632929917707
72.7609026375487
39.83853279987092
47.62803720887216
54.68535043001269
60.27828127543639
63.910626209916046
65.56354105459772
69.55996336831089
72.29968505968328
73.94184469877119
74.28145615753935
72.8137960256518
49.62117526036291
56.67848848150528
62.27141932692714
65.91184033805861
67.59167543824722
67.68258241549648
70.63766615092018
72.78233722612812
74.12697155713386
74.33434954564291
72.8137960256518
58.67162653299595
64.26455737841889
67.90497838955044
69.59288956639094
69.71071679914607
68.76028519810652
71.12031831736526
72.96746408449106
74.17986494523727
74.33434954564291
72.8137960256518
66.25769542991019
69.89811644104174
71.5860276178815
71.71193092728963
70.78841958175589
69.2429373645515
71.30544517572737
73.02035747259445
74.17986494523727
74.33434954564291
72.8137960256518
71.89125449253338
73.579165669

Policy Improvement:  51%|█████     | 223/441 [00:00<00:00, 257.37it/s]

60.23520886662618
67.74614888206271
73.0892170191596
76.36776585269953
0.0
0.0
0.0
33.85911864539706
41.64862305439792
48.70593627553869
58.27706714729271
65.86876792924876
71.42719811039372
75.20825838005621
77.44546863531042
0.0
0.0
35.85225669688868
43.641761105889024
50.69907432703028
56.29200517245324
63.910626209916046
69.54981715758072
73.5462394712931
76.28596116266576
77.92812080175469
0.0
37.84539474837965
45.63489915738079
52.69221237852185
58.28514322394468
61.92556423507602
67.59167543824722
71.66885851847928
74.62394225390244
76.76861332911122
78.11324766011731
39.83853279987092
47.62803720887216
54.68535043001269
60.27828127543639
63.918702286567786
65.60661346340801
69.71071679914607
72.74656130108961
75.10659442034805
76.95374018747376
78.16614104822088
49.62117526036291
56.67848848150528
62.27141932692714
65.91184033805861
67.59975151489928
67.72565482430642
70.78841958175589
73.22921346753463
75.29172127871044
77.0066335755771
78.16614104822088
58.67162653299595
64.2

Policy Improvement:  57%|█████▋    | 251/441 [00:00<00:00, 262.23it/s]

79.25170003081841
77.44368883768935
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
78.16614104822088
79.43682688918094
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
78.16614104822088
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
78.16614104822088
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
77.0066335755771
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.5090919946666
73.4672337140007
75.34461466681422
0.0
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.

Policy Improvement:  63%|██████▎   | 279/441 [00:00<00:00, 265.50it/s]

78.72597545556344
0.0
0.0
35.85225669688868
43.641761105889024
50.69907432703028
56.29200517245324
63.918702286567786
69.59288956639094
73.70506897878057
76.76783373622918
79.20862762200795
0.0
37.84539474837965
45.63489915738079
52.69221237852185
58.28514322394468
61.92556423507602
67.59975151489928
71.71193092728963
74.78277176139055
77.25048590267507
79.39375448037049
39.83853279987092
47.62803720887216
54.68535043001269
60.27828127543639
63.918702286567786
65.60661346340801
69.7187928757979
72.78963370989963
75.26542392783578
77.43561276103725
79.44664786847423
49.62117526036291
56.67848848150528
62.27141932692714
65.91184033805861
67.59975151489928
67.72565482430642
70.79649565840785
73.27228587634453
75.4505507861981
77.48850614914079
79.44664786847423
58.67162653299595
64.26455737841889
67.90497838955044
69.59288956639094
69.7187928757979
68.80335760691617
71.27914782485263
73.45741273470705
75.50344417430162
77.48850614914079
79.44664786847423
66.25769542991019
69.8981164410417

Policy Improvement:  76%|███████▌  | 336/441 [00:01<00:00, 272.15it/s]

79.25170003081841
77.44368883768935
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
79.43682688918094
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.48164420063242
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.51030612281107
71.51716807131848
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
0.0
0.0
79.4897202772844
77.49658222579272
75.50344417430162
73.5103

Policy Improvement:  91%|█████████▏| 403/441 [00:01<00:00, 305.39it/s]

0.0
0.0
0.0
0.0
0.0
29.872842542414297
41.64862305439792
52.69221237852185
62.27141932692714
69.89811644104174
75.57230372086481
0.0
0.0
0.0
0.0
31.86598059390581
39.6554850029061
50.69907432703028
60.27828127543639
67.90497838955044
73.5791656693739
77.69134508176325
0.0
0.0
0.0
33.85911864539706
41.64862305439792
48.70593627553869
58.28514322394468
65.91184033805861
71.5860276178815
75.69820703027199
78.76904786437368
0.0
0.0
35.85225669688868
43.641761105889024
50.69907432703028
56.29200517245324
63.918702286567786
69.59288956639094
73.70506897878057
76.77590981288144
79.25170003081841
0.0
37.84539474837965
45.63489915738079
52.69221237852185
58.28514322394468
61.92556423507602
67.59975151489928
71.71193092728963
74.78277176139055
77.25856197932679
79.43682688918094
0.0
47.62803720887216
54.68535043001269
60.27828127543639
63.918702286567786
65.60661346340801
69.7187928757979
72.78963370989963
75.26542392783578
77.44368883768935
79.4897202772844
0.0
56.67848848150528
62.271419326927

Policy Improvement: 100%|██████████| 441/441 [00:01<00:00, 285.95it/s]


0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
79.4897202772844
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
77.49658222579272
0.0
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
75.50344417430162
0.0
0.0
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
73.51030612281107
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
69.5240300198269
71.51716807131848
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
69.5240300198269
0.0
0.0
0.0
0.0
0.0
Iterations:  6
[[ 0 -1 -2 -3 -4 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5]
 [ 1 


