In [1]:
import os
import sys

# Get the absolute path of the notebook's directory
notebook_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(notebook_dir)

In [2]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from src.app import PermutationSolver


In [3]:
n = 12
config = {
            'n_permutations_length': n,
            
            # Random walks params
            'random_walks_type': 'non-backtracking-beam',
            'n_random_walk_length': int(n * (n-1) / 2),
            'n_random_walks_to_generate': 10000,
            'n_random_walks_steps_back_to_ban': 8,
            
            # Neural Net params
            'model_type': 'MLP',
            'list_layers_sizes': [2**9],
            'n_epochs': 30,
            'batch_size': 1024,
            'lr': 0.001,
            
            # DQN training
            'n_epochs_dqn': 300,
            'flag_dqn_round': False,
            'n_random_walks_to_generate_dqn': 1000,
            'verbose_loc': 5,
            'lr_dqn': 0.0005,
            
            # Beam search
            'beam_search_torch': True,
            'beam_search_Fironov': False,
            'beam_width': 1,
            'n_steps_limit': 4 * n**2,
            'alpha_previous_cost_accumulation': 0,
            'beam_search_models_or_heuristics': 'model_torch',
            'ban_p0_p1_transposition_if_p0_lt_p1_ie_already_sorted': False,
            'n_beam_search_steps_back_to_ban': 32,
            
            # What to solve
            'solve_random_or_longest_state': 'solve_LRX_longest',
            'verbose': 100
        }

In [4]:
# n_perm_list = [8]
# n_epoch_list = [100]
# n_epoch_dqn_list = [300]
# batch_size_list = [2**10]
# lr_list = [0.000001]
# beam_width_list = [2**0]
# list_layers_sizes = [[2**13]]

n_perm_list = [15]
n_epoch_list = [30]
n_epoch_dqn_list = [1000]
batch_size_list = [2**10]
lr_list = [0.0005]
beam_width_list = [2**0]
list_layers_sizes = [[2**11]]

In [5]:
config['mode'] = 'single_hard_hinge'
config['w_anchor'] = 1.0
config['w_hinge'] = 1.0


In [6]:
summary_df = pd.DataFrame()

for n_permutations_length in n_perm_list:
    solver = PermutationSolver(config)
    solver.config['n_permutations_length'] = n_permutations_length
    for list_layers_size in list_layers_sizes:
        solver.config['list_layers_sizes'] = list_layers_size
        for lr in lr_list:
            solver.config['lr_dqn'] = lr
            for batch_size in batch_size_list:
                solver.config['batch_size'] = batch_size
                for n_epoch in n_epoch_list:
                    solver.config['n_epochs'] = n_epoch
                    mlp_losses = solver.train_mlp()
                    # save mlp_model
                    torch.save(solver.mlp_model.state_dict(), f'models/mlp_model_{n_permutations_length}_{n_epoch}_{list_layers_sizes[0]}_{lr}_{batch_size}.pth')
                    for n_epoch_dqn in n_epoch_dqn_list:
                        solver.config['n_epochs_dqn'] = n_epoch_dqn
                        dqn_losses = solver.train_dqn()
                        # save dqn_model
                        torch.save(solver.dqn_model.state_dict(), f'models/dqn_model_{n_permutations_length}_{n_epoch_dqn}_{list_layers_sizes[0]}_{lr}_{batch_size}.pth')
                        for beam_width in beam_width_list:
                            solver.config['beam_width'] = beam_width
                            i_step, flag_found_destination = solver.test_beam_search()
                            # save summary_df
                            summary_df = pd.concat([summary_df, pd.DataFrame({'n_permutations_length': n_permutations_length, 'list_layers_sizes': list_layers_sizes, 'lr': lr, 'n_epoch': n_epoch, 'n_epoch_dqn': n_epoch_dqn, 'beam_width': beam_width, 'i_step': i_step, 'flag_found_destination': flag_found_destination, 'mlp_losses': mlp_losses[-1], 'dqn_losses': dqn_losses[-1]})])
                            summary_df.to_csv('models/summary_df.csv', index=False)


Training MLP:  33%|███▎      | 10/30 [00:11<00:23,  1.15s/it]

Epoch 10, Loss: 59.9828


Training MLP:  67%|██████▋   | 20/30 [00:22<00:10,  1.08s/it]

Epoch 20, Loss: 59.5383


Training MLP: 100%|██████████| 30/30 [00:33<00:00,  1.11s/it]

Epoch 30, Loss: 57.5893





1000001
X.shape: torch.Size([1000001, 15])
y.shape: torch.Size([1000001])
Starting DQN training for 1000 epochs...


Training DQN:   0%|          | 1/1000 [00:05<1:36:41,  5.81s/it]

Epoch   0 | Loss: 47.0813 | Hinge: 8.9340 | Anchor: 36.5866 | Times - RW: 4.83s, Bellman: 0.04s, Train: 0.10s


Training DQN:   1%|          | 6/1000 [00:14<31:43,  1.92s/it]  

Epoch   5 | Loss: 5.7760 | Hinge: 1.3587 | Anchor: 4.6814 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.10s


Training DQN:   1%|          | 11/1000 [00:22<28:20,  1.72s/it]

Epoch  10 | Loss: 11.6412 | Hinge: 0.7546 | Anchor: 10.0559 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:   2%|▏         | 16/1000 [00:30<27:33,  1.68s/it]

Epoch  15 | Loss: 18.3575 | Hinge: 0.5822 | Anchor: 17.7508 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.10s


Training DQN:   2%|▏         | 21/1000 [00:39<27:08,  1.66s/it]

Epoch  20 | Loss: 23.8058 | Hinge: 0.4813 | Anchor: 22.3337 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.12s


Training DQN:   3%|▎         | 26/1000 [00:47<27:00,  1.66s/it]

Epoch  25 | Loss: 25.9123 | Hinge: 0.5425 | Anchor: 25.9965 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:   3%|▎         | 31/1000 [00:55<26:51,  1.66s/it]

Epoch  30 | Loss: 25.9562 | Hinge: 0.4385 | Anchor: 24.0702 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:   4%|▎         | 36/1000 [01:04<26:38,  1.66s/it]

Epoch  35 | Loss: 25.4120 | Hinge: 0.3865 | Anchor: 25.7079 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:   4%|▍         | 41/1000 [01:12<27:09,  1.70s/it]

Epoch  40 | Loss: 20.6855 | Hinge: 0.4440 | Anchor: 21.3782 | Times - RW: 1.56s, Bellman: 0.02s, Train: 0.12s


Training DQN:   5%|▍         | 46/1000 [01:21<26:47,  1.68s/it]

Epoch  45 | Loss: 17.2508 | Hinge: 0.3398 | Anchor: 16.5789 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.13s


Training DQN:   5%|▌         | 51/1000 [01:29<26:25,  1.67s/it]

Epoch  50 | Loss: 13.6264 | Hinge: 0.2862 | Anchor: 12.2888 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:   6%|▌         | 56/1000 [01:37<26:24,  1.68s/it]

Epoch  55 | Loss: 8.9571 | Hinge: 0.2808 | Anchor: 8.8935 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:   6%|▌         | 61/1000 [01:46<26:11,  1.67s/it]

Epoch  60 | Loss: 5.8486 | Hinge: 0.2853 | Anchor: 5.4464 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:   7%|▋         | 66/1000 [01:54<26:09,  1.68s/it]

Epoch  65 | Loss: 3.7656 | Hinge: 0.3014 | Anchor: 3.6187 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.13s


Training DQN:   7%|▋         | 71/1000 [02:02<25:46,  1.66s/it]

Epoch  70 | Loss: 2.4959 | Hinge: 0.2546 | Anchor: 2.3003 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:   8%|▊         | 76/1000 [02:11<25:46,  1.67s/it]

Epoch  75 | Loss: 1.7302 | Hinge: 0.2167 | Anchor: 1.4622 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.11s


Training DQN:   8%|▊         | 81/1000 [02:19<25:37,  1.67s/it]

Epoch  80 | Loss: 1.5563 | Hinge: 0.2176 | Anchor: 1.3247 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:   9%|▊         | 86/1000 [02:27<25:30,  1.67s/it]

Epoch  85 | Loss: 1.3370 | Hinge: 0.2265 | Anchor: 0.9876 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.13s


Training DQN:   9%|▉         | 91/1000 [02:36<25:19,  1.67s/it]

Epoch  90 | Loss: 1.1726 | Hinge: 0.2273 | Anchor: 0.8649 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.12s


Training DQN:  10%|▉         | 96/1000 [02:44<25:07,  1.67s/it]

Epoch  95 | Loss: 1.1201 | Hinge: 0.1804 | Anchor: 0.8366 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  10%|█         | 101/1000 [02:52<24:51,  1.66s/it]

Epoch 100 | Loss: 0.8743 | Hinge: 0.2063 | Anchor: 0.6169 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  11%|█         | 106/1000 [03:01<24:46,  1.66s/it]

Epoch 105 | Loss: 0.9831 | Hinge: 0.1897 | Anchor: 0.7280 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  11%|█         | 111/1000 [03:09<24:45,  1.67s/it]

Epoch 110 | Loss: 0.9235 | Hinge: 0.1848 | Anchor: 0.6302 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.13s


Training DQN:  12%|█▏        | 116/1000 [03:17<24:29,  1.66s/it]

Epoch 115 | Loss: 1.0489 | Hinge: 0.1728 | Anchor: 0.8763 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  12%|█▏        | 121/1000 [03:26<24:27,  1.67s/it]

Epoch 120 | Loss: 0.9661 | Hinge: 0.1864 | Anchor: 0.7066 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.13s


Training DQN:  13%|█▎        | 126/1000 [03:34<24:10,  1.66s/it]

Epoch 125 | Loss: 0.9563 | Hinge: 0.2083 | Anchor: 0.7586 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  13%|█▎        | 131/1000 [03:42<24:06,  1.66s/it]

Epoch 130 | Loss: 1.0628 | Hinge: 0.1706 | Anchor: 0.9344 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  14%|█▎        | 136/1000 [03:51<24:04,  1.67s/it]

Epoch 135 | Loss: 1.0329 | Hinge: 0.1610 | Anchor: 0.8776 | Times - RW: 1.52s, Bellman: 0.03s, Train: 0.15s


Training DQN:  14%|█▍        | 141/1000 [03:59<23:45,  1.66s/it]

Epoch 140 | Loss: 0.9359 | Hinge: 0.1751 | Anchor: 0.7556 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  15%|█▍        | 146/1000 [04:07<23:46,  1.67s/it]

Epoch 145 | Loss: 0.9289 | Hinge: 0.1568 | Anchor: 0.7376 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.11s


Training DQN:  15%|█▌        | 151/1000 [04:16<24:09,  1.71s/it]

Epoch 150 | Loss: 0.8958 | Hinge: 0.1563 | Anchor: 0.7351 | Times - RW: 1.60s, Bellman: 0.02s, Train: 0.12s


Training DQN:  16%|█▌        | 156/1000 [04:25<25:13,  1.79s/it]

Epoch 155 | Loss: 0.7693 | Hinge: 0.1717 | Anchor: 0.6332 | Times - RW: 1.65s, Bellman: 0.03s, Train: 0.13s


Training DQN:  16%|█▌        | 161/1000 [04:34<25:23,  1.82s/it]

Epoch 160 | Loss: 0.7514 | Hinge: 0.1501 | Anchor: 0.5719 | Times - RW: 1.65s, Bellman: 0.02s, Train: 0.15s


Training DQN:  17%|█▋        | 166/1000 [04:43<23:48,  1.71s/it]

Epoch 165 | Loss: 0.7706 | Hinge: 0.1786 | Anchor: 0.5789 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.10s


Training DQN:  17%|█▋        | 171/1000 [04:51<23:03,  1.67s/it]

Epoch 170 | Loss: 0.8807 | Hinge: 0.1474 | Anchor: 0.6668 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  18%|█▊        | 176/1000 [05:00<23:48,  1.73s/it]

Epoch 175 | Loss: 0.7986 | Hinge: 0.1643 | Anchor: 0.6623 | Times - RW: 1.62s, Bellman: 0.02s, Train: 0.14s


Training DQN:  18%|█▊        | 181/1000 [05:09<25:30,  1.87s/it]

Epoch 180 | Loss: 0.8283 | Hinge: 0.1415 | Anchor: 0.7032 | Times - RW: 1.74s, Bellman: 0.03s, Train: 0.14s


Training DQN:  19%|█▊        | 186/1000 [05:18<24:22,  1.80s/it]

Epoch 185 | Loss: 0.8940 | Hinge: 0.1279 | Anchor: 0.7177 | Times - RW: 1.53s, Bellman: 0.03s, Train: 0.13s


Training DQN:  19%|█▉        | 191/1000 [05:26<22:48,  1.69s/it]

Epoch 190 | Loss: 0.8896 | Hinge: 0.1482 | Anchor: 0.7579 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  20%|█▉        | 196/1000 [05:35<22:16,  1.66s/it]

Epoch 195 | Loss: 0.7978 | Hinge: 0.1289 | Anchor: 0.5458 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  20%|██        | 201/1000 [05:43<22:08,  1.66s/it]

Epoch 200 | Loss: 0.6771 | Hinge: 0.1101 | Anchor: 0.4785 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  21%|██        | 206/1000 [05:51<21:59,  1.66s/it]

Epoch 205 | Loss: 0.5658 | Hinge: 0.1439 | Anchor: 0.4378 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.12s


Training DQN:  21%|██        | 211/1000 [06:00<21:53,  1.67s/it]

Epoch 210 | Loss: 0.5862 | Hinge: 0.1157 | Anchor: 0.5518 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.13s


Training DQN:  22%|██▏       | 216/1000 [06:08<21:50,  1.67s/it]

Epoch 215 | Loss: 0.5240 | Hinge: 0.1398 | Anchor: 0.3829 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  22%|██▏       | 221/1000 [06:16<21:36,  1.66s/it]

Epoch 220 | Loss: 0.5680 | Hinge: 0.1174 | Anchor: 0.4869 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  23%|██▎       | 226/1000 [06:25<21:33,  1.67s/it]

Epoch 225 | Loss: 0.6104 | Hinge: 0.1424 | Anchor: 0.5587 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  23%|██▎       | 231/1000 [06:33<21:55,  1.71s/it]

Epoch 230 | Loss: 0.6958 | Hinge: 0.1200 | Anchor: 0.6507 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.13s


Training DQN:  24%|██▎       | 236/1000 [06:42<21:30,  1.69s/it]

Epoch 235 | Loss: 0.7142 | Hinge: 0.1208 | Anchor: 0.5764 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  24%|██▍       | 241/1000 [06:50<20:49,  1.65s/it]

Epoch 240 | Loss: 0.5749 | Hinge: 0.1348 | Anchor: 0.4174 | Times - RW: 1.50s, Bellman: 0.02s, Train: 0.10s


Training DQN:  25%|██▍       | 246/1000 [06:58<20:37,  1.64s/it]

Epoch 245 | Loss: 0.5487 | Hinge: 0.1433 | Anchor: 0.4557 | Times - RW: 1.50s, Bellman: 0.02s, Train: 0.11s


Training DQN:  25%|██▌       | 251/1000 [07:06<20:30,  1.64s/it]

Epoch 250 | Loss: 0.5236 | Hinge: 0.1250 | Anchor: 0.4627 | Times - RW: 1.50s, Bellman: 0.02s, Train: 0.11s


Training DQN:  26%|██▌       | 256/1000 [07:14<20:17,  1.64s/it]

Epoch 255 | Loss: 0.6565 | Hinge: 0.1310 | Anchor: 0.5567 | Times - RW: 1.50s, Bellman: 0.02s, Train: 0.10s


Training DQN:  26%|██▌       | 261/1000 [07:23<20:07,  1.63s/it]

Epoch 260 | Loss: 0.5915 | Hinge: 0.0811 | Anchor: 0.4554 | Times - RW: 1.50s, Bellman: 0.02s, Train: 0.10s


Training DQN:  27%|██▋       | 266/1000 [07:31<20:39,  1.69s/it]

Epoch 265 | Loss: 0.5257 | Hinge: 0.1033 | Anchor: 0.4266 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.13s


Training DQN:  27%|██▋       | 271/1000 [07:39<20:15,  1.67s/it]

Epoch 270 | Loss: 0.5704 | Hinge: 0.1050 | Anchor: 0.4064 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  28%|██▊       | 276/1000 [07:48<20:08,  1.67s/it]

Epoch 275 | Loss: 0.6217 | Hinge: 0.1219 | Anchor: 0.5254 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  28%|██▊       | 281/1000 [07:56<19:51,  1.66s/it]

Epoch 280 | Loss: 0.5014 | Hinge: 0.1035 | Anchor: 0.3826 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  29%|██▊       | 286/1000 [08:04<19:44,  1.66s/it]

Epoch 285 | Loss: 0.4536 | Hinge: 0.1065 | Anchor: 0.3419 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  29%|██▉       | 291/1000 [08:13<19:37,  1.66s/it]

Epoch 290 | Loss: 0.4580 | Hinge: 0.0917 | Anchor: 0.3262 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  30%|██▉       | 296/1000 [08:21<19:28,  1.66s/it]

Epoch 295 | Loss: 0.4395 | Hinge: 0.1116 | Anchor: 0.3275 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.12s


Training DQN:  30%|███       | 301/1000 [08:29<19:22,  1.66s/it]

Epoch 300 | Loss: 0.4401 | Hinge: 0.0952 | Anchor: 0.3471 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  31%|███       | 306/1000 [08:38<19:13,  1.66s/it]

Epoch 305 | Loss: 0.3974 | Hinge: 0.1205 | Anchor: 0.2432 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  31%|███       | 311/1000 [08:46<19:31,  1.70s/it]

Epoch 310 | Loss: 0.3947 | Hinge: 0.0902 | Anchor: 0.2661 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.12s


Training DQN:  32%|███▏      | 316/1000 [08:55<19:46,  1.73s/it]

Epoch 315 | Loss: 0.3854 | Hinge: 0.0959 | Anchor: 0.2151 | Times - RW: 1.57s, Bellman: 0.02s, Train: 0.13s


Training DQN:  32%|███▏      | 321/1000 [09:03<19:19,  1.71s/it]

Epoch 320 | Loss: 0.4167 | Hinge: 0.1207 | Anchor: 0.3096 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.13s


Training DQN:  33%|███▎      | 326/1000 [09:12<18:52,  1.68s/it]

Epoch 325 | Loss: 0.3957 | Hinge: 0.1035 | Anchor: 0.3037 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  33%|███▎      | 331/1000 [09:20<18:50,  1.69s/it]

Epoch 330 | Loss: 0.4103 | Hinge: 0.1093 | Anchor: 0.2692 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.12s


Training DQN:  34%|███▎      | 336/1000 [09:29<18:47,  1.70s/it]

Epoch 335 | Loss: 0.4351 | Hinge: 0.1022 | Anchor: 0.3035 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.12s


Training DQN:  34%|███▍      | 341/1000 [09:37<18:33,  1.69s/it]

Epoch 340 | Loss: 0.4874 | Hinge: 0.0993 | Anchor: 0.3674 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.12s


Training DQN:  35%|███▍      | 346/1000 [09:46<18:12,  1.67s/it]

Epoch 345 | Loss: 0.4428 | Hinge: 0.0737 | Anchor: 0.3235 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  35%|███▌      | 351/1000 [09:54<18:01,  1.67s/it]

Epoch 350 | Loss: 0.3912 | Hinge: 0.0988 | Anchor: 0.3082 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  36%|███▌      | 356/1000 [10:02<17:53,  1.67s/it]

Epoch 355 | Loss: 0.4641 | Hinge: 0.0844 | Anchor: 0.3621 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.12s


Training DQN:  36%|███▌      | 361/1000 [10:10<17:42,  1.66s/it]

Epoch 360 | Loss: 0.4412 | Hinge: 0.1022 | Anchor: 0.3689 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  37%|███▋      | 366/1000 [10:19<17:46,  1.68s/it]

Epoch 365 | Loss: 0.5365 | Hinge: 0.0935 | Anchor: 0.4346 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.12s


Training DQN:  37%|███▋      | 371/1000 [10:28<18:36,  1.77s/it]

Epoch 370 | Loss: 0.6255 | Hinge: 0.0985 | Anchor: 0.5040 | Times - RW: 1.59s, Bellman: 0.02s, Train: 0.11s


Training DQN:  38%|███▊      | 376/1000 [10:36<17:54,  1.72s/it]

Epoch 375 | Loss: 0.6851 | Hinge: 0.0921 | Anchor: 0.5536 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.12s


Training DQN:  38%|███▊      | 381/1000 [10:45<17:16,  1.67s/it]

Epoch 380 | Loss: 0.5263 | Hinge: 0.1167 | Anchor: 0.4354 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  39%|███▊      | 386/1000 [10:53<17:05,  1.67s/it]

Epoch 385 | Loss: 0.4760 | Hinge: 0.0994 | Anchor: 0.3464 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.11s


Training DQN:  39%|███▉      | 391/1000 [11:02<17:30,  1.73s/it]

Epoch 390 | Loss: 0.4282 | Hinge: 0.0890 | Anchor: 0.3334 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.13s


Training DQN:  40%|███▉      | 396/1000 [11:10<17:16,  1.72s/it]

Epoch 395 | Loss: 0.4811 | Hinge: 0.0994 | Anchor: 0.3659 | Times - RW: 1.56s, Bellman: 0.02s, Train: 0.11s


Training DQN:  40%|████      | 401/1000 [11:19<16:53,  1.69s/it]

Epoch 400 | Loss: 0.3716 | Hinge: 0.1003 | Anchor: 0.2555 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.13s


Training DQN:  41%|████      | 406/1000 [11:27<16:29,  1.67s/it]

Epoch 405 | Loss: 0.3279 | Hinge: 0.0834 | Anchor: 0.2496 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  41%|████      | 411/1000 [11:36<16:24,  1.67s/it]

Epoch 410 | Loss: 0.3205 | Hinge: 0.0880 | Anchor: 0.2246 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.13s


Training DQN:  42%|████▏     | 416/1000 [11:44<16:14,  1.67s/it]

Epoch 415 | Loss: 0.3296 | Hinge: 0.0877 | Anchor: 0.2361 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.13s


Training DQN:  42%|████▏     | 421/1000 [11:52<16:04,  1.67s/it]

Epoch 420 | Loss: 0.3867 | Hinge: 0.0907 | Anchor: 0.3157 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.13s


Training DQN:  43%|████▎     | 426/1000 [12:00<15:56,  1.67s/it]

Epoch 425 | Loss: 0.4949 | Hinge: 0.0908 | Anchor: 0.3785 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.13s


Training DQN:  43%|████▎     | 431/1000 [12:09<15:45,  1.66s/it]

Epoch 430 | Loss: 0.4915 | Hinge: 0.0901 | Anchor: 0.4579 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  44%|████▎     | 436/1000 [12:17<15:40,  1.67s/it]

Epoch 435 | Loss: 0.3842 | Hinge: 0.0876 | Anchor: 0.3002 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  44%|████▍     | 441/1000 [12:25<15:30,  1.67s/it]

Epoch 440 | Loss: 0.3648 | Hinge: 0.0684 | Anchor: 0.2501 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  45%|████▍     | 446/1000 [12:34<15:21,  1.66s/it]

Epoch 445 | Loss: 0.3413 | Hinge: 0.0723 | Anchor: 0.2572 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.10s


Training DQN:  45%|████▌     | 451/1000 [12:42<15:08,  1.66s/it]

Epoch 450 | Loss: 0.3225 | Hinge: 0.0780 | Anchor: 0.2159 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  46%|████▌     | 456/1000 [12:50<15:01,  1.66s/it]

Epoch 455 | Loss: 0.3805 | Hinge: 0.0912 | Anchor: 0.2868 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  46%|████▌     | 461/1000 [12:59<14:59,  1.67s/it]

Epoch 460 | Loss: 0.4100 | Hinge: 0.0764 | Anchor: 0.2927 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.12s


Training DQN:  47%|████▋     | 466/1000 [13:07<14:50,  1.67s/it]

Epoch 465 | Loss: 0.3851 | Hinge: 0.0811 | Anchor: 0.3024 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.12s


Training DQN:  47%|████▋     | 471/1000 [13:15<14:40,  1.66s/it]

Epoch 470 | Loss: 0.4183 | Hinge: 0.0768 | Anchor: 0.3020 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.12s


Training DQN:  48%|████▊     | 476/1000 [13:24<14:30,  1.66s/it]

Epoch 475 | Loss: 0.3827 | Hinge: 0.0671 | Anchor: 0.3347 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  48%|████▊     | 481/1000 [13:32<14:20,  1.66s/it]

Epoch 480 | Loss: 0.3035 | Hinge: 0.0766 | Anchor: 0.1918 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  49%|████▊     | 486/1000 [13:40<14:15,  1.66s/it]

Epoch 485 | Loss: 0.2936 | Hinge: 0.0845 | Anchor: 0.1996 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  49%|████▉     | 491/1000 [13:49<14:04,  1.66s/it]

Epoch 490 | Loss: 0.3615 | Hinge: 0.0820 | Anchor: 0.3011 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  50%|████▉     | 496/1000 [13:57<14:00,  1.67s/it]

Epoch 495 | Loss: 0.4164 | Hinge: 0.0834 | Anchor: 0.3417 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.13s


Training DQN:  50%|█████     | 501/1000 [14:05<13:45,  1.65s/it]

Epoch 500 | Loss: 0.4282 | Hinge: 0.0914 | Anchor: 0.3343 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.10s


Training DQN:  51%|█████     | 506/1000 [14:14<13:40,  1.66s/it]

Epoch 505 | Loss: 0.3929 | Hinge: 0.0714 | Anchor: 0.3305 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  51%|█████     | 511/1000 [14:22<13:30,  1.66s/it]

Epoch 510 | Loss: 0.4907 | Hinge: 0.0728 | Anchor: 0.4092 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  52%|█████▏    | 516/1000 [14:30<13:26,  1.67s/it]

Epoch 515 | Loss: 0.4032 | Hinge: 0.0774 | Anchor: 0.3140 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  52%|█████▏    | 521/1000 [14:38<13:17,  1.67s/it]

Epoch 520 | Loss: 0.3250 | Hinge: 0.0862 | Anchor: 0.2412 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  53%|█████▎    | 526/1000 [14:47<13:07,  1.66s/it]

Epoch 525 | Loss: 0.3220 | Hinge: 0.0732 | Anchor: 0.2357 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  53%|█████▎    | 531/1000 [14:55<12:56,  1.66s/it]

Epoch 530 | Loss: 0.4136 | Hinge: 0.0759 | Anchor: 0.3142 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.10s


Training DQN:  54%|█████▎    | 536/1000 [15:03<12:50,  1.66s/it]

Epoch 535 | Loss: 0.3445 | Hinge: 0.0719 | Anchor: 0.2523 | Times - RW: 1.51s, Bellman: 0.03s, Train: 0.13s


Training DQN:  54%|█████▍    | 541/1000 [15:12<12:45,  1.67s/it]

Epoch 540 | Loss: 0.3340 | Hinge: 0.0837 | Anchor: 0.2392 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.14s


Training DQN:  55%|█████▍    | 546/1000 [15:20<12:38,  1.67s/it]

Epoch 545 | Loss: 0.3143 | Hinge: 0.0649 | Anchor: 0.2431 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  55%|█████▌    | 551/1000 [15:28<12:25,  1.66s/it]

Epoch 550 | Loss: 0.3112 | Hinge: 0.0684 | Anchor: 0.2512 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  56%|█████▌    | 556/1000 [15:37<12:17,  1.66s/it]

Epoch 555 | Loss: 0.2930 | Hinge: 0.0793 | Anchor: 0.2578 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  56%|█████▌    | 561/1000 [15:45<12:07,  1.66s/it]

Epoch 560 | Loss: 0.3226 | Hinge: 0.0725 | Anchor: 0.2377 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  57%|█████▋    | 566/1000 [15:53<11:59,  1.66s/it]

Epoch 565 | Loss: 0.3913 | Hinge: 0.0805 | Anchor: 0.3133 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  57%|█████▋    | 571/1000 [16:01<11:49,  1.65s/it]

Epoch 570 | Loss: 0.4104 | Hinge: 0.0886 | Anchor: 0.3383 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.10s


Training DQN:  58%|█████▊    | 576/1000 [16:10<11:41,  1.65s/it]

Epoch 575 | Loss: 0.3593 | Hinge: 0.0670 | Anchor: 0.2863 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  58%|█████▊    | 581/1000 [16:18<11:34,  1.66s/it]

Epoch 580 | Loss: 0.2787 | Hinge: 0.0942 | Anchor: 0.1905 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  59%|█████▊    | 586/1000 [16:26<11:29,  1.67s/it]

Epoch 585 | Loss: 0.2614 | Hinge: 0.0791 | Anchor: 0.1842 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.12s


Training DQN:  59%|█████▉    | 591/1000 [16:35<11:27,  1.68s/it]

Epoch 590 | Loss: 0.2814 | Hinge: 0.0905 | Anchor: 0.1796 | Times - RW: 1.57s, Bellman: 0.02s, Train: 0.11s


Training DQN:  60%|█████▉    | 596/1000 [16:43<11:18,  1.68s/it]

Epoch 595 | Loss: 0.3039 | Hinge: 0.0733 | Anchor: 0.2105 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.10s


Training DQN:  60%|██████    | 601/1000 [16:51<11:05,  1.67s/it]

Epoch 600 | Loss: 0.2360 | Hinge: 0.0741 | Anchor: 0.1702 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  61%|██████    | 606/1000 [17:00<10:51,  1.65s/it]

Epoch 605 | Loss: 0.2779 | Hinge: 0.0719 | Anchor: 0.1920 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  61%|██████    | 611/1000 [17:08<10:46,  1.66s/it]

Epoch 610 | Loss: 0.2814 | Hinge: 0.0651 | Anchor: 0.2009 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  62%|██████▏   | 616/1000 [17:16<10:40,  1.67s/it]

Epoch 615 | Loss: 0.2582 | Hinge: 0.0815 | Anchor: 0.1697 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  62%|██████▏   | 621/1000 [17:25<10:27,  1.66s/it]

Epoch 620 | Loss: 0.2664 | Hinge: 0.0742 | Anchor: 0.1829 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  63%|██████▎   | 626/1000 [17:33<10:20,  1.66s/it]

Epoch 625 | Loss: 0.3138 | Hinge: 0.0654 | Anchor: 0.2436 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  63%|██████▎   | 631/1000 [17:41<10:11,  1.66s/it]

Epoch 630 | Loss: 0.2743 | Hinge: 0.0689 | Anchor: 0.2000 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  64%|██████▎   | 636/1000 [17:50<10:05,  1.66s/it]

Epoch 635 | Loss: 0.2714 | Hinge: 0.0857 | Anchor: 0.1811 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  64%|██████▍   | 641/1000 [17:58<09:56,  1.66s/it]

Epoch 640 | Loss: 0.2449 | Hinge: 0.0634 | Anchor: 0.1822 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  65%|██████▍   | 646/1000 [18:06<09:47,  1.66s/it]

Epoch 645 | Loss: 0.2489 | Hinge: 0.0812 | Anchor: 0.1700 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  65%|██████▌   | 651/1000 [18:14<09:39,  1.66s/it]

Epoch 650 | Loss: 0.2564 | Hinge: 0.0832 | Anchor: 0.1848 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  66%|██████▌   | 656/1000 [18:23<09:32,  1.66s/it]

Epoch 655 | Loss: 0.2759 | Hinge: 0.0756 | Anchor: 0.2051 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.11s


Training DQN:  66%|██████▌   | 661/1000 [18:31<09:22,  1.66s/it]

Epoch 660 | Loss: 0.2700 | Hinge: 0.0772 | Anchor: 0.2065 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.12s


Training DQN:  67%|██████▋   | 666/1000 [18:39<09:15,  1.66s/it]

Epoch 665 | Loss: 0.2744 | Hinge: 0.0773 | Anchor: 0.1701 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.13s


Training DQN:  67%|██████▋   | 671/1000 [18:48<09:11,  1.68s/it]

Epoch 670 | Loss: 0.3406 | Hinge: 0.0746 | Anchor: 0.2592 | Times - RW: 1.56s, Bellman: 0.02s, Train: 0.10s


Training DQN:  68%|██████▊   | 676/1000 [18:56<09:21,  1.73s/it]

Epoch 675 | Loss: 0.4122 | Hinge: 0.0759 | Anchor: 0.3609 | Times - RW: 1.59s, Bellman: 0.03s, Train: 0.11s


Training DQN:  68%|██████▊   | 681/1000 [19:05<09:17,  1.75s/it]

Epoch 680 | Loss: 0.2496 | Hinge: 0.0679 | Anchor: 0.1466 | Times - RW: 1.60s, Bellman: 0.02s, Train: 0.12s


Training DQN:  69%|██████▊   | 686/1000 [19:14<08:52,  1.70s/it]

Epoch 685 | Loss: 0.2821 | Hinge: 0.0747 | Anchor: 0.2343 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  69%|██████▉   | 691/1000 [19:22<08:35,  1.67s/it]

Epoch 690 | Loss: 0.2434 | Hinge: 0.0799 | Anchor: 0.1620 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.13s


Training DQN:  70%|██████▉   | 696/1000 [19:30<08:23,  1.66s/it]

Epoch 695 | Loss: 0.2371 | Hinge: 0.0665 | Anchor: 0.1611 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  70%|███████   | 701/1000 [19:38<08:16,  1.66s/it]

Epoch 700 | Loss: 0.2933 | Hinge: 0.0715 | Anchor: 0.2267 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  71%|███████   | 706/1000 [19:47<08:08,  1.66s/it]

Epoch 705 | Loss: 0.3642 | Hinge: 0.0691 | Anchor: 0.2852 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  71%|███████   | 711/1000 [19:55<07:59,  1.66s/it]

Epoch 710 | Loss: 0.3001 | Hinge: 0.0809 | Anchor: 0.2222 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  72%|███████▏  | 716/1000 [20:03<07:50,  1.66s/it]

Epoch 715 | Loss: 0.2910 | Hinge: 0.0799 | Anchor: 0.2271 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  72%|███████▏  | 721/1000 [20:12<07:42,  1.66s/it]

Epoch 720 | Loss: 0.2441 | Hinge: 0.0655 | Anchor: 0.1827 | Times - RW: 1.51s, Bellman: 0.02s, Train: 0.11s


Training DQN:  73%|███████▎  | 726/1000 [20:20<07:34,  1.66s/it]

Epoch 725 | Loss: 0.2224 | Hinge: 0.0773 | Anchor: 0.1559 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  73%|███████▎  | 731/1000 [20:28<07:25,  1.66s/it]

Epoch 730 | Loss: 0.2468 | Hinge: 0.0608 | Anchor: 0.1817 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  74%|███████▎  | 736/1000 [20:37<07:18,  1.66s/it]

Epoch 735 | Loss: 0.2390 | Hinge: 0.0712 | Anchor: 0.1479 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  74%|███████▍  | 741/1000 [20:45<07:10,  1.66s/it]

Epoch 740 | Loss: 0.2162 | Hinge: 0.0807 | Anchor: 0.1387 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  75%|███████▍  | 746/1000 [20:53<07:02,  1.66s/it]

Epoch 745 | Loss: 0.2297 | Hinge: 0.0652 | Anchor: 0.1697 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  75%|███████▌  | 751/1000 [21:02<06:53,  1.66s/it]

Epoch 750 | Loss: 0.2569 | Hinge: 0.0604 | Anchor: 0.1923 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  76%|███████▌  | 756/1000 [21:10<06:47,  1.67s/it]

Epoch 755 | Loss: 0.2440 | Hinge: 0.0664 | Anchor: 0.1802 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.12s


Training DQN:  76%|███████▌  | 761/1000 [21:19<06:52,  1.72s/it]

Epoch 760 | Loss: 0.2342 | Hinge: 0.0590 | Anchor: 0.1693 | Times - RW: 1.59s, Bellman: 0.03s, Train: 0.11s


Training DQN:  77%|███████▋  | 766/1000 [21:27<06:43,  1.73s/it]

Epoch 765 | Loss: 0.2464 | Hinge: 0.0580 | Anchor: 0.1720 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.13s


Training DQN:  77%|███████▋  | 771/1000 [21:36<06:35,  1.73s/it]

Epoch 770 | Loss: 0.2488 | Hinge: 0.0782 | Anchor: 0.1912 | Times - RW: 1.56s, Bellman: 0.02s, Train: 0.13s


Training DQN:  78%|███████▊  | 776/1000 [21:45<06:29,  1.74s/it]

Epoch 775 | Loss: 0.2064 | Hinge: 0.0550 | Anchor: 0.1305 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.15s


Training DQN:  78%|███████▊  | 781/1000 [21:53<06:27,  1.77s/it]

Epoch 780 | Loss: 0.2142 | Hinge: 0.0631 | Anchor: 0.1451 | Times - RW: 1.61s, Bellman: 0.02s, Train: 0.13s


Training DQN:  79%|███████▊  | 786/1000 [22:02<06:15,  1.75s/it]

Epoch 785 | Loss: 0.1991 | Hinge: 0.0546 | Anchor: 0.1309 | Times - RW: 1.59s, Bellman: 0.02s, Train: 0.13s


Training DQN:  79%|███████▉  | 791/1000 [22:11<06:06,  1.75s/it]

Epoch 790 | Loss: 0.2599 | Hinge: 0.0684 | Anchor: 0.1730 | Times - RW: 1.60s, Bellman: 0.02s, Train: 0.13s


Training DQN:  80%|███████▉  | 796/1000 [22:20<05:52,  1.73s/it]

Epoch 795 | Loss: 0.2374 | Hinge: 0.0691 | Anchor: 0.2015 | Times - RW: 1.56s, Bellman: 0.02s, Train: 0.12s


Training DQN:  80%|████████  | 801/1000 [22:28<05:38,  1.70s/it]

Epoch 800 | Loss: 0.2854 | Hinge: 0.0644 | Anchor: 0.2052 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.14s


Training DQN:  81%|████████  | 806/1000 [22:37<05:30,  1.70s/it]

Epoch 805 | Loss: 0.3243 | Hinge: 0.0640 | Anchor: 0.2488 | Times - RW: 1.56s, Bellman: 0.02s, Train: 0.12s


Training DQN:  81%|████████  | 811/1000 [22:45<05:20,  1.70s/it]

Epoch 810 | Loss: 0.3318 | Hinge: 0.0730 | Anchor: 0.2879 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.12s


Training DQN:  82%|████████▏ | 816/1000 [22:54<05:12,  1.70s/it]

Epoch 815 | Loss: 0.2856 | Hinge: 0.0660 | Anchor: 0.2183 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.12s


Training DQN:  82%|████████▏ | 821/1000 [23:02<05:00,  1.68s/it]

Epoch 820 | Loss: 0.4079 | Hinge: 0.0637 | Anchor: 0.3072 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.12s


Training DQN:  83%|████████▎ | 826/1000 [23:10<04:50,  1.67s/it]

Epoch 825 | Loss: 0.4133 | Hinge: 0.0749 | Anchor: 0.3543 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.10s


Training DQN:  83%|████████▎ | 831/1000 [23:19<04:43,  1.68s/it]

Epoch 830 | Loss: 0.3183 | Hinge: 0.0649 | Anchor: 0.2344 | Times - RW: 1.52s, Bellman: 0.02s, Train: 0.11s


Training DQN:  84%|████████▎ | 836/1000 [23:27<04:34,  1.67s/it]

Epoch 835 | Loss: 0.2980 | Hinge: 0.0697 | Anchor: 0.2351 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.10s


Training DQN:  84%|████████▍ | 841/1000 [23:37<04:49,  1.82s/it]

Epoch 840 | Loss: 0.2484 | Hinge: 0.0607 | Anchor: 0.1921 | Times - RW: 1.61s, Bellman: 0.02s, Train: 0.11s


Training DQN:  85%|████████▍ | 846/1000 [23:45<04:26,  1.73s/it]

Epoch 845 | Loss: 0.2025 | Hinge: 0.0579 | Anchor: 0.1479 | Times - RW: 1.56s, Bellman: 0.02s, Train: 0.12s


Training DQN:  85%|████████▌ | 851/1000 [23:54<04:19,  1.74s/it]

Epoch 850 | Loss: 0.1894 | Hinge: 0.0703 | Anchor: 0.1169 | Times - RW: 1.62s, Bellman: 0.02s, Train: 0.12s


Training DQN:  86%|████████▌ | 856/1000 [24:02<04:03,  1.69s/it]

Epoch 855 | Loss: 0.1915 | Hinge: 0.0572 | Anchor: 0.1141 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.11s


Training DQN:  86%|████████▌ | 861/1000 [24:11<03:57,  1.71s/it]

Epoch 860 | Loss: 0.2358 | Hinge: 0.0601 | Anchor: 0.1904 | Times - RW: 1.57s, Bellman: 0.02s, Train: 0.13s


Training DQN:  87%|████████▋ | 866/1000 [24:19<03:49,  1.71s/it]

Epoch 865 | Loss: 0.2229 | Hinge: 0.0786 | Anchor: 0.1795 | Times - RW: 1.56s, Bellman: 0.02s, Train: 0.12s


Training DQN:  87%|████████▋ | 871/1000 [24:28<03:41,  1.72s/it]

Epoch 870 | Loss: 0.2232 | Hinge: 0.0567 | Anchor: 0.1686 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.13s


Training DQN:  88%|████████▊ | 876/1000 [24:36<03:30,  1.70s/it]

Epoch 875 | Loss: 0.2217 | Hinge: 0.0669 | Anchor: 0.1577 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.13s


Training DQN:  88%|████████▊ | 881/1000 [24:45<03:20,  1.69s/it]

Epoch 880 | Loss: 0.1909 | Hinge: 0.0617 | Anchor: 0.1303 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.12s


Training DQN:  89%|████████▊ | 886/1000 [24:53<03:11,  1.68s/it]

Epoch 885 | Loss: 0.2482 | Hinge: 0.0684 | Anchor: 0.1679 | Times - RW: 1.56s, Bellman: 0.02s, Train: 0.10s


Training DQN:  89%|████████▉ | 891/1000 [25:02<03:02,  1.68s/it]

Epoch 890 | Loss: 0.2726 | Hinge: 0.0613 | Anchor: 0.2172 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.11s


Training DQN:  90%|████████▉ | 896/1000 [25:10<02:55,  1.68s/it]

Epoch 895 | Loss: 0.1989 | Hinge: 0.0622 | Anchor: 0.1601 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.11s


Training DQN:  90%|█████████ | 901/1000 [25:18<02:46,  1.68s/it]

Epoch 900 | Loss: 0.2031 | Hinge: 0.0580 | Anchor: 0.1361 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.13s


Training DQN:  91%|█████████ | 906/1000 [25:27<02:37,  1.68s/it]

Epoch 905 | Loss: 0.1908 | Hinge: 0.0631 | Anchor: 0.1181 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.10s


Training DQN:  91%|█████████ | 911/1000 [25:35<02:29,  1.68s/it]

Epoch 910 | Loss: 0.2355 | Hinge: 0.0673 | Anchor: 0.1699 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.13s


Training DQN:  92%|█████████▏| 916/1000 [25:44<02:23,  1.71s/it]

Epoch 915 | Loss: 0.2709 | Hinge: 0.0662 | Anchor: 0.2124 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.14s


Training DQN:  92%|█████████▏| 921/1000 [25:52<02:13,  1.69s/it]

Epoch 920 | Loss: 0.3104 | Hinge: 0.0553 | Anchor: 0.2348 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.11s


Training DQN:  93%|█████████▎| 926/1000 [26:01<02:04,  1.68s/it]

Epoch 925 | Loss: 0.2126 | Hinge: 0.0615 | Anchor: 0.1545 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.11s


Training DQN:  93%|█████████▎| 931/1000 [26:09<01:55,  1.67s/it]

Epoch 930 | Loss: 0.2343 | Hinge: 0.0609 | Anchor: 0.1854 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.10s


Training DQN:  94%|█████████▎| 936/1000 [26:17<01:47,  1.68s/it]

Epoch 935 | Loss: 0.2242 | Hinge: 0.0524 | Anchor: 0.1651 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.13s


Training DQN:  94%|█████████▍| 941/1000 [26:26<01:40,  1.70s/it]

Epoch 940 | Loss: 0.3089 | Hinge: 0.0645 | Anchor: 0.2263 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.12s


Training DQN:  95%|█████████▍| 946/1000 [26:34<01:31,  1.69s/it]

Epoch 945 | Loss: 0.2712 | Hinge: 0.0632 | Anchor: 0.1808 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.11s


Training DQN:  95%|█████████▌| 951/1000 [26:43<01:22,  1.69s/it]

Epoch 950 | Loss: 0.2333 | Hinge: 0.0666 | Anchor: 0.1581 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.12s


Training DQN:  96%|█████████▌| 956/1000 [26:51<01:14,  1.68s/it]

Epoch 955 | Loss: 0.2071 | Hinge: 0.0608 | Anchor: 0.1489 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.11s


Training DQN:  96%|█████████▌| 961/1000 [27:00<01:05,  1.68s/it]

Epoch 960 | Loss: 0.2608 | Hinge: 0.0542 | Anchor: 0.2097 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.12s


Training DQN:  97%|█████████▋| 966/1000 [27:08<00:57,  1.69s/it]

Epoch 965 | Loss: 0.2357 | Hinge: 0.0670 | Anchor: 0.1753 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.12s


Training DQN:  97%|█████████▋| 971/1000 [27:16<00:48,  1.68s/it]

Epoch 970 | Loss: 0.2219 | Hinge: 0.0639 | Anchor: 0.1557 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.11s


Training DQN:  98%|█████████▊| 976/1000 [27:25<00:40,  1.69s/it]

Epoch 975 | Loss: 0.2966 | Hinge: 0.0573 | Anchor: 0.2289 | Times - RW: 1.55s, Bellman: 0.02s, Train: 0.12s


Training DQN:  98%|█████████▊| 981/1000 [27:33<00:32,  1.69s/it]

Epoch 980 | Loss: 0.3090 | Hinge: 0.0683 | Anchor: 0.2244 | Times - RW: 1.54s, Bellman: 0.02s, Train: 0.10s


Training DQN:  99%|█████████▊| 986/1000 [27:42<00:23,  1.69s/it]

Epoch 985 | Loss: 0.3416 | Hinge: 0.0576 | Anchor: 0.2956 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.12s


Training DQN:  99%|█████████▉| 991/1000 [27:50<00:15,  1.68s/it]

Epoch 990 | Loss: 0.2258 | Hinge: 0.0463 | Anchor: 0.1670 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.12s


Training DQN: 100%|█████████▉| 996/1000 [27:59<00:06,  1.68s/it]

Epoch 995 | Loss: 0.1771 | Hinge: 0.0725 | Anchor: 0.1032 | Times - RW: 1.53s, Bellman: 0.02s, Train: 0.10s


Training DQN: 100%|██████████| 1000/1000 [28:06<00:00,  1.69s/it]


Training finished in 1686.0s
i_position_X_in_list_generators: 2
Step: 1 Beam (not cumulative) min: 66.04 median: 66.18 max: 66.39
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.000, t_unique_els  0.001, t_full_step 0.013
Step: 11 Beam (not cumulative) min: 62.08 median: 63.06 max: 64.05
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.001, t_unique_els  0.001, t_full_step 1747252702.381
Step: 21 Beam (not cumulative) min: 62.34 median: 63.08 max: 63.81
Step: 31 Beam (not cumulative) min: 62.30 median: 62.47 max: 62.63
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.000, t_unique_els  0.001, t_full_step 0.002
Step: 41 Beam (not cumulative) min: 63.96 median: 64.13 max: 64.30
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.000, t_unique_els  0.000, t_full_step 1747252702.443
Step: 51 Beam (not cumulative) min: 62.70 median: 63.17 max: 63.65
Step: 61 Beam (not cumulative) min: 62.08 median: 63.40 max: 64.71
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.000, t_unique_els  0.001

In [7]:
solver.config['n_steps_limit'] = 10000

In [9]:
solver.config['beam_width'] = 16
i_step, flag_found_destination = solver.test_beam_search()

i_position_X_in_list_generators: 2
Step: 1 Beam (not cumulative) min: 0.00 median: 0.00 max: 0.00
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.008, t_unique_els  0.005, t_full_step 0.018
Step: 11 Beam (not cumulative) min: 62.25 median: 62.95 max: 65.29
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.001, t_unique_els  0.000, t_full_step 1747252720.598
Step: 21 Beam (not cumulative) min: 59.34 median: 61.56 max: 63.62
Step: 31 Beam (not cumulative) min: 59.77 median: 61.13 max: 63.73
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.001, t_unique_els  0.001, t_full_step 0.003
Step: 41 Beam (not cumulative) min: 59.11 median: 61.38 max: 63.70
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.001, t_unique_els  0.001, t_full_step 1747252720.723
Step: 51 Beam (not cumulative) min: 53.39 median: 55.93 max: 58.37
Step: 61 Beam (not cumulative) min: 45.69 median: 48.06 max: 51.00
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.001, t_unique_els  0.001, t_full_step 0.004
Step: 71 Bea

In [None]:
# Initialize solver
solver = PermutationSolver(config)

# Train MLP first
print("\nTraining MLP model...")
mlp_losses = solver.train_mlp()

# Train DQN using pre-trained MLP weights
print("\nTraining DQN model with pre-trained MLP weights...")
dqn_losses = solver.train_dqn()

# Test models with beam search
print("\nTesting models with beam search...")
print("\nTesting DQN model...")
dqn_result = solver.test_beam_search()

In [9]:
from collections import deque, defaultdict
import torch
import numpy as np

def bfs_build_dataset(state_destination, list_generators, device, num_of_samples=5_000_000):
    start_state = tuple(state_destination.tolist())
    queue = deque([start_state])
    visited = {start_state: 0}
    
    while queue and len(visited) < num_of_samples:
        current_state = queue.popleft()
        current_depth = visited[current_state]
        
        for gen in list_generators:
            next_state_list = [current_state[gen[i]] for i in range(len(gen))]
            next_state = tuple(next_state_list)
            
            if next_state not in visited:
                visited[next_state] = current_depth + 1
                queue.append(next_state)
    
    all_states = list(visited.keys())
    print(len(all_states))
    depths = [visited[s] for s in all_states]
    
    X = torch.tensor(all_states, dtype=torch.long)
    y = torch.tensor(depths, dtype=torch.long)

    print('X.shape:',X.shape)
    print('y.shape:',y.shape)
    
    return X.to(device), y.to(device)


def get_LRX_moves(n):
    L = np.array( list(np.arange(1,n)) + [0])
    R = np.array( [n-1] + list(np.arange(n-1)) )
    X = np.array( [1,0] + list(np.arange(2,n)) )
    return L,R,X

device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
dtype_generators = torch.int64 
n = 9

L,R,X = get_LRX_moves(n)
list_generators = [L,R,X]
state_destination = torch.arange( len(list_generators[0]) , device = device, dtype =  dtype_generators  )

num_of_samples = 1_000_000
X_train, y_train = bfs_build_dataset(state_destination, list_generators, device, num_of_samples=num_of_samples)

362880
X.shape: torch.Size([362880, 9])
y.shape: torch.Size([362880])
