In [1]:
import os
import sys

# Make the project root importable. Note that this is the *parent* of the
# current working directory (i.e. the notebook is expected to be launched from
# a sub-folder of the project), not the notebook's own directory.
notebook_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if notebook_dir not in sys.path:
    sys.path.append(notebook_dir)

In [2]:
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

from src.app import PermutationSolver


In [3]:
# Value stored under 'n_permutations_length'; the random-walk length and the
# beam-search step limit below are derived from it.
n = 16

# Settings dictionary handed to PermutationSolver.
config = {
    'n_permutations_length': n,

    # --- Random-walk settings ---
    'random_walks_type': 'non-backtracking-beam',
    'n_random_walk_length': n * (n - 1) // 2,  # n*(n-1) is always even, so this is exact
    'n_random_walks_to_generate': 1_000,
    'n_random_walks_steps_back_to_ban': 8,

    # --- Neural-network settings ---
    'model_type': 'MLP',
    'list_layers_sizes': [4096],
    'n_epochs': 100,
    'batch_size': 1024,
    'lr_supervised': 1e-3,

    # --- DQN training settings ---
    'n_epochs_dqn': 16_000,
    'flag_dqn_round': False,
    'n_random_walks_to_generate_dqn': 1_000,
    'verbose_loc': 50,
    'lr_rl': 1e-3,

    # --- Beam-search settings ---
    'beam_search_torch': True,
    'beam_search_Fironov': False,
    'beam_width': 1,
    'n_steps_limit': 4 * n * n,
    'alpha_previous_cost_accumulation': 0,
    'beam_search_models_or_heuristics': 'model_torch',
    'ban_p0_p1_transposition_if_p0_lt_p1_ie_already_sorted': False,
    'n_beam_search_steps_back_to_ban': 32,

    # --- Which state to solve, and logging verbosity ---
    'solve_random_or_longest_state': 'solve_LRX_longest',
    'verbose': 100,
}

In [4]:
# ---------------------------------------------------------------------------
# Hyper-parameter sweep: train the MLP, train the DQN, run beam search and
# log one summary row per (architecture, lr, lr_rl, batch size, epochs,
# DQN epochs, beam width) combination.
# ---------------------------------------------------------------------------

# Grid of values to sweep over (each list currently holds a single value).
n_perm_list = [n]
n_epoch_list = [100]
n_epoch_dqn_list = [16000]
batch_size_list = [2**10]
lr_supervised_list = [0.001]
lr_rl_list = [0.001]
beam_width_list = [2**0]
list_layers_sizes = [[2**12]]

# Extra solver options. Judging by the names these select the loss mode and
# weight its hinge / anchor terms -- confirm against PermutationSolver.
config['mode'] = 'single_hard_hinge'
config['w_anchor'] = 0.0
config['w_hinge'] = 1.0

# Checkpoints and the summary CSV are written here; create the folder up front
# so torch.save / to_csv do not fail on a fresh checkout.
MODELS_DIR = 'models'
os.makedirs(MODELS_DIR, exist_ok=True)

# One dict per evaluated configuration. The DataFrame is rebuilt from this
# list instead of being grown with pd.concat inside the loop.
summary_records = []
summary_df = pd.DataFrame()

for n_permutations_length in n_perm_list:
    anchor_mode = 0
    solver = PermutationSolver(config)
    # NOTE(review): the length is overridden *after* the solver is constructed,
    # and config entries derived from `n` (e.g. 'n_random_walk_length',
    # 'n_steps_limit') are not recomputed. Harmless while n_perm_list == [n];
    # confirm before sweeping over several lengths.
    solver.config['n_permutations_length'] = n_permutations_length

    # NOTE(review): the same solver instance is reused for every grid point.
    # Whether train_mlp() / train_dqn() re-initialise the networks is not
    # visible from this notebook -- confirm before comparing runs.
    supervised_grid = product(
        list_layers_sizes,
        lr_supervised_list,
        lr_rl_list,
        batch_size_list,
        n_epoch_list,
    )
    # product() iterates with the right-most list varying fastest, i.e. in the
    # same order as the equivalent nested for-loops.
    for list_layers_size, lr, lr_rl, batch_size, n_epoch in supervised_grid:
        solver.config['list_layers_sizes'] = list_layers_size
        solver.config['lr_supervised'] = lr
        solver.config['lr_rl'] = lr_rl
        solver.config['batch_size'] = batch_size
        solver.config['n_epochs'] = n_epoch

        mlp_losses = solver.train_mlp()
        # Save the MLP weights. The file name embeds the *current* architecture
        # (not list_layers_sizes[0]) so different architectures do not
        # overwrite each other's checkpoints.
        torch.save(
            solver.mlp_model.state_dict(),
            f'{MODELS_DIR}/mlp_model_{n_permutations_length}_{n_epoch}_{list_layers_size}_{lr}_{batch_size}.pth',
        )

        for n_epoch_dqn in n_epoch_dqn_list:
            solver.config['n_epochs_dqn'] = n_epoch_dqn
            dqn_losses = solver.train_dqn()
            # Save the DQN weights (same naming rule as for the MLP above).
            torch.save(
                solver.dqn_model.state_dict(),
                f'{MODELS_DIR}/dqn_model_{n_permutations_length}_{n_epoch_dqn}_{list_layers_size}_{lr}_{batch_size}.pth',
            )

            for beam_width in beam_width_list:
                solver.config['beam_width'] = beam_width
                i_step, flag_found_destination, path = solver.test_beam_search()

                # Compare the DQN predictions on the anchor inputs with the
                # anchor targets. This is inference only, so autograd is
                # disabled to avoid building a graph over all of X_anchor.
                with torch.no_grad():
                    y_valid = solver.dqn_model(solver.X_anchor)
                diff = (
                    solver.y_anchor.cpu().detach().numpy()
                    - y_valid.cpu().detach().numpy().reshape(-1)
                )
                min_diff = diff.min()
                max_diff = diff.max()
                std_diff = diff.std()
                mean_diff = diff.mean()
                # A prediction is counted as wrong when it is off by more than 0.5.
                num_elements_less_than_minus_05 = (diff < -0.5).sum()
                num_elements_larger_than_05 = (diff > 0.5).sum()
                wrong_predictions = num_elements_less_than_minus_05 + num_elements_larger_than_05
                # Despite the name this is a fraction in [0, 1], not a percentage.
                percentage_wrong_predictions = wrong_predictions / len(diff)

                summary_records.append({
                    'n_permutations_length': n_permutations_length,
                    'anchor': anchor_mode,
                    # Architecture of *this* run, not the whole grid.
                    'list_layers_sizes': list(list_layers_size),
                    'lr_supervised': lr,
                    'lr_rl': lr_rl,
                    'n_epoch': n_epoch,
                    'n_epoch_dqn': n_epoch_dqn,
                    'beam_width': beam_width,
                    'i_step': i_step,
                    'flag_found_destination': flag_found_destination,
                    'mlp_losses': mlp_losses[-1],
                    'dqn_losses': dqn_losses[-1],
                    'min_diff': min_diff,
                    'max_diff': max_diff,
                    'mean_diff': mean_diff,
                    'std_diff': std_diff,
                    # NOTE(review): the column names say "1" but the threshold
                    # used above is 0.5. Names are kept so existing readers of
                    # summary_df.csv keep working.
                    'num_elements_less_than_minus_1': num_elements_less_than_minus_05,
                    'num_elements_larger_than_1': num_elements_larger_than_05,
                    'percentage_wrong_predictions': percentage_wrong_predictions,
                })

                # Rewrite the CSV after every configuration so partial results
                # survive an interrupted (multi-hour) sweep.
                summary_df = pd.DataFrame(summary_records)
                summary_df.to_csv(f'{MODELS_DIR}/summary_df.csv', index=False)


Training MLP:  10%|█         | 10/100 [00:04<00:36,  2.50it/s]

Epoch 10, Loss: 292.1363


Training MLP:  20%|██        | 20/100 [00:07<00:28,  2.83it/s]

Epoch 20, Loss: 264.9992


Training MLP:  30%|███       | 30/100 [00:11<00:26,  2.68it/s]

Epoch 30, Loss: 217.1881


Training MLP:  40%|████      | 40/100 [00:14<00:21,  2.73it/s]

Epoch 40, Loss: 231.0281


Training MLP:  50%|█████     | 50/100 [00:18<00:17,  2.87it/s]

Epoch 50, Loss: 224.8966


Training MLP:  60%|██████    | 60/100 [00:22<00:14,  2.67it/s]

Epoch 60, Loss: 251.5345


Training MLP:  70%|███████   | 70/100 [00:25<00:10,  2.75it/s]

Epoch 70, Loss: 240.5316


Training MLP:  80%|████████  | 80/100 [00:29<00:07,  2.75it/s]

Epoch 80, Loss: 262.7074


Training MLP:  90%|█████████ | 90/100 [00:33<00:03,  2.70it/s]

Epoch 90, Loss: 211.0787


Training MLP: 100%|██████████| 100/100 [00:37<00:00,  2.70it/s]

Epoch 100, Loss: 206.7275





1000001
X.shape: torch.Size([1000001, 16])
y.shape: torch.Size([1000001])
Starting DQN training for 16000 epochs...


Training DQN:   0%|          | 1/16000 [00:00<2:59:55,  1.48it/s]

Epoch   0 | Loss: 52.4258 | Hinge: 35.8346 | Anchor: 0.0000 | Times - RW: 0.28s, Bellman: 0.18s, Train: 0.21s


Training DQN:   0%|          | 51/16000 [00:26<2:17:10,  1.94it/s]

Epoch  50 | Loss: 0.6797 | Hinge: 0.5258 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.23s


Training DQN:   1%|          | 101/16000 [00:52<2:16:25,  1.94it/s]

Epoch 100 | Loss: 0.2138 | Hinge: 0.2001 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.22s


Training DQN:   1%|          | 151/16000 [01:18<2:15:04,  1.96it/s]

Epoch 150 | Loss: 0.1956 | Hinge: 0.1579 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.22s


Training DQN:   1%|▏         | 201/16000 [01:43<2:14:33,  1.96it/s]

Epoch 200 | Loss: 0.1470 | Hinge: 0.1296 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.22s


Training DQN:   2%|▏         | 251/16000 [02:09<2:13:54,  1.96it/s]

Epoch 250 | Loss: 0.1202 | Hinge: 0.0970 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:   2%|▏         | 301/16000 [02:34<2:04:55,  2.09it/s]

Epoch 300 | Loss: 0.1021 | Hinge: 0.0993 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:   2%|▏         | 351/16000 [02:58<2:04:37,  2.09it/s]

Epoch 350 | Loss: 0.0949 | Hinge: 0.0837 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:   3%|▎         | 401/16000 [03:23<2:06:45,  2.05it/s]

Epoch 400 | Loss: 0.0880 | Hinge: 0.0761 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:   3%|▎         | 451/16000 [03:48<2:09:04,  2.01it/s]

Epoch 450 | Loss: 0.0883 | Hinge: 0.0679 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.05s, Train: 0.21s


Training DQN:   3%|▎         | 501/16000 [04:12<2:08:29,  2.01it/s]

Epoch 500 | Loss: 0.0826 | Hinge: 0.0872 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:   3%|▎         | 551/16000 [04:36<2:01:48,  2.11it/s]

Epoch 550 | Loss: 0.0768 | Hinge: 0.0727 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:   4%|▍         | 601/16000 [05:01<2:03:02,  2.09it/s]

Epoch 600 | Loss: 0.0795 | Hinge: 0.0561 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:   4%|▍         | 651/16000 [05:26<2:12:34,  1.93it/s]

Epoch 650 | Loss: 0.0716 | Hinge: 0.0685 | Anchor: 0.0000 | Times - RW: 0.28s, Bellman: 0.04s, Train: 0.18s


Training DQN:   4%|▍         | 701/16000 [05:51<2:07:09,  2.01it/s]

Epoch 700 | Loss: 0.0727 | Hinge: 0.0490 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:   5%|▍         | 751/16000 [06:15<2:02:41,  2.07it/s]

Epoch 750 | Loss: 0.0708 | Hinge: 0.0703 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:   5%|▌         | 801/16000 [06:40<2:00:15,  2.11it/s]

Epoch 800 | Loss: 0.0692 | Hinge: 0.0577 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:   5%|▌         | 851/16000 [07:05<2:09:16,  1.95it/s]

Epoch 850 | Loss: 0.0745 | Hinge: 0.0450 | Anchor: 0.0000 | Times - RW: 0.29s, Bellman: 0.04s, Train: 0.17s


Training DQN:   6%|▌         | 901/16000 [07:29<2:21:53,  1.77it/s]

Epoch 900 | Loss: 0.0668 | Hinge: 0.0585 | Anchor: 0.0000 | Times - RW: 0.27s, Bellman: 0.04s, Train: 0.21s


Training DQN:   6%|▌         | 951/16000 [07:52<1:52:42,  2.23it/s]

Epoch 950 | Loss: 0.0632 | Hinge: 0.0641 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:   6%|▋         | 1001/16000 [08:15<1:59:10,  2.10it/s]

Epoch 1000 | Loss: 0.0654 | Hinge: 0.0562 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.16s


Training DQN:   7%|▋         | 1051/16000 [08:38<1:52:09,  2.22it/s]

Epoch 1050 | Loss: 0.0658 | Hinge: 0.0570 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:   7%|▋         | 1101/16000 [09:00<1:51:53,  2.22it/s]

Epoch 1100 | Loss: 0.0611 | Hinge: 0.0543 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:   7%|▋         | 1151/16000 [09:23<1:53:05,  2.19it/s]

Epoch 1150 | Loss: 0.0623 | Hinge: 0.0619 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:   8%|▊         | 1201/16000 [09:45<1:51:23,  2.21it/s]

Epoch 1200 | Loss: 0.0609 | Hinge: 0.0674 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:   8%|▊         | 1251/16000 [10:08<1:50:27,  2.23it/s]

Epoch 1250 | Loss: 0.0615 | Hinge: 0.0506 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:   8%|▊         | 1301/16000 [10:31<1:50:40,  2.21it/s]

Epoch 1300 | Loss: 0.0584 | Hinge: 0.0564 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:   8%|▊         | 1351/16000 [10:53<1:50:30,  2.21it/s]

Epoch 1350 | Loss: 0.0566 | Hinge: 0.0529 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:   9%|▉         | 1401/16000 [11:18<1:59:42,  2.03it/s]

Epoch 1400 | Loss: 0.0588 | Hinge: 0.0693 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.16s


Training DQN:   9%|▉         | 1451/16000 [11:42<1:53:08,  2.14it/s]

Epoch 1450 | Loss: 0.0553 | Hinge: 0.0515 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:   9%|▉         | 1501/16000 [12:07<1:56:23,  2.08it/s]

Epoch 1500 | Loss: 0.0602 | Hinge: 0.0540 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.17s


Training DQN:  10%|▉         | 1551/16000 [12:31<1:58:28,  2.03it/s]

Epoch 1550 | Loss: 0.0550 | Hinge: 0.0365 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.19s


Training DQN:  10%|█         | 1601/16000 [12:57<1:57:30,  2.04it/s]

Epoch 1600 | Loss: 0.0567 | Hinge: 0.0480 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  10%|█         | 1651/16000 [13:20<1:50:28,  2.16it/s]

Epoch 1650 | Loss: 0.0569 | Hinge: 0.0466 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.16s


Training DQN:  11%|█         | 1701/16000 [13:44<1:51:21,  2.14it/s]

Epoch 1700 | Loss: 0.0578 | Hinge: 0.0322 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  11%|█         | 1751/16000 [14:07<2:02:02,  1.95it/s]

Epoch 1750 | Loss: 0.0525 | Hinge: 0.0536 | Anchor: 0.0000 | Times - RW: 0.24s, Bellman: 0.04s, Train: 0.18s


Training DQN:  11%|█▏        | 1801/16000 [14:32<1:47:24,  2.20it/s]

Epoch 1800 | Loss: 0.0550 | Hinge: 0.0546 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.16s


Training DQN:  12%|█▏        | 1851/16000 [14:55<1:46:53,  2.21it/s]

Epoch 1850 | Loss: 0.0549 | Hinge: 0.0619 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.16s


Training DQN:  12%|█▏        | 1901/16000 [15:18<1:48:22,  2.17it/s]

Epoch 1900 | Loss: 0.0514 | Hinge: 0.0485 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.16s


Training DQN:  12%|█▏        | 1951/16000 [15:40<1:46:09,  2.21it/s]

Epoch 1950 | Loss: 0.0505 | Hinge: 0.0477 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  13%|█▎        | 2001/16000 [16:03<1:48:29,  2.15it/s]

Epoch 2000 | Loss: 0.0555 | Hinge: 0.0442 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  13%|█▎        | 2051/16000 [16:26<1:47:23,  2.16it/s]

Epoch 2050 | Loss: 0.0523 | Hinge: 0.0479 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  13%|█▎        | 2101/16000 [16:50<1:50:06,  2.10it/s]

Epoch 2100 | Loss: 0.0538 | Hinge: 0.0524 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  13%|█▎        | 2151/16000 [17:15<2:04:35,  1.85it/s]

Epoch 2150 | Loss: 0.0555 | Hinge: 0.0489 | Anchor: 0.0000 | Times - RW: 0.25s, Bellman: 0.04s, Train: 0.20s


Training DQN:  14%|█▍        | 2201/16000 [17:40<1:52:44,  2.04it/s]

Epoch 2200 | Loss: 0.0526 | Hinge: 0.0517 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.20s


Training DQN:  14%|█▍        | 2251/16000 [18:05<1:54:40,  2.00it/s]

Epoch 2250 | Loss: 0.0490 | Hinge: 0.0311 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.16s


Training DQN:  14%|█▍        | 2301/16000 [18:28<1:46:14,  2.15it/s]

Epoch 2300 | Loss: 0.0512 | Hinge: 0.0339 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  15%|█▍        | 2351/16000 [18:52<1:46:43,  2.13it/s]

Epoch 2350 | Loss: 0.0508 | Hinge: 0.0548 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  15%|█▌        | 2401/16000 [19:15<1:45:52,  2.14it/s]

Epoch 2400 | Loss: 0.0513 | Hinge: 0.0469 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  15%|█▌        | 2451/16000 [19:39<1:45:22,  2.14it/s]

Epoch 2450 | Loss: 0.0514 | Hinge: 0.0454 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  16%|█▌        | 2501/16000 [20:02<1:46:00,  2.12it/s]

Epoch 2500 | Loss: 0.0517 | Hinge: 0.0495 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  16%|█▌        | 2551/16000 [20:26<1:44:53,  2.14it/s]

Epoch 2550 | Loss: 0.0500 | Hinge: 0.0558 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  16%|█▋        | 2601/16000 [20:49<1:43:54,  2.15it/s]

Epoch 2600 | Loss: 0.0515 | Hinge: 0.0420 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  17%|█▋        | 2651/16000 [21:12<1:44:55,  2.12it/s]

Epoch 2650 | Loss: 0.0504 | Hinge: 0.0405 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  17%|█▋        | 2701/16000 [21:36<1:43:02,  2.15it/s]

Epoch 2700 | Loss: 0.0533 | Hinge: 0.0323 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  17%|█▋        | 2751/16000 [21:59<1:43:58,  2.12it/s]

Epoch 2750 | Loss: 0.0517 | Hinge: 0.0547 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  18%|█▊        | 2801/16000 [22:23<1:42:28,  2.15it/s]

Epoch 2800 | Loss: 0.0498 | Hinge: 0.0403 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  18%|█▊        | 2851/16000 [22:46<1:42:07,  2.15it/s]

Epoch 2850 | Loss: 0.0495 | Hinge: 0.0590 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  18%|█▊        | 2901/16000 [23:09<1:41:54,  2.14it/s]

Epoch 2900 | Loss: 0.0488 | Hinge: 0.0604 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  18%|█▊        | 2951/16000 [23:33<1:41:08,  2.15it/s]

Epoch 2950 | Loss: 0.0483 | Hinge: 0.0462 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  19%|█▉        | 3001/16000 [23:56<1:40:52,  2.15it/s]

Epoch 3000 | Loss: 0.0459 | Hinge: 0.0414 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  19%|█▉        | 3051/16000 [24:20<1:44:49,  2.06it/s]

Epoch 3050 | Loss: 0.0470 | Hinge: 0.0504 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.17s


Training DQN:  19%|█▉        | 3101/16000 [24:45<1:51:18,  1.93it/s]

Epoch 3100 | Loss: 0.0479 | Hinge: 0.0459 | Anchor: 0.0000 | Times - RW: 0.27s, Bellman: 0.05s, Train: 0.19s


Training DQN:  20%|█▉        | 3151/16000 [25:11<1:57:51,  1.82it/s]

Epoch 3150 | Loss: 0.0575 | Hinge: 0.0438 | Anchor: 0.0000 | Times - RW: 0.23s, Bellman: 0.04s, Train: 0.21s


Training DQN:  20%|██        | 3201/16000 [25:36<1:41:19,  2.11it/s]

Epoch 3200 | Loss: 0.0436 | Hinge: 0.0467 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.16s


Training DQN:  20%|██        | 3251/16000 [25:59<1:40:33,  2.11it/s]

Epoch 3250 | Loss: 0.0457 | Hinge: 0.0419 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  21%|██        | 3301/16000 [26:22<1:37:06,  2.18it/s]

Epoch 3300 | Loss: 0.0452 | Hinge: 0.0379 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  21%|██        | 3351/16000 [26:45<1:37:30,  2.16it/s]

Epoch 3350 | Loss: 0.0474 | Hinge: 0.0421 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  21%|██▏       | 3401/16000 [27:08<1:36:07,  2.18it/s]

Epoch 3400 | Loss: 0.0489 | Hinge: 0.0355 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  22%|██▏       | 3451/16000 [27:32<1:47:02,  1.95it/s]

Epoch 3450 | Loss: 0.0472 | Hinge: 0.0428 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.19s


Training DQN:  22%|██▏       | 3501/16000 [27:57<1:41:25,  2.05it/s]

Epoch 3500 | Loss: 0.0474 | Hinge: 0.0369 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  22%|██▏       | 3551/16000 [28:22<1:41:17,  2.05it/s]

Epoch 3550 | Loss: 0.0458 | Hinge: 0.0406 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  23%|██▎       | 3601/16000 [28:47<1:43:15,  2.00it/s]

Epoch 3600 | Loss: 0.0445 | Hinge: 0.0367 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.20s


Training DQN:  23%|██▎       | 3651/16000 [29:13<1:47:25,  1.92it/s]

Epoch 3650 | Loss: 0.0478 | Hinge: 0.0448 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.20s


Training DQN:  23%|██▎       | 3701/16000 [29:38<1:41:46,  2.01it/s]

Epoch 3700 | Loss: 0.0464 | Hinge: 0.0396 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  23%|██▎       | 3751/16000 [30:04<1:50:02,  1.86it/s]

Epoch 3750 | Loss: 0.0459 | Hinge: 0.0367 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.25s


Training DQN:  24%|██▍       | 3801/16000 [30:30<1:42:50,  1.98it/s]

Epoch 3800 | Loss: 0.0437 | Hinge: 0.0292 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.19s


Training DQN:  24%|██▍       | 3851/16000 [30:55<1:43:46,  1.95it/s]

Epoch 3850 | Loss: 0.0443 | Hinge: 0.0378 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.19s


Training DQN:  24%|██▍       | 3901/16000 [31:21<1:41:31,  1.99it/s]

Epoch 3900 | Loss: 0.0465 | Hinge: 0.0394 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  25%|██▍       | 3951/16000 [31:47<1:39:31,  2.02it/s]

Epoch 3950 | Loss: 0.0443 | Hinge: 0.0308 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  25%|██▌       | 4001/16000 [32:12<1:41:06,  1.98it/s]

Epoch 4000 | Loss: 0.0440 | Hinge: 0.0406 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.20s


Training DQN:  25%|██▌       | 4051/16000 [32:37<1:40:26,  1.98it/s]

Epoch 4050 | Loss: 0.0417 | Hinge: 0.0416 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  26%|██▌       | 4101/16000 [33:03<1:44:39,  1.89it/s]

Epoch 4100 | Loss: 0.0434 | Hinge: 0.0421 | Anchor: 0.0000 | Times - RW: 0.20s, Bellman: 0.04s, Train: 0.18s


Training DQN:  26%|██▌       | 4151/16000 [33:28<1:38:36,  2.00it/s]

Epoch 4150 | Loss: 0.0439 | Hinge: 0.0408 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.20s


Training DQN:  26%|██▋       | 4201/16000 [33:54<1:40:51,  1.95it/s]

Epoch 4200 | Loss: 0.0429 | Hinge: 0.0491 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.19s


Training DQN:  27%|██▋       | 4251/16000 [34:19<1:38:18,  1.99it/s]

Epoch 4250 | Loss: 0.0467 | Hinge: 0.0379 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  27%|██▋       | 4301/16000 [34:44<1:37:56,  1.99it/s]

Epoch 4300 | Loss: 0.0443 | Hinge: 0.0408 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  27%|██▋       | 4351/16000 [35:10<1:38:52,  1.96it/s]

Epoch 4350 | Loss: 0.0440 | Hinge: 0.0463 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.19s


Training DQN:  28%|██▊       | 4401/16000 [35:35<1:36:50,  2.00it/s]

Epoch 4400 | Loss: 0.0441 | Hinge: 0.0396 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  28%|██▊       | 4451/16000 [36:01<1:43:00,  1.87it/s]

Epoch 4450 | Loss: 0.0464 | Hinge: 0.0510 | Anchor: 0.0000 | Times - RW: 0.20s, Bellman: 0.04s, Train: 0.21s


Training DQN:  28%|██▊       | 4501/16000 [36:26<1:31:55,  2.08it/s]

Epoch 4500 | Loss: 0.0442 | Hinge: 0.0384 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  28%|██▊       | 4551/16000 [36:50<1:30:27,  2.11it/s]

Epoch 4550 | Loss: 0.0454 | Hinge: 0.0377 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  29%|██▉       | 4601/16000 [37:14<1:30:13,  2.11it/s]

Epoch 4600 | Loss: 0.0434 | Hinge: 0.0361 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  29%|██▉       | 4651/16000 [37:38<1:39:06,  1.91it/s]

Epoch 4650 | Loss: 0.0441 | Hinge: 0.0410 | Anchor: 0.0000 | Times - RW: 0.20s, Bellman: 0.04s, Train: 0.20s


Training DQN:  29%|██▉       | 4701/16000 [38:05<1:36:53,  1.94it/s]

Epoch 4700 | Loss: 0.0454 | Hinge: 0.0374 | Anchor: 0.0000 | Times - RW: 0.22s, Bellman: 0.04s, Train: 0.19s


Training DQN:  30%|██▉       | 4751/16000 [38:30<1:33:36,  2.00it/s]

Epoch 4750 | Loss: 0.0419 | Hinge: 0.0390 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  30%|███       | 4801/16000 [38:55<1:34:12,  1.98it/s]

Epoch 4800 | Loss: 0.0401 | Hinge: 0.0353 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.21s


Training DQN:  30%|███       | 4851/16000 [39:19<1:30:50,  2.05it/s]

Epoch 4850 | Loss: 0.0435 | Hinge: 0.0371 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  31%|███       | 4901/16000 [39:43<1:29:37,  2.06it/s]

Epoch 4900 | Loss: 0.0416 | Hinge: 0.0361 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  31%|███       | 4951/16000 [40:08<1:29:01,  2.07it/s]

Epoch 4950 | Loss: 0.0451 | Hinge: 0.0261 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  31%|███▏      | 5001/16000 [40:32<1:28:46,  2.07it/s]

Epoch 5000 | Loss: 0.0428 | Hinge: 0.0429 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  32%|███▏      | 5051/16000 [40:57<1:36:30,  1.89it/s]

Epoch 5050 | Loss: 0.0420 | Hinge: 0.0427 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  32%|███▏      | 5101/16000 [41:23<1:33:29,  1.94it/s]

Epoch 5100 | Loss: 0.0396 | Hinge: 0.0342 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.20s


Training DQN:  32%|███▏      | 5151/16000 [41:48<1:31:45,  1.97it/s]

Epoch 5150 | Loss: 0.0424 | Hinge: 0.0407 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.05s, Train: 0.20s


Training DQN:  33%|███▎      | 5201/16000 [42:13<1:28:55,  2.02it/s]

Epoch 5200 | Loss: 0.0416 | Hinge: 0.0412 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  33%|███▎      | 5251/16000 [42:38<1:38:43,  1.81it/s]

Epoch 5250 | Loss: 0.0415 | Hinge: 0.0341 | Anchor: 0.0000 | Times - RW: 0.22s, Bellman: 0.04s, Train: 0.21s


Training DQN:  33%|███▎      | 5301/16000 [43:03<1:28:06,  2.02it/s]

Epoch 5300 | Loss: 0.0435 | Hinge: 0.0343 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  33%|███▎      | 5351/16000 [43:29<1:28:49,  2.00it/s]

Epoch 5350 | Loss: 0.0424 | Hinge: 0.0349 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.20s


Training DQN:  34%|███▍      | 5401/16000 [43:54<1:27:56,  2.01it/s]

Epoch 5400 | Loss: 0.0424 | Hinge: 0.0424 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  34%|███▍      | 5451/16000 [44:20<1:28:58,  1.98it/s]

Epoch 5450 | Loss: 0.0421 | Hinge: 0.0407 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.05s, Train: 0.20s


Training DQN:  34%|███▍      | 5501/16000 [44:45<1:27:48,  1.99it/s]

Epoch 5500 | Loss: 0.0425 | Hinge: 0.0457 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.20s


Training DQN:  35%|███▍      | 5551/16000 [45:11<1:28:48,  1.96it/s]

Epoch 5550 | Loss: 0.0436 | Hinge: 0.0297 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.18s


Training DQN:  35%|███▌      | 5601/16000 [45:36<1:29:37,  1.93it/s]

Epoch 5600 | Loss: 0.0418 | Hinge: 0.0430 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.19s


Training DQN:  35%|███▌      | 5651/16000 [46:01<1:27:56,  1.96it/s]

Epoch 5650 | Loss: 0.0428 | Hinge: 0.0502 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.19s


Training DQN:  36%|███▌      | 5701/16000 [46:27<1:27:17,  1.97it/s]

Epoch 5700 | Loss: 0.0410 | Hinge: 0.0455 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.20s


Training DQN:  36%|███▌      | 5751/16000 [46:52<1:23:20,  2.05it/s]

Epoch 5750 | Loss: 0.0421 | Hinge: 0.0377 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  36%|███▋      | 5801/16000 [47:16<1:21:35,  2.08it/s]

Epoch 5800 | Loss: 0.0394 | Hinge: 0.0361 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.19s


Training DQN:  37%|███▋      | 5851/16000 [47:41<1:20:51,  2.09it/s]

Epoch 5850 | Loss: 0.0395 | Hinge: 0.0344 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.05s, Train: 0.18s


Training DQN:  37%|███▋      | 5901/16000 [48:05<1:22:30,  2.04it/s]

Epoch 5900 | Loss: 0.0422 | Hinge: 0.0387 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  37%|███▋      | 5951/16000 [48:30<1:23:58,  1.99it/s]

Epoch 5950 | Loss: 0.0410 | Hinge: 0.0349 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.19s


Training DQN:  38%|███▊      | 6001/16000 [48:55<1:22:07,  2.03it/s]

Epoch 6000 | Loss: 0.0381 | Hinge: 0.0336 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.19s


Training DQN:  38%|███▊      | 6051/16000 [49:21<1:23:46,  1.98it/s]

Epoch 6050 | Loss: 0.0402 | Hinge: 0.0335 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.18s


Training DQN:  38%|███▊      | 6101/16000 [49:46<1:21:16,  2.03it/s]

Epoch 6100 | Loss: 0.0432 | Hinge: 0.0464 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  38%|███▊      | 6151/16000 [50:11<1:21:59,  2.00it/s]

Epoch 6150 | Loss: 0.0403 | Hinge: 0.0370 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  39%|███▉      | 6201/16000 [50:37<1:31:10,  1.79it/s]

Epoch 6200 | Loss: 0.0400 | Hinge: 0.0393 | Anchor: 0.0000 | Times - RW: 0.20s, Bellman: 0.04s, Train: 0.23s


Training DQN:  39%|███▉      | 6251/16000 [51:02<1:20:30,  2.02it/s]

Epoch 6250 | Loss: 0.0398 | Hinge: 0.0380 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  39%|███▉      | 6301/16000 [51:27<1:21:14,  1.99it/s]

Epoch 6300 | Loss: 0.0398 | Hinge: 0.0356 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.05s, Train: 0.19s


Training DQN:  40%|███▉      | 6351/16000 [51:52<1:18:43,  2.04it/s]

Epoch 6350 | Loss: 0.0385 | Hinge: 0.0368 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.20s


Training DQN:  40%|████      | 6401/16000 [52:16<1:17:27,  2.07it/s]

Epoch 6400 | Loss: 0.0388 | Hinge: 0.0300 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  40%|████      | 6451/16000 [52:41<1:17:15,  2.06it/s]

Epoch 6450 | Loss: 0.0406 | Hinge: 0.0247 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  41%|████      | 6501/16000 [53:05<1:16:24,  2.07it/s]

Epoch 6500 | Loss: 0.0413 | Hinge: 0.0423 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  41%|████      | 6551/16000 [53:30<1:19:57,  1.97it/s]

Epoch 6550 | Loss: 0.0394 | Hinge: 0.0364 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.19s


Training DQN:  41%|████▏     | 6601/16000 [53:55<1:16:30,  2.05it/s]

Epoch 6600 | Loss: 0.0419 | Hinge: 0.0437 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  42%|████▏     | 6651/16000 [54:20<1:21:27,  1.91it/s]

Epoch 6650 | Loss: 0.0377 | Hinge: 0.0266 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.19s


Training DQN:  42%|████▏     | 6701/16000 [54:45<1:19:39,  1.95it/s]

Epoch 6700 | Loss: 0.0409 | Hinge: 0.0326 | Anchor: 0.0000 | Times - RW: 0.20s, Bellman: 0.04s, Train: 0.19s


Training DQN:  42%|████▏     | 6751/16000 [55:11<1:17:12,  2.00it/s]

Epoch 6750 | Loss: 0.0397 | Hinge: 0.0334 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.20s


Training DQN:  43%|████▎     | 6801/16000 [55:36<1:13:39,  2.08it/s]

Epoch 6800 | Loss: 0.0398 | Hinge: 0.0466 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  43%|████▎     | 6851/16000 [56:01<1:26:17,  1.77it/s]

Epoch 6850 | Loss: 0.0416 | Hinge: 0.0498 | Anchor: 0.0000 | Times - RW: 0.22s, Bellman: 0.04s, Train: 0.21s


Training DQN:  43%|████▎     | 6901/16000 [56:27<1:19:10,  1.92it/s]

Epoch 6900 | Loss: 0.0378 | Hinge: 0.0335 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.21s


Training DQN:  43%|████▎     | 6951/16000 [56:53<1:14:43,  2.02it/s]

Epoch 6950 | Loss: 0.0408 | Hinge: 0.0357 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.18s


Training DQN:  44%|████▍     | 7001/16000 [57:18<1:15:08,  2.00it/s]

Epoch 7000 | Loss: 0.0363 | Hinge: 0.0342 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  44%|████▍     | 7051/16000 [57:43<1:14:37,  2.00it/s]

Epoch 7050 | Loss: 0.0423 | Hinge: 0.0401 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  44%|████▍     | 7101/16000 [58:08<1:15:07,  1.97it/s]

Epoch 7100 | Loss: 0.0394 | Hinge: 0.0354 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  45%|████▍     | 7151/16000 [58:34<1:16:11,  1.94it/s]

Epoch 7150 | Loss: 0.0394 | Hinge: 0.0357 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.05s, Train: 0.18s


Training DQN:  45%|████▌     | 7201/16000 [59:01<1:27:35,  1.67it/s]

Epoch 7200 | Loss: 0.0394 | Hinge: 0.0298 | Anchor: 0.0000 | Times - RW: 0.20s, Bellman: 0.04s, Train: 0.22s


Training DQN:  45%|████▌     | 7251/16000 [59:28<1:17:57,  1.87it/s]

Epoch 7250 | Loss: 0.0386 | Hinge: 0.0432 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.05s, Train: 0.20s


Training DQN:  46%|████▌     | 7301/16000 [59:53<1:12:02,  2.01it/s]

Epoch 7300 | Loss: 0.0378 | Hinge: 0.0469 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.05s, Train: 0.18s


Training DQN:  46%|████▌     | 7351/16000 [1:00:19<1:11:58,  2.00it/s]

Epoch 7350 | Loss: 0.0440 | Hinge: 0.0348 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  46%|████▋     | 7401/16000 [1:00:44<1:15:37,  1.89it/s]

Epoch 7400 | Loss: 0.0365 | Hinge: 0.0309 | Anchor: 0.0000 | Times - RW: 0.25s, Bellman: 0.04s, Train: 0.20s


Training DQN:  47%|████▋     | 7451/16000 [1:01:09<1:10:41,  2.02it/s]

Epoch 7450 | Loss: 0.0395 | Hinge: 0.0364 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  47%|████▋     | 7501/16000 [1:01:34<1:10:42,  2.00it/s]

Epoch 7500 | Loss: 0.0384 | Hinge: 0.0338 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.20s


Training DQN:  47%|████▋     | 7551/16000 [1:02:00<1:13:31,  1.92it/s]

Epoch 7550 | Loss: 0.0392 | Hinge: 0.0360 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.20s


Training DQN:  48%|████▊     | 7601/16000 [1:02:25<1:09:12,  2.02it/s]

Epoch 7600 | Loss: 0.0399 | Hinge: 0.0441 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  48%|████▊     | 7651/16000 [1:02:52<1:19:29,  1.75it/s]

Epoch 7650 | Loss: 0.0390 | Hinge: 0.0326 | Anchor: 0.0000 | Times - RW: 0.22s, Bellman: 0.04s, Train: 0.21s


Training DQN:  48%|████▊     | 7701/16000 [1:03:18<1:15:25,  1.83it/s]

Epoch 7700 | Loss: 0.0380 | Hinge: 0.0335 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.19s


Training DQN:  48%|████▊     | 7751/16000 [1:03:45<1:12:51,  1.89it/s]

Epoch 7750 | Loss: 0.0399 | Hinge: 0.0326 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.22s


Training DQN:  49%|████▉     | 7801/16000 [1:04:10<1:10:25,  1.94it/s]

Epoch 7800 | Loss: 0.0402 | Hinge: 0.0472 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.18s


Training DQN:  49%|████▉     | 7851/16000 [1:04:35<1:09:08,  1.96it/s]

Epoch 7850 | Loss: 0.0393 | Hinge: 0.0373 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.20s


Training DQN:  49%|████▉     | 7901/16000 [1:05:00<1:09:14,  1.95it/s]

Epoch 7900 | Loss: 0.0381 | Hinge: 0.0335 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.21s


Training DQN:  50%|████▉     | 7951/16000 [1:05:25<1:06:53,  2.01it/s]

Epoch 7950 | Loss: 0.0380 | Hinge: 0.0394 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  50%|█████     | 8001/16000 [1:05:50<1:06:45,  2.00it/s]

Epoch 8000 | Loss: 0.0379 | Hinge: 0.0350 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  50%|█████     | 8051/16000 [1:06:17<1:21:30,  1.63it/s]

Epoch 8050 | Loss: 0.0436 | Hinge: 0.0336 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.22s


Training DQN:  51%|█████     | 8101/16000 [1:06:44<1:08:31,  1.92it/s]

Epoch 8100 | Loss: 0.0352 | Hinge: 0.0349 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  51%|█████     | 8151/16000 [1:07:10<1:05:27,  2.00it/s]

Epoch 8150 | Loss: 0.0413 | Hinge: 0.0325 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.19s


Training DQN:  51%|█████▏    | 8201/16000 [1:07:35<1:03:53,  2.03it/s]

Epoch 8200 | Loss: 0.0410 | Hinge: 0.0403 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  52%|█████▏    | 8251/16000 [1:08:00<1:02:04,  2.08it/s]

Epoch 8250 | Loss: 0.0394 | Hinge: 0.0329 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  52%|█████▏    | 8301/16000 [1:08:26<1:05:57,  1.95it/s]

Epoch 8300 | Loss: 0.0372 | Hinge: 0.0371 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.19s


Training DQN:  52%|█████▏    | 8351/16000 [1:08:52<1:05:46,  1.94it/s]

Epoch 8350 | Loss: 0.0380 | Hinge: 0.0312 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.20s


Training DQN:  53%|█████▎    | 8401/16000 [1:09:18<1:05:53,  1.92it/s]

Epoch 8400 | Loss: 0.0404 | Hinge: 0.0346 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.19s


Training DQN:  53%|█████▎    | 8451/16000 [1:09:44<1:06:27,  1.89it/s]

Epoch 8450 | Loss: 0.0386 | Hinge: 0.0420 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.20s


Training DQN:  53%|█████▎    | 8501/16000 [1:10:10<1:05:43,  1.90it/s]

Epoch 8500 | Loss: 0.0356 | Hinge: 0.0336 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.22s


Training DQN:  53%|█████▎    | 8551/16000 [1:10:36<1:04:27,  1.93it/s]

Epoch 8550 | Loss: 0.0388 | Hinge: 0.0256 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.05s, Train: 0.19s


Training DQN:  54%|█████▍    | 8601/16000 [1:11:03<1:06:59,  1.84it/s]

Epoch 8600 | Loss: 0.0406 | Hinge: 0.0366 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.20s


Training DQN:  54%|█████▍    | 8651/16000 [1:11:29<1:02:51,  1.95it/s]

Epoch 8650 | Loss: 0.0384 | Hinge: 0.0353 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.20s


Training DQN:  54%|█████▍    | 8701/16000 [1:11:55<1:06:02,  1.84it/s]

Epoch 8700 | Loss: 0.0375 | Hinge: 0.0456 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.22s


Training DQN:  55%|█████▍    | 8751/16000 [1:12:21<1:03:48,  1.89it/s]

Epoch 8750 | Loss: 0.0362 | Hinge: 0.0246 | Anchor: 0.0000 | Times - RW: 0.30s, Bellman: 0.04s, Train: 0.19s


Training DQN:  55%|█████▌    | 8801/16000 [1:12:47<1:01:57,  1.94it/s]

Epoch 8800 | Loss: 0.0392 | Hinge: 0.0421 | Anchor: 0.0000 | Times - RW: 0.20s, Bellman: 0.04s, Train: 0.18s


Training DQN:  55%|█████▌    | 8851/16000 [1:13:12<1:04:43,  1.84it/s]

Epoch 8850 | Loss: 0.0388 | Hinge: 0.0551 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.18s


Training DQN:  56%|█████▌    | 8901/16000 [1:13:36<58:08,  2.03it/s]  

Epoch 8900 | Loss: 0.0394 | Hinge: 0.0420 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.20s


Training DQN:  56%|█████▌    | 8951/16000 [1:14:02<1:00:15,  1.95it/s]

Epoch 8950 | Loss: 0.0370 | Hinge: 0.0291 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.21s


Training DQN:  56%|█████▋    | 9001/16000 [1:14:28<1:00:01,  1.94it/s]

Epoch 9000 | Loss: 0.0378 | Hinge: 0.0411 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  57%|█████▋    | 9051/16000 [1:14:53<57:47,  2.00it/s]  

Epoch 9050 | Loss: 0.0373 | Hinge: 0.0298 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  57%|█████▋    | 9101/16000 [1:15:19<58:48,  1.96it/s]  

Epoch 9100 | Loss: 0.0388 | Hinge: 0.0302 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.21s


Training DQN:  57%|█████▋    | 9151/16000 [1:15:44<53:52,  2.12it/s]  

Epoch 9150 | Loss: 0.0385 | Hinge: 0.0411 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  58%|█████▊    | 9201/16000 [1:16:09<56:51,  1.99it/s]  

Epoch 9200 | Loss: 0.0386 | Hinge: 0.0307 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  58%|█████▊    | 9251/16000 [1:16:34<55:31,  2.03it/s]

Epoch 9250 | Loss: 0.0375 | Hinge: 0.0363 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  58%|█████▊    | 9301/16000 [1:16:59<54:05,  2.06it/s]

Epoch 9300 | Loss: 0.0395 | Hinge: 0.0316 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.21s


Training DQN:  58%|█████▊    | 9351/16000 [1:17:24<57:02,  1.94it/s]  

Epoch 9350 | Loss: 0.0353 | Hinge: 0.0294 | Anchor: 0.0000 | Times - RW: 0.22s, Bellman: 0.04s, Train: 0.20s


Training DQN:  59%|█████▉    | 9401/16000 [1:17:50<55:33,  1.98it/s]

Epoch 9400 | Loss: 0.0390 | Hinge: 0.0416 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  59%|█████▉    | 9451/16000 [1:18:16<54:10,  2.01it/s]  

Epoch 9450 | Loss: 0.0372 | Hinge: 0.0391 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.19s


Training DQN:  59%|█████▉    | 9501/16000 [1:18:41<56:59,  1.90it/s]

Epoch 9500 | Loss: 0.0390 | Hinge: 0.0292 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.06s, Train: 0.20s


Training DQN:  60%|█████▉    | 9551/16000 [1:19:06<52:47,  2.04it/s]

Epoch 9550 | Loss: 0.0369 | Hinge: 0.0426 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  60%|██████    | 9601/16000 [1:19:31<55:34,  1.92it/s]

Epoch 9600 | Loss: 0.0375 | Hinge: 0.0308 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.20s


Training DQN:  60%|██████    | 9651/16000 [1:19:57<53:17,  1.99it/s]

Epoch 9650 | Loss: 0.0383 | Hinge: 0.0389 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.20s


Training DQN:  61%|██████    | 9701/16000 [1:20:23<51:54,  2.02it/s]

Epoch 9700 | Loss: 0.0372 | Hinge: 0.0374 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.20s


Training DQN:  61%|██████    | 9751/16000 [1:20:48<52:13,  1.99it/s]

Epoch 9750 | Loss: 0.0388 | Hinge: 0.0335 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  61%|██████▏   | 9801/16000 [1:21:13<51:38,  2.00it/s]

Epoch 9800 | Loss: 0.0352 | Hinge: 0.0408 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  62%|██████▏   | 9851/16000 [1:21:38<51:14,  2.00it/s]

Epoch 9850 | Loss: 0.0358 | Hinge: 0.0322 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.20s


Training DQN:  62%|██████▏   | 9901/16000 [1:22:04<48:57,  2.08it/s]

Epoch 9900 | Loss: 0.0378 | Hinge: 0.0286 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  62%|██████▏   | 9951/16000 [1:22:29<49:23,  2.04it/s]

Epoch 9950 | Loss: 0.0376 | Hinge: 0.0363 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  63%|██████▎   | 10001/16000 [1:22:55<52:02,  1.92it/s]

Epoch 10000 | Loss: 0.0360 | Hinge: 0.0300 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.20s


Training DQN:  63%|██████▎   | 10051/16000 [1:23:21<49:13,  2.01it/s]

Epoch 10050 | Loss: 0.0368 | Hinge: 0.0382 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  63%|██████▎   | 10101/16000 [1:23:46<56:17,  1.75it/s]

Epoch 10100 | Loss: 0.0369 | Hinge: 0.0375 | Anchor: 0.0000 | Times - RW: 0.23s, Bellman: 0.05s, Train: 0.20s


Training DQN:  63%|██████▎   | 10151/16000 [1:24:13<52:37,  1.85it/s]

Epoch 10150 | Loss: 0.0329 | Hinge: 0.0363 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.24s


Training DQN:  64%|██████▍   | 10201/16000 [1:24:39<45:58,  2.10it/s]

Epoch 10200 | Loss: 0.0384 | Hinge: 0.0287 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  64%|██████▍   | 10251/16000 [1:25:03<45:27,  2.11it/s]

Epoch 10250 | Loss: 0.0358 | Hinge: 0.0350 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  64%|██████▍   | 10301/16000 [1:25:29<48:41,  1.95it/s]

Epoch 10300 | Loss: 0.0369 | Hinge: 0.0522 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.18s


Training DQN:  65%|██████▍   | 10351/16000 [1:25:54<49:36,  1.90it/s]

Epoch 10350 | Loss: 0.0347 | Hinge: 0.0298 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.05s, Train: 0.21s


Training DQN:  65%|██████▌   | 10401/16000 [1:26:18<43:01,  2.17it/s]

Epoch 10400 | Loss: 0.0375 | Hinge: 0.0270 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  65%|██████▌   | 10451/16000 [1:26:41<43:14,  2.14it/s]

Epoch 10450 | Loss: 0.0347 | Hinge: 0.0287 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  66%|██████▌   | 10501/16000 [1:27:04<42:24,  2.16it/s]

Epoch 10500 | Loss: 0.0357 | Hinge: 0.0289 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  66%|██████▌   | 10551/16000 [1:27:27<42:03,  2.16it/s]

Epoch 10550 | Loss: 0.0333 | Hinge: 0.0316 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  66%|██████▋   | 10601/16000 [1:27:50<41:49,  2.15it/s]

Epoch 10600 | Loss: 0.0383 | Hinge: 0.0322 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  67%|██████▋   | 10651/16000 [1:28:14<41:08,  2.17it/s]

Epoch 10650 | Loss: 0.0346 | Hinge: 0.0332 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  67%|██████▋   | 10701/16000 [1:28:37<40:36,  2.17it/s]

Epoch 10700 | Loss: 0.0357 | Hinge: 0.0362 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  67%|██████▋   | 10751/16000 [1:29:00<40:33,  2.16it/s]

Epoch 10750 | Loss: 0.0337 | Hinge: 0.0363 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  68%|██████▊   | 10801/16000 [1:29:23<39:46,  2.18it/s]

Epoch 10800 | Loss: 0.0422 | Hinge: 0.0301 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  68%|██████▊   | 10851/16000 [1:29:47<39:19,  2.18it/s]

Epoch 10850 | Loss: 0.0372 | Hinge: 0.0334 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  68%|██████▊   | 10901/16000 [1:30:09<38:41,  2.20it/s]

Epoch 10900 | Loss: 0.0360 | Hinge: 0.0265 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  68%|██████▊   | 10951/16000 [1:30:32<38:28,  2.19it/s]

Epoch 10950 | Loss: 0.0370 | Hinge: 0.0301 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  69%|██████▉   | 11001/16000 [1:30:55<37:59,  2.19it/s]

Epoch 11000 | Loss: 0.0367 | Hinge: 0.0419 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  69%|██████▉   | 11051/16000 [1:31:18<37:27,  2.20it/s]

Epoch 11050 | Loss: 0.0353 | Hinge: 0.0274 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  69%|██████▉   | 11101/16000 [1:31:41<37:27,  2.18it/s]

Epoch 11100 | Loss: 0.0368 | Hinge: 0.0324 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  70%|██████▉   | 11151/16000 [1:32:04<37:02,  2.18it/s]

Epoch 11150 | Loss: 0.0362 | Hinge: 0.0245 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  70%|███████   | 11201/16000 [1:32:27<36:30,  2.19it/s]

Epoch 11200 | Loss: 0.0345 | Hinge: 0.0336 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  70%|███████   | 11251/16000 [1:32:50<36:16,  2.18it/s]

Epoch 11250 | Loss: 0.0352 | Hinge: 0.0271 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  71%|███████   | 11301/16000 [1:33:13<35:47,  2.19it/s]

Epoch 11300 | Loss: 0.0368 | Hinge: 0.0336 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.16s


Training DQN:  71%|███████   | 11351/16000 [1:33:35<35:26,  2.19it/s]

Epoch 11350 | Loss: 0.0368 | Hinge: 0.0347 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.16s


Training DQN:  71%|███████▏  | 11401/16000 [1:33:58<35:59,  2.13it/s]

Epoch 11400 | Loss: 0.0368 | Hinge: 0.0404 | Anchor: 0.0000 | Times - RW: 0.20s, Bellman: 0.04s, Train: 0.16s


Training DQN:  72%|███████▏  | 11451/16000 [1:34:21<34:47,  2.18it/s]

Epoch 11450 | Loss: 0.0366 | Hinge: 0.0340 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  72%|███████▏  | 11501/16000 [1:34:44<34:19,  2.18it/s]

Epoch 11500 | Loss: 0.0390 | Hinge: 0.0372 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  72%|███████▏  | 11551/16000 [1:35:07<33:49,  2.19it/s]

Epoch 11550 | Loss: 0.0359 | Hinge: 0.0342 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  73%|███████▎  | 11601/16000 [1:35:30<33:30,  2.19it/s]

Epoch 11600 | Loss: 0.0365 | Hinge: 0.0293 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  73%|███████▎  | 11651/16000 [1:35:53<33:02,  2.19it/s]

Epoch 11650 | Loss: 0.0387 | Hinge: 0.0331 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  73%|███████▎  | 11701/16000 [1:36:16<32:51,  2.18it/s]

Epoch 11700 | Loss: 0.0378 | Hinge: 0.0347 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  73%|███████▎  | 11751/16000 [1:36:39<33:03,  2.14it/s]

Epoch 11750 | Loss: 0.0348 | Hinge: 0.0314 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.17s


Training DQN:  74%|███████▍  | 11801/16000 [1:37:02<32:04,  2.18it/s]

Epoch 11800 | Loss: 0.0366 | Hinge: 0.0368 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  74%|███████▍  | 11851/16000 [1:37:25<31:46,  2.18it/s]

Epoch 11850 | Loss: 0.0356 | Hinge: 0.0364 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  74%|███████▍  | 11901/16000 [1:37:48<31:17,  2.18it/s]

Epoch 11900 | Loss: 0.0365 | Hinge: 0.0390 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  75%|███████▍  | 11951/16000 [1:38:10<30:57,  2.18it/s]

Epoch 11950 | Loss: 0.0379 | Hinge: 0.0324 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.16s


Training DQN:  75%|███████▌  | 12001/16000 [1:38:33<30:30,  2.18it/s]

Epoch 12000 | Loss: 0.0345 | Hinge: 0.0306 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.16s


Training DQN:  75%|███████▌  | 12051/16000 [1:38:56<30:07,  2.18it/s]

Epoch 12050 | Loss: 0.0341 | Hinge: 0.0315 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  76%|███████▌  | 12101/16000 [1:39:19<29:36,  2.20it/s]

Epoch 12100 | Loss: 0.0350 | Hinge: 0.0219 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  76%|███████▌  | 12151/16000 [1:39:42<29:24,  2.18it/s]

Epoch 12150 | Loss: 0.0341 | Hinge: 0.0268 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  76%|███████▋  | 12201/16000 [1:40:05<28:52,  2.19it/s]

Epoch 12200 | Loss: 0.0353 | Hinge: 0.0422 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  77%|███████▋  | 12251/16000 [1:40:28<28:31,  2.19it/s]

Epoch 12250 | Loss: 0.0364 | Hinge: 0.0386 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  77%|███████▋  | 12301/16000 [1:40:50<28:07,  2.19it/s]

Epoch 12300 | Loss: 0.0347 | Hinge: 0.0290 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  77%|███████▋  | 12351/16000 [1:41:15<33:05,  1.84it/s]

Epoch 12350 | Loss: 0.0362 | Hinge: 0.0391 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.21s


Training DQN:  78%|███████▊  | 12401/16000 [1:41:41<29:15,  2.05it/s]

Epoch 12400 | Loss: 0.0382 | Hinge: 0.0353 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.17s


Training DQN:  78%|███████▊  | 12451/16000 [1:42:05<29:23,  2.01it/s]

Epoch 12450 | Loss: 0.0355 | Hinge: 0.0304 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  78%|███████▊  | 12501/16000 [1:42:29<29:08,  2.00it/s]

Epoch 12500 | Loss: 0.0355 | Hinge: 0.0422 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.22s


Training DQN:  78%|███████▊  | 12551/16000 [1:42:53<27:33,  2.09it/s]

Epoch 12550 | Loss: 0.0370 | Hinge: 0.0417 | Anchor: 0.0000 | Times - RW: 0.18s, Bellman: 0.04s, Train: 0.18s


Training DQN:  79%|███████▉  | 12601/16000 [1:43:17<27:03,  2.09it/s]

Epoch 12600 | Loss: 0.0361 | Hinge: 0.0304 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  79%|███████▉  | 12651/16000 [1:43:41<25:58,  2.15it/s]

Epoch 12650 | Loss: 0.0347 | Hinge: 0.0347 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  79%|███████▉  | 12701/16000 [1:44:04<26:11,  2.10it/s]

Epoch 12700 | Loss: 0.0348 | Hinge: 0.0286 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  80%|███████▉  | 12751/16000 [1:44:28<27:10,  1.99it/s]

Epoch 12750 | Loss: 0.0337 | Hinge: 0.0279 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.07s, Train: 0.19s


Training DQN:  80%|████████  | 12801/16000 [1:44:53<27:07,  1.97it/s]

Epoch 12800 | Loss: 0.0359 | Hinge: 0.0371 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.05s, Train: 0.19s


Training DQN:  80%|████████  | 12851/16000 [1:45:19<26:58,  1.95it/s]

Epoch 12850 | Loss: 0.0373 | Hinge: 0.0347 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.18s


Training DQN:  81%|████████  | 12901/16000 [1:45:44<27:35,  1.87it/s]

Epoch 12900 | Loss: 0.0406 | Hinge: 0.0376 | Anchor: 0.0000 | Times - RW: 0.22s, Bellman: 0.05s, Train: 0.19s


Training DQN:  81%|████████  | 12951/16000 [1:46:10<26:44,  1.90it/s]

Epoch 12950 | Loss: 0.0370 | Hinge: 0.0293 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.05s, Train: 0.27s


Training DQN:  81%|████████▏ | 13001/16000 [1:46:35<25:12,  1.98it/s]

Epoch 13000 | Loss: 0.0371 | Hinge: 0.0281 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.20s


Training DQN:  82%|████████▏ | 13051/16000 [1:47:01<24:46,  1.98it/s]

Epoch 13050 | Loss: 0.0344 | Hinge: 0.0282 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  82%|████████▏ | 13101/16000 [1:47:26<25:07,  1.92it/s]

Epoch 13100 | Loss: 0.0337 | Hinge: 0.0264 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  82%|████████▏ | 13151/16000 [1:47:52<23:07,  2.05it/s]

Epoch 13150 | Loss: 0.0360 | Hinge: 0.0329 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  83%|████████▎ | 13201/16000 [1:48:15<23:16,  2.00it/s]

Epoch 13200 | Loss: 0.0349 | Hinge: 0.0441 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.05s, Train: 0.17s


Training DQN:  83%|████████▎ | 13251/16000 [1:48:42<24:25,  1.88it/s]

Epoch 13250 | Loss: 0.0349 | Hinge: 0.0341 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.18s


Training DQN:  83%|████████▎ | 13301/16000 [1:49:09<22:14,  2.02it/s]

Epoch 13300 | Loss: 0.0364 | Hinge: 0.0318 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  83%|████████▎ | 13351/16000 [1:49:34<22:45,  1.94it/s]

Epoch 13350 | Loss: 0.0339 | Hinge: 0.0317 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.20s


Training DQN:  84%|████████▍ | 13401/16000 [1:49:59<22:29,  1.93it/s]

Epoch 13400 | Loss: 0.0349 | Hinge: 0.0285 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.05s, Train: 0.20s


Training DQN:  84%|████████▍ | 13451/16000 [1:50:24<19:58,  2.13it/s]

Epoch 13450 | Loss: 0.0351 | Hinge: 0.0262 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.19s


Training DQN:  84%|████████▍ | 13501/16000 [1:50:48<20:52,  1.99it/s]

Epoch 13500 | Loss: 0.0369 | Hinge: 0.0356 | Anchor: 0.0000 | Times - RW: 0.24s, Bellman: 0.04s, Train: 0.18s


Training DQN:  85%|████████▍ | 13551/16000 [1:51:12<19:46,  2.06it/s]

Epoch 13550 | Loss: 0.0341 | Hinge: 0.0393 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  85%|████████▌ | 13601/16000 [1:51:36<19:46,  2.02it/s]

Epoch 13600 | Loss: 0.0359 | Hinge: 0.0341 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.18s


Training DQN:  85%|████████▌ | 13651/16000 [1:52:00<18:33,  2.11it/s]

Epoch 13650 | Loss: 0.0337 | Hinge: 0.0296 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  86%|████████▌ | 13701/16000 [1:52:26<19:39,  1.95it/s]

Epoch 13700 | Loss: 0.0351 | Hinge: 0.0406 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.18s


Training DQN:  86%|████████▌ | 13751/16000 [1:52:51<18:23,  2.04it/s]

Epoch 13750 | Loss: 0.0363 | Hinge: 0.0365 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.05s, Train: 0.20s


Training DQN:  86%|████████▋ | 13801/16000 [1:53:17<17:28,  2.10it/s]

Epoch 13800 | Loss: 0.0360 | Hinge: 0.0338 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.17s


Training DQN:  87%|████████▋ | 13851/16000 [1:53:42<17:19,  2.07it/s]

Epoch 13850 | Loss: 0.0347 | Hinge: 0.0327 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.17s


Training DQN:  87%|████████▋ | 13901/16000 [1:54:06<17:15,  2.03it/s]

Epoch 13900 | Loss: 0.0371 | Hinge: 0.0319 | Anchor: 0.0000 | Times - RW: 0.21s, Bellman: 0.04s, Train: 0.20s


Training DQN:  87%|████████▋ | 13951/16000 [1:54:30<17:22,  1.97it/s]

Epoch 13950 | Loss: 0.0337 | Hinge: 0.0342 | Anchor: 0.0000 | Times - RW: 0.19s, Bellman: 0.04s, Train: 0.19s


Training DQN:  88%|████████▊ | 14001/16000 [1:54:55<15:09,  2.20it/s]

Epoch 14000 | Loss: 0.0364 | Hinge: 0.0348 | Anchor: 0.0000 | Times - RW: 0.16s, Bellman: 0.04s, Train: 0.16s


Training DQN:  88%|████████▊ | 14051/16000 [1:55:20<16:26,  1.98it/s]

Epoch 14050 | Loss: 0.0365 | Hinge: 0.0345 | Anchor: 0.0000 | Times - RW: 0.17s, Bellman: 0.04s, Train: 0.19s


Training DQN:  88%|████████▊ | 14062/16000 [1:55:25<15:54,  2.03it/s]


KeyboardInterrupt: 

In [None]:
# Display summary_df via the notebook's rich repr (bare last expression).
# It is created as an empty DataFrame before the experiment loop;
# presumably filled inside that loop — verify, since the run above was interrupted.
summary_df

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import hsv_to_rgb
import ipywidgets as widgets
from IPython.display import display, clear_output


def visualize_tensor_sequence_notebook(tensor_list):
    """
    Interactively display a sequence of 1-D tensors as a row of coloured balls.

    An ``ipywidgets.IntSlider`` selects the frame; every slider change redraws
    the figure inside a ``widgets.Output`` area.

    Parameters
    ----------
    tensor_list : sequence of torch.Tensor
        Non-empty sequence of N frames. Each frame is iterated as a 1-D tensor;
        the number of balls M is taken from the first frame
        (``tensor_list[0].shape[0]``), so all frames are expected to share
        that length.

    Raises
    ------
    ValueError
        If ``tensor_list`` is empty.

    Notes
    -----
    Ball colour uses HSV with hue 0 and value 1; the saturation of each ball is
    ``element / max(frame)`` clipped to [0, 1]. A frame whose maximum is not
    positive is drawn with zero saturation instead of dividing by zero.
    """
    N = len(tensor_list)
    if N == 0:
        # Fail early with a clear message instead of an IndexError below.
        raise ValueError("tensor_list must contain at least one tensor")
    M = tensor_list[0].shape[0]
    # Positions on the line: evenly spaced points along the X axis.
    xs = np.arange(M)
    ys = np.zeros(M)

    # Slider and output area.
    slider = widgets.IntSlider(value=0, min=0, max=N-1, step=1, description='Frame')
    out = widgets.Output()

    def plot_frame(idx):
        """Redraw frame ``idx`` of ``tensor_list`` inside the output widget."""
        vals = tensor_list[idx]
        # detach() lets tensors that require grad be converted to NumPy.
        arr = vals.detach().cpu().numpy().astype(float)
        peak = arr.max()
        if peak > 0:
            # Clip so negative entries cannot push saturation outside [0, 1],
            # which hsv_to_rgb rejects.
            norm = np.clip(arr / peak, 0.0, 1.0)
        else:
            # Avoid 0/0 -> NaN colours when the frame has no positive entry.
            norm = np.zeros_like(arr)
        hsv = np.stack([np.zeros_like(norm), norm, np.ones_like(norm)], axis=1)
        rgb = hsv_to_rgb(hsv)

        with out:
            clear_output(wait=True)
            fig, ax = plt.subplots(figsize=(8,2))
            ax.set_aspect('equal')
            ax.axis('off')
            # Draw the balls along the line.
            ax.scatter(xs, ys, s=800, color=rgb)
            # Numbers inside the balls.
            for x, y, v in zip(xs, ys, vals):
                ax.text(x, y, str(v.item()), ha='center', va='center', fontsize=12, color='black')
            # 'start' label on the first frame, 'done' on the last one.
            status = 'start' if idx == 0 else ('done' if idx == N-1 else '')
            ax.text(M/2 - 0.5, 0.3, status, ha='center', va='bottom', fontsize=16)
            # Axis limits.
            ax.set_xlim(-1, M)
            ax.set_ylim(-1, 1)
            plt.show()
            # Release the figure explicitly so repeated slider moves do not
            # accumulate open figures on backends that keep them alive.
            plt.close(fig)

    # Redraw whenever the slider value changes.
    slider.observe(lambda change: plot_frame(change['new']), names='value')
    display(slider, out)
    # Draw the initial frame.
    plot_frame(0)


# Launch the interactive viewer on `path`.
# NOTE(review): `path` is not defined in this cell — presumably a list of state
# tensors produced by an earlier cell; confirm it exists after Restart & Run All.
visualize_tensor_sequence_notebook(path)