In [1]:
import os
import sys

# Resolve the PARENT of the current working directory (not the notebook's own
# directory) and make it importable -- presumably this is the repository root
# that holds the `src` package imported in the next cell.
notebook_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Guard the append so that re-running this cell does not keep stacking
# duplicate entries onto sys.path.
if notebook_dir not in sys.path:
    sys.path.append(notebook_dir)

In [2]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from src.app import PermutationSolver


In [3]:
# Baseline solver configuration. Values derived from `n` are evaluated once,
# right here, when the dict literal is built.
n = 12
config = {
    "n_permutations_length": n,

    # --- Random walks params ---
    "random_walks_type": "non-backtracking-beam",
    "n_random_walk_length": n * (n - 1) // 2,  # 66 for n = 12
    "n_random_walks_to_generate": 10_000,
    "n_random_walks_steps_back_to_ban": 8,

    # --- Neural Net params ---
    "model_type": "MLP",
    "list_layers_sizes": [512],
    "n_epochs": 30,
    "batch_size": 1024,
    "lr": 1e-3,

    # --- DQN training ---
    "n_epochs_dqn": 300,
    "flag_dqn_round": False,
    "n_random_walks_to_generate_dqn": 1_000,
    "verbose_loc": 5,
    "lr_dqn": 1e-6,

    # --- Beam search ---
    "beam_search_torch": True,
    "beam_search_Fironov": False,
    "beam_width": 1,
    "n_steps_limit": 4 * n * n,  # 576 for n = 12
    "alpha_previous_cost_accumulation": 0,
    "beam_search_models_or_heuristics": "model_torch",
    "ban_p0_p1_transposition_if_p0_lt_p1_ie_already_sorted": False,
    "n_beam_search_steps_back_to_ban": 32,

    # --- What to solve ---
    "solve_random_or_longest_state": "solve_LRX_longest",
    "verbose": 100,
}

In [4]:
# Hyper-parameter grid for the sweep cell: each list holds the values to try,
# and the trailing comment names the solver.config key that the sweep
# overwrites with them.
n_perm_list = [48]            # -> 'n_permutations_length'
n_epoch_list = [100]          # -> 'n_epochs'
n_epoch_dqn_list = [300]      # -> 'n_epochs_dqn'
batch_size_list = [1024]      # -> 'batch_size'
lr_list = [1e-6]              # -> 'lr_dqn' (the sweep does not touch 'lr')
beam_width_list = [1]         # -> 'beam_width'
list_layers_sizes = [[8192]]  # -> 'list_layers_sizes' (one inner list per run)

In [5]:
# Hyper-parameter sweep.
#
# For every combination in the grid lists: train the MLP, train the DQN, run
# the beam-search test, checkpoint both models under models/, and rewrite
# models/summary_df.csv after each beam-search run so partial results survive
# an interrupted sweep.

# torch.save / to_csv do not create missing directories.
os.makedirs('models', exist_ok=True)

# One dict per finished run; the DataFrame is built from this list instead of
# being grown with pd.concat inside the loop.
summary_records = []
summary_df = pd.DataFrame()

for n_permutations_length in n_perm_list:
    solver = PermutationSolver(config)
    # NOTE(review): the length is overridden AFTER construction -- confirm that
    # PermutationSolver does not cache anything derived from it in __init__.
    # NOTE(review): config['n_random_walk_length'] and config['n_steps_limit']
    # were computed from the module-level `n` when `config` was built and are
    # NOT recomputed for this override -- confirm that is intended.
    solver.config['n_permutations_length'] = n_permutations_length
    for list_layers_size in list_layers_sizes:
        solver.config['list_layers_sizes'] = list_layers_size
        for lr in lr_list:
            # The swept learning rate is applied to the DQN stage only.
            solver.config['lr_dqn'] = lr
            for batch_size in batch_size_list:
                solver.config['batch_size'] = batch_size
                for n_epoch in n_epoch_list:
                    solver.config['n_epochs'] = n_epoch
                    mlp_losses = solver.train_mlp()
                    # Checkpoint the MLP. The name uses the architecture of
                    # THIS run (list_layers_size), not the first grid entry,
                    # so different architectures no longer overwrite each other.
                    torch.save(
                        solver.mlp_model.state_dict(),
                        f'models/mlp_model_{n_permutations_length}_{n_epoch}_{list_layers_size}_{lr}_{batch_size}.pth',
                    )
                    for n_epoch_dqn in n_epoch_dqn_list:
                        solver.config['n_epochs_dqn'] = n_epoch_dqn
                        dqn_losses = solver.train_dqn()
                        # Checkpoint the DQN (same naming fix as above).
                        torch.save(
                            solver.dqn_model.state_dict(),
                            f'models/dqn_model_{n_permutations_length}_{n_epoch_dqn}_{list_layers_size}_{lr}_{batch_size}.pth',
                        )
                        for beam_width in beam_width_list:
                            solver.config['beam_width'] = beam_width
                            i_step, flag_found_destination = solver.test_beam_search()
                            # Exactly one summary row per run. The architecture
                            # of this run is stored as a single cell value, and
                            # batch_size is recorded so rows stay identifiable.
                            summary_records.append({
                                'n_permutations_length': n_permutations_length,
                                'list_layers_sizes': list_layers_size,
                                'lr': lr,
                                'batch_size': batch_size,
                                'n_epoch': n_epoch,
                                'n_epoch_dqn': n_epoch_dqn,
                                'beam_width': beam_width,
                                'i_step': i_step,
                                'flag_found_destination': flag_found_destination,
                                'mlp_losses': mlp_losses[-1],  # last reported MLP loss
                                'dqn_losses': dqn_losses[-1],  # last reported DQN loss
                            })
                            # Persist after every run so an interrupt loses
                            # at most the run in progress.
                            summary_df = pd.DataFrame(summary_records)
                            summary_df.to_csv('models/summary_df.csv', index=False)


Training MLP:  10%|█         | 10/100 [01:19<11:49,  7.89s/it]

Epoch 10, Loss: 47.2713


Training MLP:  20%|██        | 20/100 [02:37<10:30,  7.89s/it]

Epoch 20, Loss: 43.2165


Training MLP:  30%|███       | 30/100 [03:56<09:07,  7.81s/it]

Epoch 30, Loss: 41.6675


Training MLP:  40%|████      | 40/100 [05:12<07:37,  7.63s/it]

Epoch 40, Loss: 40.4901


Training MLP:  50%|█████     | 50/100 [06:30<06:34,  7.88s/it]

Epoch 50, Loss: 41.9083


Training MLP:  60%|██████    | 60/100 [07:48<05:13,  7.83s/it]

Epoch 60, Loss: 40.9582


Training MLP:  70%|███████   | 70/100 [09:07<03:55,  7.85s/it]

Epoch 70, Loss: 40.7895


Training MLP:  80%|████████  | 80/100 [10:26<02:38,  7.93s/it]

Epoch 80, Loss: 39.4219


Training MLP:  90%|█████████ | 90/100 [11:45<01:18,  7.88s/it]

Epoch 90, Loss: 42.6265


Training MLP: 100%|██████████| 100/100 [13:04<00:00,  7.84s/it]

Epoch 100, Loss: 41.3827





Starting DQN training for 300 epochs...


Training DQN:   0%|          | 1/300 [00:04<23:19,  4.68s/it]

Epoch   0 | Loss: 0.9287 | Times - RW: 2.94s, Train: 1.74s | Buffer size: 66000


Training DQN:   2%|▏         | 6/300 [00:33<27:43,  5.66s/it]

Epoch   5 | Loss: 0.9224 | Times - RW: 3.44s, Train: 2.28s | Buffer size: 200000


Training DQN:   4%|▎         | 11/300 [01:01<27:04,  5.62s/it]

Epoch  10 | Loss: 0.9255 | Times - RW: 3.49s, Train: 2.08s | Buffer size: 200000


Training DQN:   5%|▌         | 16/300 [01:30<27:01,  5.71s/it]

Epoch  15 | Loss: 0.9251 | Times - RW: 3.44s, Train: 2.25s | Buffer size: 200000


Training DQN:   7%|▋         | 21/300 [01:58<25:55,  5.57s/it]

Epoch  20 | Loss: 0.9148 | Times - RW: 3.40s, Train: 2.05s | Buffer size: 200000


Training DQN:   9%|▊         | 26/300 [02:26<25:44,  5.64s/it]

Epoch  25 | Loss: 0.9153 | Times - RW: 3.46s, Train: 2.29s | Buffer size: 200000


Training DQN:  10%|█         | 31/300 [02:54<25:05,  5.60s/it]

Epoch  30 | Loss: 0.9029 | Times - RW: 3.45s, Train: 2.08s | Buffer size: 200000


Training DQN:  12%|█▏        | 36/300 [03:22<24:50,  5.64s/it]

Epoch  35 | Loss: 0.9200 | Times - RW: 3.44s, Train: 2.32s | Buffer size: 200000


Training DQN:  14%|█▎        | 41/300 [03:50<24:12,  5.61s/it]

Epoch  40 | Loss: 0.9050 | Times - RW: 3.43s, Train: 2.07s | Buffer size: 200000


Training DQN:  15%|█▌        | 46/300 [04:18<23:47,  5.62s/it]

Epoch  45 | Loss: 0.9160 | Times - RW: 3.39s, Train: 2.27s | Buffer size: 200000


Training DQN:  17%|█▋        | 51/300 [04:46<23:01,  5.55s/it]

Epoch  50 | Loss: 0.9025 | Times - RW: 3.42s, Train: 2.02s | Buffer size: 200000


Training DQN:  19%|█▊        | 56/300 [05:14<22:46,  5.60s/it]

Epoch  55 | Loss: 0.9006 | Times - RW: 3.40s, Train: 2.32s | Buffer size: 200000


Training DQN:  20%|██        | 61/300 [05:42<22:04,  5.54s/it]

Epoch  60 | Loss: 0.9182 | Times - RW: 3.37s, Train: 2.04s | Buffer size: 200000


Training DQN:  22%|██▏       | 66/300 [06:10<21:45,  5.58s/it]

Epoch  65 | Loss: 0.9141 | Times - RW: 3.40s, Train: 2.27s | Buffer size: 200000


Training DQN:  24%|██▎       | 71/300 [06:37<21:08,  5.54s/it]

Epoch  70 | Loss: 0.9000 | Times - RW: 3.40s, Train: 2.04s | Buffer size: 200000


Training DQN:  25%|██▌       | 76/300 [07:05<20:59,  5.62s/it]

Epoch  75 | Loss: 0.9113 | Times - RW: 3.43s, Train: 2.30s | Buffer size: 200000


Training DQN:  27%|██▋       | 81/300 [07:33<20:16,  5.56s/it]

Epoch  80 | Loss: 0.9190 | Times - RW: 3.37s, Train: 2.03s | Buffer size: 200000


Training DQN:  29%|██▊       | 86/300 [08:02<20:14,  5.67s/it]

Epoch  85 | Loss: 0.9087 | Times - RW: 3.46s, Train: 2.33s | Buffer size: 200000


Training DQN:  30%|███       | 91/300 [08:30<20:11,  5.80s/it]

Epoch  90 | Loss: 0.9085 | Times - RW: 3.66s, Train: 2.22s | Buffer size: 200000


Training DQN:  32%|███▏      | 96/300 [08:59<19:31,  5.74s/it]

Epoch  95 | Loss: 0.9172 | Times - RW: 3.47s, Train: 2.10s | Buffer size: 200000


Training DQN:  34%|███▎      | 101/300 [09:28<19:17,  5.82s/it]

Epoch 100 | Loss: 0.9231 | Times - RW: 3.61s, Train: 2.40s | Buffer size: 200000


Training DQN:  35%|███▌      | 106/300 [09:57<18:20,  5.67s/it]

Epoch 105 | Loss: 0.9211 | Times - RW: 3.40s, Train: 2.04s | Buffer size: 200000


Training DQN:  37%|███▋      | 111/300 [10:25<17:55,  5.69s/it]

Epoch 110 | Loss: 0.9210 | Times - RW: 3.44s, Train: 2.32s | Buffer size: 200000


Training DQN:  39%|███▊      | 116/300 [10:53<17:06,  5.58s/it]

Epoch 115 | Loss: 0.9353 | Times - RW: 3.37s, Train: 2.04s | Buffer size: 200000


Training DQN:  40%|████      | 121/300 [11:21<16:40,  5.59s/it]

Epoch 120 | Loss: 0.9191 | Times - RW: 3.37s, Train: 2.34s | Buffer size: 200000


Training DQN:  42%|████▏     | 126/300 [11:50<16:21,  5.64s/it]

Epoch 125 | Loss: 0.9552 | Times - RW: 3.42s, Train: 2.17s | Buffer size: 200000


Training DQN:  44%|████▎     | 131/300 [12:18<15:53,  5.64s/it]

Epoch 130 | Loss: 0.9523 | Times - RW: 3.46s, Train: 2.30s | Buffer size: 200000


Training DQN:  45%|████▌     | 136/300 [12:45<15:09,  5.55s/it]

Epoch 135 | Loss: 0.9462 | Times - RW: 3.44s, Train: 2.03s | Buffer size: 200000


Training DQN:  47%|████▋     | 141/300 [13:14<15:03,  5.68s/it]

Epoch 140 | Loss: 0.9674 | Times - RW: 3.46s, Train: 2.39s | Buffer size: 200000


Training DQN:  49%|████▊     | 146/300 [13:42<14:18,  5.58s/it]

Epoch 145 | Loss: 0.9600 | Times - RW: 3.37s, Train: 2.05s | Buffer size: 200000


Training DQN:  50%|█████     | 151/300 [14:10<13:48,  5.56s/it]

Epoch 150 | Loss: 0.9616 | Times - RW: 3.35s, Train: 2.30s | Buffer size: 200000


Training DQN:  52%|█████▏    | 156/300 [14:37<13:16,  5.53s/it]

Epoch 155 | Loss: 0.9581 | Times - RW: 3.35s, Train: 2.05s | Buffer size: 200000


Training DQN:  54%|█████▎    | 161/300 [15:05<12:49,  5.54s/it]

Epoch 160 | Loss: 0.9582 | Times - RW: 3.34s, Train: 2.29s | Buffer size: 200000


Training DQN:  55%|█████▌    | 166/300 [15:32<12:15,  5.49s/it]

Epoch 165 | Loss: 0.9711 | Times - RW: 3.36s, Train: 2.02s | Buffer size: 200000


Training DQN:  57%|█████▋    | 171/300 [16:00<11:55,  5.55s/it]

Epoch 170 | Loss: 0.9938 | Times - RW: 3.35s, Train: 2.32s | Buffer size: 200000


Training DQN:  59%|█████▊    | 176/300 [16:28<11:22,  5.50s/it]

Epoch 175 | Loss: 1.0149 | Times - RW: 3.34s, Train: 2.05s | Buffer size: 200000


Training DQN:  60%|██████    | 181/300 [16:56<11:09,  5.63s/it]

Epoch 180 | Loss: 1.0028 | Times - RW: 3.50s, Train: 2.32s | Buffer size: 200000


Training DQN:  62%|██████▏   | 186/300 [17:23<10:33,  5.56s/it]

Epoch 185 | Loss: 1.0089 | Times - RW: 3.39s, Train: 2.04s | Buffer size: 200000


Training DQN:  64%|██████▎   | 191/300 [17:51<10:11,  5.61s/it]

Epoch 190 | Loss: 1.0306 | Times - RW: 3.39s, Train: 2.33s | Buffer size: 200000


Training DQN:  65%|██████▌   | 196/300 [18:19<09:38,  5.56s/it]

Epoch 195 | Loss: 1.0282 | Times - RW: 3.40s, Train: 2.06s | Buffer size: 200000


Training DQN:  67%|██████▋   | 201/300 [18:47<09:15,  5.61s/it]

Epoch 200 | Loss: 1.0345 | Times - RW: 3.42s, Train: 2.35s | Buffer size: 200000


Training DQN:  69%|██████▊   | 206/300 [19:16<08:50,  5.64s/it]

Epoch 205 | Loss: 1.0273 | Times - RW: 3.44s, Train: 2.06s | Buffer size: 200000


Training DQN:  70%|███████   | 211/300 [19:44<08:21,  5.64s/it]

Epoch 210 | Loss: 1.0564 | Times - RW: 3.44s, Train: 2.35s | Buffer size: 200000


Training DQN:  72%|███████▏  | 216/300 [20:12<07:48,  5.58s/it]

Epoch 215 | Loss: 1.0765 | Times - RW: 3.39s, Train: 2.02s | Buffer size: 200000


Training DQN:  74%|███████▎  | 221/300 [20:39<07:19,  5.56s/it]

Epoch 220 | Loss: 1.0650 | Times - RW: 3.33s, Train: 2.26s | Buffer size: 200000


Training DQN:  75%|███████▌  | 226/300 [21:07<06:51,  5.56s/it]

Epoch 225 | Loss: 1.0609 | Times - RW: 3.41s, Train: 2.04s | Buffer size: 200000


Training DQN:  77%|███████▋  | 231/300 [21:35<06:18,  5.49s/it]

Epoch 230 | Loss: 1.0877 | Times - RW: 3.35s, Train: 2.07s | Buffer size: 200000


Training DQN:  79%|███████▊  | 236/300 [22:03<05:57,  5.59s/it]

Epoch 235 | Loss: 1.0970 | Times - RW: 3.40s, Train: 2.37s | Buffer size: 200000


Training DQN:  80%|████████  | 241/300 [22:30<05:28,  5.56s/it]

Epoch 240 | Loss: 1.1153 | Times - RW: 3.39s, Train: 2.02s | Buffer size: 200000


Training DQN:  82%|████████▏ | 246/300 [22:58<05:00,  5.56s/it]

Epoch 245 | Loss: 1.1046 | Times - RW: 3.31s, Train: 2.31s | Buffer size: 200000


Training DQN:  84%|████████▎ | 251/300 [23:26<04:30,  5.51s/it]

Epoch 250 | Loss: 1.1149 | Times - RW: 3.35s, Train: 2.08s | Buffer size: 200000


Training DQN:  85%|████████▌ | 256/300 [23:54<04:06,  5.60s/it]

Epoch 255 | Loss: 1.1433 | Times - RW: 3.39s, Train: 2.28s | Buffer size: 200000


Training DQN:  87%|████████▋ | 261/300 [24:22<03:36,  5.55s/it]

Epoch 260 | Loss: 1.1364 | Times - RW: 3.40s, Train: 2.09s | Buffer size: 200000


Training DQN:  89%|████████▊ | 266/300 [24:50<03:10,  5.61s/it]

Epoch 265 | Loss: 1.1549 | Times - RW: 3.39s, Train: 2.31s | Buffer size: 200000


Training DQN:  90%|█████████ | 271/300 [25:17<02:39,  5.52s/it]

Epoch 270 | Loss: 1.1722 | Times - RW: 3.31s, Train: 1.98s | Buffer size: 200000


Training DQN:  92%|█████████▏| 276/300 [25:45<02:14,  5.62s/it]

Epoch 275 | Loss: 1.1942 | Times - RW: 3.42s, Train: 2.37s | Buffer size: 200000


Training DQN:  94%|█████████▎| 281/300 [26:13<01:44,  5.48s/it]

Epoch 280 | Loss: 1.1820 | Times - RW: 3.32s, Train: 2.00s | Buffer size: 200000


Training DQN:  95%|█████████▌| 286/300 [26:40<01:17,  5.54s/it]

Epoch 285 | Loss: 1.2033 | Times - RW: 3.38s, Train: 2.38s | Buffer size: 200000


Training DQN:  97%|█████████▋| 291/300 [27:08<00:49,  5.48s/it]

Epoch 290 | Loss: 1.2052 | Times - RW: 3.31s, Train: 1.99s | Buffer size: 200000


Training DQN:  99%|█████████▊| 296/300 [27:36<00:22,  5.64s/it]

Epoch 295 | Loss: 1.2379 | Times - RW: 3.41s, Train: 2.39s | Buffer size: 200000


Training DQN: 100%|██████████| 300/300 [27:59<00:00,  5.60s/it]


Training finished in 1679.2s
i_position_X_in_list_generators: 2
Step: 1 Beam (not cumulative) min: 36.14 median: 36.56 max: 37.56
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.001, t_unique_els  0.002, t_full_step 0.010
Step: 11 Beam (not cumulative) min: 35.14 median: 35.28 max: 35.42
Time: 0.0 t_moves  0.001, t_hash  0.000, t_isin 0.001, t_unique_els  0.000, t_full_step 1747165147.164
Step: 21 Beam (not cumulative) min: 34.15 median: 34.66 max: 35.17
Step: 31 Beam (not cumulative) min: 35.05 median: 35.51 max: 35.97
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.000, t_unique_els  0.001, t_full_step 0.002
Step: 41 Beam (not cumulative) min: 34.07 median: 34.52 max: 34.96
Time: 0.0 t_moves  0.000, t_hash  0.001, t_isin 0.000, t_unique_els  0.000, t_full_step 1747165147.217
Step: 51 Beam (not cumulative) min: 33.34 median: 33.77 max: 34.19
Step: 61 Beam (not cumulative) min: 0.00 median: 0.00 max: 0.00
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.001, t_unique_els  0.000, t

In [6]:
# Set the solver's 'n_steps_limit' to 10000 (the base config computes it as
# 4 * n**2). This mutates the existing `solver` in place, so the cell only
# works after a cell that created `solver` has already run.
solver.config['n_steps_limit'] = 10000

In [7]:
# Set 'beam_width' to 1024*10 (= 10240) on the existing solver and re-run the
# beam-search test with the models it currently holds; no retraining happens
# in this cell. test_beam_search() is unpacked into a step count and a
# found/not-found flag -- NOTE(review): exact semantics live in src.app.
solver.config['beam_width'] = 1024*10
i_step, flag_found_destination = solver.test_beam_search()

i_position_X_in_list_generators: 2
Step: 1 Beam (not cumulative) min: 0.00 median: 0.00 max: 0.00
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.003, t_unique_els  0.001, t_full_step 0.004
Step: 11 Beam (not cumulative) min: 0.00 median: 0.00 max: 0.00
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.002, t_unique_els  0.058, t_full_step 1747165253.029
Step: 21 Beam (not cumulative) min: 31.30 median: 34.37 max: 38.73
Step: 31 Beam (not cumulative) min: 31.01 median: 33.58 max: 38.98
Time: 0.1 t_moves  0.000, t_hash  0.000, t_isin 0.002, t_unique_els  0.054, t_full_step 0.114
Step: 41 Beam (not cumulative) min: 30.41 median: 33.18 max: 40.02
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.001, t_unique_els  0.051, t_full_step 1747165254.859
Step: 51 Beam (not cumulative) min: 30.30 median: 32.92 max: 38.29
Step: 61 Beam (not cumulative) min: 30.19 median: 32.89 max: 38.69
Time: 0.1 t_moves  0.000, t_hash  0.000, t_isin 0.002, t_unique_els  0.049, t_full_step 0.105
Step: 71 Beam (

In [9]:
# End-to-end run on a fresh solver built from `config`: train the MLP, train
# the DQN, then run the beam-search test. This rebinds `solver`, replacing the
# instance used by earlier cells.
# NOTE(review): whether `config` still holds its original values here depends
# on whether PermutationSolver copies the dict it is given -- confirm.
solver = PermutationSolver(config)

# Stage 1: MLP training; the returned losses are kept in mlp_losses.
print("\nTraining MLP model...")
mlp_losses = solver.train_mlp()

# Stage 2: DQN training. The message below says it starts from the
# pre-trained MLP weights -- NOTE(review): that hand-off is not visible in
# this notebook; verify in train_dqn().
print("\nTraining DQN model with pre-trained MLP weights...")
dqn_losses = solver.train_dqn()

# Stage 3: beam-search test. The return value is stored as-is in dqn_result
# (other cells unpack it as `i_step, flag_found_destination`).
print("\nTesting models with beam search...")
print("\nTesting DQN model...")
dqn_result = solver.test_beam_search()


Training MLP model...


Training MLP:  33%|███▎      | 10/30 [00:05<00:09,  2.04it/s]

Epoch 10, Loss: 1.6647


Training MLP:  47%|████▋     | 14/30 [00:07<00:08,  1.89it/s]


KeyboardInterrupt: 