In [1]:
import os
import sys

# Absolute path of the PARENT of the current working directory. Despite the
# variable name this is not the notebook's own folder; it is added to sys.path
# presumably so that `from src.app import ...` resolves (assumes the kernel's
# working directory is the notebook's folder — TODO confirm).
notebook_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Guard the append so that re-running this cell does not stack duplicate
# entries on sys.path.
if notebook_dir not in sys.path:
    sys.path.append(notebook_dir)

In [2]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from src.app import PermutationSolver


In [3]:
n = 8  # permutation length; the n-dependent entries below are computed from it

# Solver configuration, assembled one section at a time. Keys are inserted in
# the same order a single dict literal would produce.
config = {'n_permutations_length': n}

# Random walks params
config.update({
    'random_walks_type': 'non-backtracking-beam',
    'n_random_walk_length': n * (n - 1) // 2,  # n(n-1)/2, kept as an int
    'n_random_walks_to_generate': 10000,
    'n_random_walks_steps_back_to_ban': 8,
})

# Neural Net params
config.update({
    'model_type': 'MLP',
    'list_layers_sizes': [2 ** 9],
    'n_epochs': 30,
    'batch_size': 1024,
    'lr': 0.001,
})

# DQN training
config.update({
    'n_epochs_dqn': 300,
    'flag_dqn_round': False,
    'n_random_walks_to_generate_dqn': 1000,
})

# Beam search
config.update({
    'beam_search_torch': True,
    'beam_search_Fironov': False,
    'beam_width': 1,
    'n_steps_limit': 4 * n * n,
    'alpha_previous_cost_accumulation': 0,
    'beam_search_models_or_heuristics': 'model_torch',
    'ban_p0_p1_transposition_if_p0_lt_p1_ie_already_sorted': False,
    'n_beam_search_steps_back_to_ban': 32,
})

# What to solve
config.update({
    'solve_random_or_longest_state': 'solve_LRX_longest',
    'verbose': 100,
})

In [6]:
# Hyper-parameter grid: each list holds the candidate values for one setting.
# Every list currently has a single entry, so the grid is exactly one run.

# Problem size
n_perm_list = [8]

# Training lengths (MLP epochs, DQN epochs)
n_epoch_list = [10]
n_epoch_dqn_list = [20]

# Optimisation settings
batch_size_list = [1024]  # 2**10
lr_list = [0.001]

# Beam width candidates
beam_width_list = [1]  # 2**0

# Layer-size candidates: one inner list per architecture
list_layers_sizes = [[512]]  # 2**9

In [7]:
# Grid search over the hyper-parameter lists. For every combination this
# trains the MLP, trains the DQN, runs beam search, saves both checkpoints
# under models/ and rewrites models/summary_df.csv with one row per
# completed run.
summary_records = []  # one dict per completed run; the DataFrame is built from this
summary_df = pd.DataFrame()

# torch.save and DataFrame.to_csv do not create missing parent directories.
os.makedirs('models', exist_ok=True)

for n_permutations_length in n_perm_list:
    # Give the solver its permutation length at construction time (on a copy,
    # so the base `config` is never touched) rather than patching
    # solver.config afterwards. The two entries that the config cell derives
    # from n are recomputed so they follow the swept value.
    run_config = dict(config)
    run_config['n_permutations_length'] = n_permutations_length
    run_config['n_random_walk_length'] = n_permutations_length * (n_permutations_length - 1) // 2
    run_config['n_steps_limit'] = 4 * n_permutations_length ** 2
    solver = PermutationSolver(run_config)
    # NOTE(review): the same solver instance is reused for every combination
    # below. Whether train_mlp()/train_dqn() start from fresh weights each
    # call is not visible here — confirm that runs do not warm-start from
    # the previous combination.
    for list_layers_size in list_layers_sizes:
        solver.config['list_layers_sizes'] = list_layers_size
        for lr in lr_list:
            solver.config['lr'] = lr
            for batch_size in batch_size_list:
                solver.config['batch_size'] = batch_size
                for n_epoch in n_epoch_list:
                    solver.config['n_epochs'] = n_epoch
                    mlp_losses = solver.train_mlp()
                    # Save the MLP checkpoint, tagged with the architecture
                    # actually trained (the loop variable, not the first
                    # entry of the grid).
                    torch.save(
                        solver.mlp_model.state_dict(),
                        f'models/mlp_model_{n_permutations_length}_{n_epoch}_{list_layers_size}_{lr}_{batch_size}.pth',
                    )
                    for n_epoch_dqn in n_epoch_dqn_list:
                        solver.config['n_epochs_dqn'] = n_epoch_dqn
                        dqn_losses = solver.train_dqn()
                        # Save the DQN checkpoint.
                        # NOTE(review): the name does not include n_epoch, so
                        # runs differing only in MLP epochs overwrite each
                        # other; left unchanged to keep existing file names.
                        torch.save(
                            solver.dqn_model.state_dict(),
                            f'models/dqn_model_{n_permutations_length}_{n_epoch_dqn}_{list_layers_size}_{lr}_{batch_size}.pth',
                        )
                        for beam_width in beam_width_list:
                            solver.config['beam_width'] = beam_width
                            i_step, flag_found_destination = solver.test_beam_search()
                            # Record this run: the architecture column holds
                            # this run's layer sizes (not the whole grid), and
                            # batch_size is logged so runs are distinguishable.
                            summary_records.append({
                                'n_permutations_length': n_permutations_length,
                                'list_layers_sizes': list_layers_size,
                                'lr': lr,
                                'n_epoch': n_epoch,
                                'n_epoch_dqn': n_epoch_dqn,
                                'beam_width': beam_width,
                                'i_step': i_step,
                                'flag_found_destination': flag_found_destination,
                                'mlp_losses': mlp_losses[-1],
                                'dqn_losses': dqn_losses[-1],
                                'batch_size': batch_size,
                            })
                            # Rebuild from the record list instead of
                            # concatenating frames, and rewrite the CSV after
                            # every run so an interrupted sweep keeps its
                            # completed rows.
                            summary_df = pd.DataFrame(summary_records)
                            summary_df.to_csv('models/summary_df.csv', index=False)


Training MLP: 100%|██████████| 10/10 [00:06<00:00,  1.63it/s]


Epoch 10, Loss: 0.9033
Starting DQN training for 20 epochs...


Training DQN:   5%|▌         | 1/20 [00:00<00:02,  8.10it/s]

Epoch   0 | Loss: 0.3357 | Times - RW: 0.03s, Bellman: 0.05s, Train: 0.04s


Training DQN: 100%|██████████| 20/20 [00:02<00:00,  8.22it/s]

Training finished in 2.4s
i_position_X_in_list_generators: 2
Step: 1 Beam (not cumulative) min: 24.67 median: 24.85 max: 25.21
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.000, t_unique_els  0.000, t_full_step 0.001
Step: 11 Beam (not cumulative) min: 16.93 median: 17.68 max: 18.42
Time: 0.0 t_moves  0.000, t_hash  0.000, t_isin 0.000, t_unique_els  0.000, t_full_step 1747139722.640
Step: 21 Beam (not cumulative) min: 6.97 median: 7.70 max: 8.44
Found destination state.  i_step: 28  n_ways: tensor(1)

{'n_permutations_length': 8, 'random_walks_type': 'non-backtracking-beam', 'n_random_walk_length': 28, 'n_random_walks_to_generate': 10000, 'n_random_walks_steps_back_to_ban': 8, 'model_type': 'MLP', 'list_layers_sizes': [512], 'n_epochs': 10, 'batch_size': 1024, 'lr': 0.001, 'n_epochs_dqn': 20, 'flag_dqn_round': False, 'n_random_walks_to_generate_dqn': 1000, 'beam_search_torch': True, 'beam_search_Fironov': False, 'beam_width': 1, 'n_steps_limit': 256, 'alpha_previous_cost_accumulat




In [9]:
# One end-to-end run with the base configuration: build the solver, train
# both models, then evaluate with beam search.
solver = PermutationSolver(config)

# Stage 1: MLP training; the returned losses are kept for later inspection.
print("\nTraining MLP model...")
mlp_losses = solver.train_mlp()

# Stage 2: DQN training (per the log message, it starts from the MLP weights).
print("\nTraining DQN model with pre-trained MLP weights...")
dqn_losses = solver.train_dqn()

# Stage 3: beam-search evaluation. Both banner lines go out in one print
# call; the bytes written to stdout match two separate prints.
print("\nTesting models with beam search...", "\nTesting DQN model...", sep="\n")
dqn_result = solver.test_beam_search()


Training MLP model...


Training MLP:  33%|███▎      | 10/30 [00:05<00:09,  2.04it/s]

Epoch 10, Loss: 1.6647


Training MLP:  47%|████▋     | 14/30 [00:07<00:08,  1.89it/s]


KeyboardInterrupt: 