In [1]:
import os
import sys

# Get the absolute path of the notebook's directory
notebook_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(notebook_dir)

In [2]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from src.app import PermutationSolver


In [3]:
n = 8
config = {
            'n_permutations_length': n,
            
            # Random walks params
            'random_walks_type': 'non-backtracking-beam',
            'n_random_walk_length': int(n * (n-1) / 2),
            'n_random_walks_to_generate': 10000,
            'n_random_walks_steps_back_to_ban': 8,
            
            # Neural Net params
            'model_type': 'MLP',
            'list_layers_sizes': [2**9],
            'n_epochs': 30,
            'batch_size': 1024,
            'lr': 0.001,
            
            # DQN training
            'n_epochs_dqn': 300,
            'flag_dqn_round': False,
            'n_random_walks_to_generate_dqn': 1000,
            'verbose_loc': 5,
            'lr_dqn': 0.0005,
            
            # Beam search
            'beam_search_torch': True,
            'beam_search_Fironov': False,
            'beam_width': 1,
            'n_steps_limit': 4 * n**2,
            'alpha_previous_cost_accumulation': 0,
            'beam_search_models_or_heuristics': 'model_torch',
            'ban_p0_p1_transposition_if_p0_lt_p1_ie_already_sorted': False,
            'n_beam_search_steps_back_to_ban': 32,
            
            # What to solve
            'solve_random_or_longest_state': 'solve_LRX_longest',
            'verbose': 100
        }

In [4]:
n_perm_list = [n]
n_epoch_list = [30]
n_epoch_dqn_list = [100]
batch_size_list = [2**10]
lr_list = [0.001]
beam_width_list = [2**0]
list_layers_sizes = [[2*9]]


In [5]:
config['mode'] = 'single_hard_hinge'
config['w_anchor'] = 0.0
config['w_hinge'] = 1.0


In [6]:
summary_df = pd.DataFrame()

for n_permutations_length in n_perm_list:
    solver = PermutationSolver(config)
    solver.config['n_permutations_length'] = n_permutations_length
    for list_layers_size in list_layers_sizes:
        solver.config['list_layers_sizes'] = list_layers_size
        for lr in lr_list:
            solver.config['lr_dqn'] = lr
            for batch_size in batch_size_list:
                solver.config['batch_size'] = batch_size
                for n_epoch in n_epoch_list:
                    solver.config['n_epochs'] = n_epoch
                    mlp_losses = solver.train_mlp()
                    # save mlp_model
                    torch.save(solver.mlp_model.state_dict(), f'models/mlp_model_{n_permutations_length}_{n_epoch}_{list_layers_sizes[0]}_{lr}_{batch_size}.pth')
                    for n_epoch_dqn in n_epoch_dqn_list:
                        solver.config['n_epochs_dqn'] = n_epoch_dqn
                        dqn_losses = solver.train_dqn()
                        # save dqn_model
                        torch.save(solver.dqn_model.state_dict(), f'models/dqn_model_{n_permutations_length}_{n_epoch_dqn}_{list_layers_sizes[0]}_{lr}_{batch_size}.pth')
                        for beam_width in beam_width_list:
                            solver.config['beam_width'] = beam_width
                            i_step, flag_found_destination, path = solver.test_beam_search()
                            # save summary_df
                            summary_df = pd.concat([summary_df, pd.DataFrame({'n_permutations_length': n_permutations_length, 'list_layers_sizes': list_layers_sizes, 'lr': lr, 'n_epoch': n_epoch, 'n_epoch_dqn': n_epoch_dqn, 'beam_width': beam_width, 'i_step': i_step, 'flag_found_destination': flag_found_destination, 'mlp_losses': mlp_losses[-1], 'dqn_losses': dqn_losses[-1]})])
                            summary_df.to_csv('models/summary_df.csv', index=False)


Training MLP:  33%|███▎      | 10/30 [00:04<00:09,  2.22it/s]

Epoch 10, Loss: 2.1432


Training MLP:  67%|██████▋   | 20/30 [00:09<00:04,  2.08it/s]

Epoch 20, Loss: 1.2976


Training MLP: 100%|██████████| 30/30 [00:14<00:00,  2.08it/s]


Epoch 30, Loss: 2.1955
40320
X.shape: torch.Size([40320, 8])
y.shape: torch.Size([40320])
Starting DQN training for 100 epochs...


Training DQN:   2%|▏         | 2/100 [00:00<00:11,  8.89it/s]

Epoch   0 | Loss: 0.8001 | Hinge: 0.7727 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.03s, Train: 0.05s


Training DQN:   7%|▋         | 7/100 [00:00<00:09,  9.53it/s]

Epoch   5 | Loss: 0.4822 | Hinge: 0.4482 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.04s


Training DQN:  12%|█▏        | 12/100 [00:01<00:09,  9.05it/s]

Epoch  10 | Loss: 0.3849 | Hinge: 0.4041 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  17%|█▋        | 17/100 [00:01<00:09,  8.64it/s]

Epoch  15 | Loss: 0.3410 | Hinge: 0.2612 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  22%|██▏       | 22/100 [00:02<00:08,  9.21it/s]

Epoch  20 | Loss: 0.3118 | Hinge: 0.3009 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  27%|██▋       | 27/100 [00:03<00:08,  8.92it/s]

Epoch  25 | Loss: 0.2995 | Hinge: 0.3039 | Anchor: 0.0000 | Times - RW: 0.06s, Bellman: 0.01s, Train: 0.05s


Training DQN:  32%|███▏      | 32/100 [00:03<00:08,  8.09it/s]

Epoch  30 | Loss: 0.2817 | Hinge: 0.2806 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  37%|███▋      | 37/100 [00:04<00:06,  9.06it/s]

Epoch  35 | Loss: 0.2669 | Hinge: 0.2958 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  42%|████▏     | 42/100 [00:04<00:06,  9.28it/s]

Epoch  40 | Loss: 0.2642 | Hinge: 0.2851 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  47%|████▋     | 47/100 [00:05<00:05,  9.19it/s]

Epoch  45 | Loss: 0.2578 | Hinge: 0.2677 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.06s


Training DQN:  53%|█████▎    | 53/100 [00:05<00:04,  9.61it/s]

Epoch  50 | Loss: 0.2542 | Hinge: 0.2128 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  57%|█████▋    | 57/100 [00:06<00:04,  9.50it/s]

Epoch  55 | Loss: 0.2470 | Hinge: 0.2485 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  63%|██████▎   | 63/100 [00:06<00:03,  9.51it/s]

Epoch  60 | Loss: 0.2482 | Hinge: 0.2140 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.04s


Training DQN:  67%|██████▋   | 67/100 [00:07<00:03,  9.31it/s]

Epoch  65 | Loss: 0.2374 | Hinge: 0.2334 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  72%|███████▏  | 72/100 [00:07<00:03,  9.11it/s]

Epoch  70 | Loss: 0.2365 | Hinge: 0.2577 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  77%|███████▋  | 77/100 [00:08<00:02,  8.99it/s]

Epoch  75 | Loss: 0.2354 | Hinge: 0.2074 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.06s


Training DQN:  82%|████████▏ | 82/100 [00:09<00:01,  9.26it/s]

Epoch  80 | Loss: 0.2296 | Hinge: 0.2397 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  87%|████████▋ | 87/100 [00:09<00:01,  9.24it/s]

Epoch  85 | Loss: 0.2335 | Hinge: 0.2354 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  92%|█████████▏| 92/100 [00:10<00:00,  8.99it/s]

Epoch  90 | Loss: 0.2224 | Hinge: 0.1942 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN:  97%|█████████▋| 97/100 [00:10<00:00,  9.02it/s]

Epoch  95 | Loss: 0.2305 | Hinge: 0.2324 | Anchor: 0.0000 | Times - RW: 0.05s, Bellman: 0.01s, Train: 0.05s


Training DQN: 100%|██████████| 100/100 [00:11<00:00,  9.08it/s]


Training finished in 11.0s
{'n_permutations_length': 8, 'random_walks_type': 'non-backtracking-beam', 'n_random_walk_length': 28, 'n_random_walks_to_generate': 10000, 'n_random_walks_steps_back_to_ban': 8, 'model_type': 'MLP', 'list_layers_sizes': [18], 'n_epochs': 30, 'batch_size': 1024, 'lr': 0.001, 'n_epochs_dqn': 100, 'flag_dqn_round': False, 'n_random_walks_to_generate_dqn': 1000, 'verbose_loc': 5, 'lr_dqn': 0.001, 'beam_search_torch': True, 'beam_search_Fironov': False, 'beam_width': 1, 'n_steps_limit': 256, 'alpha_previous_cost_accumulation': 0, 'beam_search_models_or_heuristics': 'model_torch', 'ban_p0_p1_transposition_if_p0_lt_p1_ie_already_sorted': False, 'n_beam_search_steps_back_to_ban': 32, 'solve_random_or_longest_state': 'solve_LRX_longest', 'verbose': 100, 'mode': 'single_hard_hinge', 'w_anchor': 0.0, 'w_hinge': 1.0}

beam_width: 1
n= 8
n(n-1)/2= 28
Found Path Length: 28 flag_found_destination: True


In [8]:
path

[tensor([1, 0, 7, 6, 5, 4, 3, 2], device='cuda:0'),
 tensor([0, 1, 7, 6, 5, 4, 3, 2]),
 tensor([1, 7, 6, 5, 4, 3, 2, 0]),
 tensor([7, 1, 6, 5, 4, 3, 2, 0]),
 tensor([0, 7, 1, 6, 5, 4, 3, 2]),
 tensor([7, 0, 1, 6, 5, 4, 3, 2]),
 tensor([0, 1, 6, 5, 4, 3, 2, 7]),
 tensor([1, 6, 5, 4, 3, 2, 7, 0]),
 tensor([6, 5, 4, 3, 2, 7, 0, 1]),
 tensor([5, 4, 3, 2, 7, 0, 1, 6]),
 tensor([4, 5, 3, 2, 7, 0, 1, 6]),
 tensor([5, 3, 2, 7, 0, 1, 6, 4]),
 tensor([3, 5, 2, 7, 0, 1, 6, 4]),
 tensor([4, 3, 5, 2, 7, 0, 1, 6]),
 tensor([3, 4, 5, 2, 7, 0, 1, 6]),
 tensor([6, 3, 4, 5, 2, 7, 0, 1]),
 tensor([3, 6, 4, 5, 2, 7, 0, 1]),
 tensor([6, 4, 5, 2, 7, 0, 1, 3]),
 tensor([4, 6, 5, 2, 7, 0, 1, 3]),
 tensor([6, 5, 2, 7, 0, 1, 3, 4]),
 tensor([5, 6, 2, 7, 0, 1, 3, 4]),
 tensor([6, 2, 7, 0, 1, 3, 4, 5]),
 tensor([2, 7, 0, 1, 3, 4, 5, 6]),
 tensor([7, 2, 0, 1, 3, 4, 5, 6]),
 tensor([2, 0, 1, 3, 4, 5, 6, 7]),
 tensor([0, 2, 1, 3, 4, 5, 6, 7]),
 tensor([2, 1, 3, 4, 5, 6, 7, 0]),
 tensor([1, 2, 3, 4, 5, 6, 7, 0]),
 te

In [7]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import hsv_to_rgb
import ipywidgets as widgets
from IPython.display import display, clear_output


def visualize_tensor_sequence_notebook(tensor_list):
    """
    Интерактивная визуализация в Jupyter Notebook в виде линии шариков:
    - Использует ipywidgets.IntSlider и перерисовывает график в Output
    - tensor_list: список torch.Tensor размера (N, M)
    """
    M = tensor_list[0].shape[0]
    N = len(tensor_list)
    # позиции на линии: равномерные точки по оси X
    xs = np.arange(M)
    ys = np.zeros(M)

    # слайдер и область вывода
    slider = widgets.IntSlider(value=0, min=0, max=N-1, step=1, description='Frame')
    out = widgets.Output()
    
    def plot_frame(idx):
        vals = tensor_list[idx]
        arr = vals.cpu().numpy().astype(float)
        norm = arr / arr.max()
        hsv = np.stack([np.zeros_like(norm), norm, np.ones_like(norm)], axis=1)
        rgb = hsv_to_rgb(hsv)
        
        with out:
            clear_output(wait=True)
            fig, ax = plt.subplots(figsize=(8,2))
            ax.set_aspect('equal')
            ax.axis('off')
            # рисуем шарики вдоль линии
            scatter = ax.scatter(xs, ys, s=800, color=rgb)
            # цифры внутри шариков
            for x, y, v in zip(xs, ys, vals):
                ax.text(x, y, str(v.item()), ha='center', va='center', fontsize=12, color='black')
            # статус 'start' и 'done'
            status = 'start' if idx == 0 else ('done' if idx == N-1 else '')
            ax.text(M/2 - 0.5, 0.3, status, ha='center', va='bottom', fontsize=16)
            # установка границ
            ax.set_xlim(-1, M)
            ax.set_ylim(-1, 1)
            plt.show()

    # подписка на изменение слайдера
    slider.observe(lambda change: plot_frame(change['new']), names='value')
    display(slider, out)
    # отрисовка начального кадра
    plot_frame(0)


visualize_tensor_sequence_notebook(path)

IntSlider(value=0, description='Frame', max=28)

Output()