# Hyperparameter Optimization (HPO)

In [1]:
# Move to the parent directory so the `utils` / `datasets` imports and the
# relative `./datasets/raw_data/...` path used below resolve.
# NOTE: not idempotent — re-running this cell moves up one more directory.
%cd ..

/Users/uribagi/Documents/GitHub/Latent-IQP


In [2]:
import iqpopt as iqp
from iqpopt.utils import initialize_from_data, local_gates
import iqpopt.gen_qml as genq
from iqpopt.gen_qml.utils import median_heuristic
import optuna
import pennylane as qml
import jax
from jax import numpy as jnp
from utils.nisq import aachen_connectivity, efficient_connectivity_gates
from datasets.bipartites import BipartiteGraphDataset
from datasets.er import ErdosRenyiGraphDataset
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# --- Experiment configuration -------------------------------------------------
# NODES, TYPE and CONN are interpolated into the dataset file name below.
NODES = 12
TYPE = "Bipartite"
CONN = "Dense"

NUM_LAYERS = 1

# Number of unordered node pairs, C(NODES, 2); used as the circuit width.
QUBITS = (NODES * (NODES - 1)) // 2

In [4]:
# Pickled dataset selected by the configuration constants (size / type / density).
ds_path = f'./datasets/raw_data/{NODES}N_{TYPE}_{CONN}.pkl'

# Load the stored graphs and copy their vector encoding into a JAX array.
# NOTE(review): the constructor arguments look like placeholders that
# `from_file` supersedes — confirm against datasets.bipartites.
train_ds = jnp.array(
    BipartiteGraphDataset(nodes=1, edge_prob=0.1)
    .from_file(ds_path)
    .vectors
    .copy()
)

[Dataset] Loaded 496 samples from ./datasets/raw_data/12N_Bipartite_Dense.pkl
  Created: 2025-05-30T13:15:50.624528
  Unique graphs: 496
  Version: 1.0


In [5]:
# Connectivity returned by the project's `aachen_connectivity` helper.
grid_conn = aachen_connectivity()

# Gate list for QUBITS qubits on that connectivity. The layer count is taken
# from the NUM_LAYERS config constant rather than a duplicated literal, so the
# configuration cell stays the single place to change it.
# NOTE(review): assumes the third positional argument is the number of
# layers — confirm against utils.nisq.efficient_connectivity_gates.
gates = efficient_connectivity_gates(grid_conn, QUBITS, NUM_LAYERS)

# IQP simulator over those gates, using the 'lightning.qubit' device.
circ = iqp.IqpSimulator(QUBITS, gates, device='lightning.qubit')

# Fixed JAX PRNG key so anything derived from it is reproducible.
base_key = jax.random.PRNGKey(42)

In [6]:
# Baseline sigma estimated from the training set with the median heuristic.
# It is passed to `run_hpo`, whose trials report a "Sigma Multiplier"
# (presumably applied to this baseline — verify in utils.hpo).
base_sigma = median_heuristic(train_ds)

In [7]:
from utils.hpo import run_hpo

In [8]:
# Launch the hyperparameter search; the result is kept in `study`.
study = run_hpo(
    grid_conn, QUBITS, base_sigma,  # connectivity, circuit width, baseline sigma
    train_ds=train_ds,
    n_trials=100,       # number of trials requested from the search
    n_iters_hpo=250,    # training steps per trial (progress bars below run to 250)
    n_ops=2000,         # NOTE(review): meaning defined in utils.hpo — confirm
    n_samples=2000,     # NOTE(review): meaning defined in utils.hpo — confirm
)

[I 2025-06-02 13:49:17,485] A new study created in memory with name: no-name-394139ff-ac6a-4b77-836e-09d422b4b48b


Trial 0:
  Learning Rate: 0.0030988224367687034
  Sigma Multiplier: 0.9745703427996208
  Initialization Multiplier: 2.18430404299934
  Number of Layers: 1


Training Progress: 100%|██████████| 250/250 [00:06<00:00, 39.09it/s, loss=0.012200, elapsed time=0.02, total time=7.07]
[I 2025-06-02 13:49:24,759] Trial 0 finished with value: 0.012199577876545345 and parameters: {'learning_rate': 0.0030988224367687034, 'sigma_multiplier': 0.9745703427996208, 'num_layers': 1, 'initialization_multiplier': 2.18430404299934}. Best is trial 0 with value: 0.012199577876545345.


Training has not converged after 250 steps
Trial 0 final loss: 0.01219958
Trial 1:
  Learning Rate: 5.7983026364449676e-05
  Sigma Multiplier: 0.6819456822529598
  Initialization Multiplier: 1.0129629921695478
  Number of Layers: 1


Training Progress: 100%|██████████| 250/250 [00:08<00:00, 30.05it/s, loss=0.025263, elapsed time=0.03, total time=8.45]
[I 2025-06-02 13:49:33,221] Trial 1 finished with value: 0.025263333883655755 and parameters: {'learning_rate': 5.7983026364449676e-05, 'sigma_multiplier': 0.6819456822529598, 'num_layers': 1, 'initialization_multiplier': 1.0129629921695478}. Best is trial 0 with value: 0.012199577876545345.


Training has not converged after 250 steps
Trial 1 final loss: 0.02526333
Trial 2:
  Learning Rate: 4.384424570085708e-05
  Sigma Multiplier: 0.13547127827840832
  Initialization Multiplier: 2.375292734533423
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:11<00:00, 21.04it/s, loss=0.000039, elapsed time=0.08, total time=12]   
[I 2025-06-02 13:49:45,386] Trial 2 finished with value: 3.8753138159196967e-05 and parameters: {'learning_rate': 4.384424570085708e-05, 'sigma_multiplier': 0.13547127827840832, 'num_layers': 2, 'initialization_multiplier': 2.375292734533423}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 2 final loss: 0.00003875
Trial 3:
  Learning Rate: 0.0063790952750710775
  Sigma Multiplier: 1.7530188391692856
  Initialization Multiplier: 2.9346881063217607
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:08<00:00, 28.46it/s, loss=0.003070, elapsed time=0.03, total time=8.92]
[I 2025-06-02 13:49:54,329] Trial 3 finished with value: 0.0030702655846754612 and parameters: {'learning_rate': 0.0063790952750710775, 'sigma_multiplier': 1.7530188391692856, 'num_layers': 2, 'initialization_multiplier': 2.9346881063217607}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 3 final loss: 0.00307027
Trial 4:
  Learning Rate: 0.007494266230132738
  Sigma Multiplier: 0.7227704677891643
  Initialization Multiplier: 2.5124335560168634
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:12<00:00, 20.34it/s, loss=0.009332, elapsed time=0.09, total time=12.4]
[I 2025-06-02 13:50:06,780] Trial 4 finished with value: 0.00933186134298375 and parameters: {'learning_rate': 0.007494266230132738, 'sigma_multiplier': 0.7227704677891643, 'num_layers': 2, 'initialization_multiplier': 2.5124335560168634}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 4 final loss: 0.00933186
Trial 5:
  Learning Rate: 0.0007668071985379338
  Sigma Multiplier: 0.8165822216391421
  Initialization Multiplier: 2.470785062083408
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:19<00:00, 12.97it/s, loss=0.028193, elapsed time=0.07, total time=19.4]
[I 2025-06-02 13:50:26,261] Trial 5 finished with value: 0.028192605568861207 and parameters: {'learning_rate': 0.0007668071985379338, 'sigma_multiplier': 0.8165822216391421, 'num_layers': 2, 'initialization_multiplier': 2.470785062083408}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 5 final loss: 0.02819261
Trial 6:
  Learning Rate: 0.00015999847844585087
  Sigma Multiplier: 1.0556235679065191
  Initialization Multiplier: 2.5453114749211325
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:18<00:00, 13.30it/s, loss=0.067284, elapsed time=0.08, total time=19]  
[I 2025-06-02 13:50:45,341] Trial 6 finished with value: 0.06728395712753009 and parameters: {'learning_rate': 0.00015999847844585087, 'sigma_multiplier': 1.0556235679065191, 'num_layers': 2, 'initialization_multiplier': 2.5453114749211325}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 6 final loss: 0.06728396
Trial 7:
  Learning Rate: 0.005253795942410954
  Sigma Multiplier: 0.2587428346396303
  Initialization Multiplier: 1.9810883794589766
  Number of Layers: 1


Training Progress: 100%|██████████| 250/250 [00:21<00:00, 11.65it/s, loss=0.005069, elapsed time=0.08, total time=21.7]
[I 2025-06-02 13:51:07,037] Trial 7 finished with value: 0.0050694508856351595 and parameters: {'learning_rate': 0.005253795942410954, 'sigma_multiplier': 0.2587428346396303, 'num_layers': 1, 'initialization_multiplier': 1.9810883794589766}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 7 final loss: 0.00506945
Trial 8:
  Learning Rate: 5.361767339882885e-05
  Sigma Multiplier: 1.7775257057455593
  Initialization Multiplier: 2.512327420501116
  Number of Layers: 1


Training Progress: 100%|██████████| 250/250 [00:17<00:00, 14.17it/s, loss=0.073514, elapsed time=0.13, total time=17.9]
[I 2025-06-02 13:51:25,004] Trial 8 finished with value: 0.07351390762546811 and parameters: {'learning_rate': 5.361767339882885e-05, 'sigma_multiplier': 1.7775257057455593, 'num_layers': 1, 'initialization_multiplier': 2.512327420501116}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 8 final loss: 0.07351391
Trial 9:
  Learning Rate: 0.0007657553291079904
  Sigma Multiplier: 1.9005461960621666
  Initialization Multiplier: 2.950068578087959
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:26<00:00,  9.30it/s, loss=0.019941, elapsed time=0.09, total time=27.1]
[I 2025-06-02 13:51:52,176] Trial 9 finished with value: 0.019941284690788947 and parameters: {'learning_rate': 0.0007657553291079904, 'sigma_multiplier': 1.9005461960621666, 'num_layers': 2, 'initialization_multiplier': 2.950068578087959}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 9 final loss: 0.01994128
Trial 10:
  Learning Rate: 0.09815766567389977
  Sigma Multiplier: 0.14392117818133862
  Initialization Multiplier: 0.17476374996163235
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:41<00:00,  6.06it/s, loss=0.000091, elapsed time=0.19, total time=41.8] 
[I 2025-06-02 13:52:33,981] Trial 10 finished with value: 9.149615150379934e-05 and parameters: {'learning_rate': 0.09815766567389977, 'sigma_multiplier': 0.14392117818133862, 'num_layers': 2, 'initialization_multiplier': 0.17476374996163235}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 10 final loss: 0.00009150
Trial 11:
  Learning Rate: 0.060919475288425895
  Sigma Multiplier: 0.18396334240000295
  Initialization Multiplier: 0.10187275743771562
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:41<00:00,  6.07it/s, loss=0.000349, elapsed time=0.2, total time=41.6] 
[I 2025-06-02 13:53:15,667] Trial 11 finished with value: 0.0003489374059112884 and parameters: {'learning_rate': 0.060919475288425895, 'sigma_multiplier': 0.18396334240000295, 'num_layers': 2, 'initialization_multiplier': 0.10187275743771562}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 11 final loss: 0.00034894
Trial 12:
  Learning Rate: 0.06540099412896573
  Sigma Multiplier: 0.4155540988914753
  Initialization Multiplier: 1.0919896448038804
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:39<00:00,  6.33it/s, loss=0.016919, elapsed time=0.17, total time=40]  
[I 2025-06-02 13:53:55,684] Trial 12 finished with value: 0.016918966371006483 and parameters: {'learning_rate': 0.06540099412896573, 'sigma_multiplier': 0.4155540988914753, 'num_layers': 2, 'initialization_multiplier': 1.0919896448038804}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 12 final loss: 0.01691897
Trial 13:
  Learning Rate: 0.02217697122204292
  Sigma Multiplier: 1.399875977432425
  Initialization Multiplier: 0.2832488408708043
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:30<00:00,  8.19it/s, loss=0.000967, elapsed time=0.13, total time=31]  
[I 2025-06-02 13:54:26,740] Trial 13 finished with value: 0.0009668162587197555 and parameters: {'learning_rate': 0.02217697122204292, 'sigma_multiplier': 1.399875977432425, 'num_layers': 2, 'initialization_multiplier': 0.2832488408708043}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 13 final loss: 0.00096682
Trial 14:
  Learning Rate: 0.000270900651033801
  Sigma Multiplier: 0.47779646659265856
  Initialization Multiplier: 1.3911753577058223
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:39<00:00,  6.36it/s, loss=0.118251, elapsed time=0.21, total time=39.8]
[I 2025-06-02 13:55:06,535] Trial 14 finished with value: 0.11825142704569461 and parameters: {'learning_rate': 0.000270900651033801, 'sigma_multiplier': 0.47779646659265856, 'num_layers': 2, 'initialization_multiplier': 1.3911753577058223}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 14 final loss: 0.11825143
Trial 15:
  Learning Rate: 0.01776342586072066
  Sigma Multiplier: 0.15113211929144402
  Initialization Multiplier: 0.634082016046001
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:43<00:00,  5.74it/s, loss=0.000212, elapsed time=0.19, total time=44]   
[I 2025-06-02 13:55:50,619] Trial 15 finished with value: 0.0002116453981385416 and parameters: {'learning_rate': 0.01776342586072066, 'sigma_multiplier': 0.15113211929144402, 'num_layers': 2, 'initialization_multiplier': 0.634082016046001}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 15 final loss: 0.00021165
Trial 16:
  Learning Rate: 0.0008569691938129657
  Sigma Multiplier: 0.46936480494762944
  Initialization Multiplier: 1.757737841812363
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:44<00:00,  5.62it/s, loss=0.047873, elapsed time=0.22, total time=45]  
[I 2025-06-02 13:56:35,619] Trial 16 finished with value: 0.047873494320996725 and parameters: {'learning_rate': 0.0008569691938129657, 'sigma_multiplier': 0.46936480494762944, 'num_layers': 2, 'initialization_multiplier': 1.757737841812363}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 16 final loss: 0.04787349
Trial 17:
  Learning Rate: 0.000171426296308462
  Sigma Multiplier: 1.3132953519256485
  Initialization Multiplier: 1.5210764836993003
  Number of Layers: 1


Training Progress: 100%|██████████| 250/250 [00:24<00:00, 10.27it/s, loss=0.036343, elapsed time=0.08, total time=24.9]
[I 2025-06-02 13:57:00,555] Trial 17 finished with value: 0.03634310476667784 and parameters: {'learning_rate': 0.000171426296308462, 'sigma_multiplier': 1.3132953519256485, 'num_layers': 1, 'initialization_multiplier': 1.5210764836993003}. Best is trial 2 with value: 3.8753138159196967e-05.


Training has not converged after 250 steps
Trial 17 final loss: 0.03634310
Trial 18:
  Learning Rate: 3.2766641942555026e-05
  Sigma Multiplier: 0.11746746646899822
  Initialization Multiplier: 0.4650171128473607
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:49<00:00,  5.10it/s, loss=-0.000007, elapsed time=0.19, total time=49.6]
[I 2025-06-02 13:57:50,163] Trial 18 finished with value: -6.6334704472575464e-06 and parameters: {'learning_rate': 3.2766641942555026e-05, 'sigma_multiplier': 0.11746746646899822, 'num_layers': 2, 'initialization_multiplier': 0.4650171128473607}. Best is trial 18 with value: -6.6334704472575464e-06.


Training has not converged after 250 steps
Trial 18 final loss: -0.00000663
Trial 19:
  Learning Rate: 3.264997312887791e-05
  Sigma Multiplier: 0.36799698306477924
  Initialization Multiplier: 0.6326344206000053
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:51<00:00,  4.89it/s, loss=0.030477, elapsed time=0.22, total time=51.8]
[I 2025-06-02 13:58:41,966] Trial 19 finished with value: 0.03047716955270618 and parameters: {'learning_rate': 3.264997312887791e-05, 'sigma_multiplier': 0.36799698306477924, 'num_layers': 2, 'initialization_multiplier': 0.6326344206000053}. Best is trial 18 with value: -6.6334704472575464e-06.


Training has not converged after 250 steps
Trial 19 final loss: 0.03047717
Trial 20:
  Learning Rate: 0.00010224712871110972
  Sigma Multiplier: 0.6062630899167613
  Initialization Multiplier: 1.2358517012582877
  Number of Layers: 1


Training Progress: 100%|██████████| 250/250 [00:34<00:00,  7.17it/s, loss=0.045343, elapsed time=0.11, total time=35.4]
[I 2025-06-02 13:59:17,447] Trial 20 finished with value: 0.045342642773202026 and parameters: {'learning_rate': 0.00010224712871110972, 'sigma_multiplier': 0.6062630899167613, 'num_layers': 1, 'initialization_multiplier': 1.2358517012582877}. Best is trial 18 with value: -6.6334704472575464e-06.


Training has not converged after 250 steps
Trial 20 final loss: 0.04534264
Trial 21:
  Learning Rate: 3.215879552474536e-05
  Sigma Multiplier: 0.13412454552915654
  Initialization Multiplier: 0.4877487597610717
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:46<00:00,  5.32it/s, loss=0.000065, elapsed time=0.19, total time=47.5] 
[I 2025-06-02 14:00:05,003] Trial 21 finished with value: 6.527232255739707e-05 and parameters: {'learning_rate': 3.215879552474536e-05, 'sigma_multiplier': 0.13412454552915654, 'num_layers': 2, 'initialization_multiplier': 0.4877487597610717}. Best is trial 18 with value: -6.6334704472575464e-06.


Training has not converged after 250 steps
Trial 21 final loss: 0.00006527
Trial 22:
  Learning Rate: 3.408490136971628e-05
  Sigma Multiplier: 0.300062268230022
  Initialization Multiplier: 0.6157396291372602
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:45<00:00,  5.46it/s, loss=0.012071, elapsed time=0.15, total time=46.4]
[I 2025-06-02 14:00:51,489] Trial 22 finished with value: 0.012070592525159329 and parameters: {'learning_rate': 3.408490136971628e-05, 'sigma_multiplier': 0.300062268230022, 'num_layers': 2, 'initialization_multiplier': 0.6157396291372602}. Best is trial 18 with value: -6.6334704472575464e-06.


Training has not converged after 250 steps
Trial 22 final loss: 0.01207059
Trial 23:
  Learning Rate: 0.00035480806305818944
  Sigma Multiplier: 0.10933958804248853
  Initialization Multiplier: 0.819516817671665
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:51<00:00,  4.81it/s, loss=-0.000070, elapsed time=0.33, total time=52.4]
[I 2025-06-02 14:01:43,945] Trial 23 finished with value: -6.997161935930872e-05 and parameters: {'learning_rate': 0.00035480806305818944, 'sigma_multiplier': 0.10933958804248853, 'num_layers': 2, 'initialization_multiplier': 0.819516817671665}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 23 final loss: -0.00006997
Trial 24:
  Learning Rate: 0.00035323058658548256
  Sigma Multiplier: 0.572409896948295
  Initialization Multiplier: 1.0914004760168616
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:48<00:00,  5.18it/s, loss=0.068499, elapsed time=0.21, total time=49.1]
[I 2025-06-02 14:02:33,116] Trial 24 finished with value: 0.06849944057807687 and parameters: {'learning_rate': 0.00035323058658548256, 'sigma_multiplier': 0.572409896948295, 'num_layers': 2, 'initialization_multiplier': 1.0914004760168616}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 24 final loss: 0.06849944
Trial 25:
  Learning Rate: 8.69486373209723e-05
  Sigma Multiplier: 0.33578699300534354
  Initialization Multiplier: 0.8189820704549295
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:52<00:00,  4.76it/s, loss=0.022469, elapsed time=0.12, total time=53.3]
[I 2025-06-02 14:03:26,446] Trial 25 finished with value: 0.02246941963829405 and parameters: {'learning_rate': 8.69486373209723e-05, 'sigma_multiplier': 0.33578699300534354, 'num_layers': 2, 'initialization_multiplier': 0.8189820704549295}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 25 final loss: 0.02246942
Trial 26:
  Learning Rate: 0.00033040442017843866
  Sigma Multiplier: 0.8613764095486182
  Initialization Multiplier: 1.7163290932379334
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:41<00:00,  6.09it/s, loss=0.108393, elapsed time=0.12, total time=41.8]
[I 2025-06-02 14:04:08,274] Trial 26 finished with value: 0.10839318489870084 and parameters: {'learning_rate': 0.00033040442017843866, 'sigma_multiplier': 0.8613764095486182, 'num_layers': 2, 'initialization_multiplier': 1.7163290932379334}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 26 final loss: 0.10839318
Trial 27:
  Learning Rate: 0.001374744389906681
  Sigma Multiplier: 0.10273866779957488
  Initialization Multiplier: 0.3772319803338617
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:54<00:00,  4.57it/s, loss=0.000043, elapsed time=0.25, total time=55.4] 
[I 2025-06-02 14:05:03,687] Trial 27 finished with value: 4.33914066594536e-05 and parameters: {'learning_rate': 0.001374744389906681, 'sigma_multiplier': 0.10273866779957488, 'num_layers': 2, 'initialization_multiplier': 0.3772319803338617}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 27 final loss: 0.00004339
Trial 28:
  Learning Rate: 0.00011252731772529997
  Sigma Multiplier: 1.251543138908185
  Initialization Multiplier: 0.7935875195986992
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:36<00:00,  6.90it/s, loss=0.034845, elapsed time=0.11, total time=36.9]
[I 2025-06-02 14:05:40,600] Trial 28 finished with value: 0.034844990378658694 and parameters: {'learning_rate': 0.00011252731772529997, 'sigma_multiplier': 1.251543138908185, 'num_layers': 2, 'initialization_multiplier': 0.7935875195986992}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 28 final loss: 0.03484499
Trial 29:
  Learning Rate: 0.0021926972249298915
  Sigma Multiplier: 1.0144103743352497
  Initialization Multiplier: 1.9984276827855134
  Number of Layers: 1


Training Progress: 100%|██████████| 250/250 [00:29<00:00,  8.54it/s, loss=0.009538, elapsed time=0.13, total time=29.9]
[I 2025-06-02 14:06:10,519] Trial 29 finished with value: 0.009537604393212752 and parameters: {'learning_rate': 0.0021926972249298915, 'sigma_multiplier': 1.0144103743352497, 'num_layers': 1, 'initialization_multiplier': 1.9984276827855134}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 29 final loss: 0.00953760
Trial 30:
  Learning Rate: 5.856601143440028e-05
  Sigma Multiplier: 0.24939716809771056
  Initialization Multiplier: 2.2298200330308564
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:49<00:00,  5.01it/s, loss=0.006496, elapsed time=0.22, total time=50.7]
[I 2025-06-02 14:07:01,292] Trial 30 finished with value: 0.006496070808597438 and parameters: {'learning_rate': 5.856601143440028e-05, 'sigma_multiplier': 0.24939716809771056, 'num_layers': 2, 'initialization_multiplier': 2.2298200330308564}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 30 final loss: 0.00649607
Trial 31:
  Learning Rate: 0.0012347770629328294
  Sigma Multiplier: 0.11481458675563812
  Initialization Multiplier: 0.0017256703834985232
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:46<00:00,  5.35it/s, loss=0.001801, elapsed time=0.17, total time=47.4]
[I 2025-06-02 14:07:48,805] Trial 31 finished with value: 0.0018007780030210331 and parameters: {'learning_rate': 0.0012347770629328294, 'sigma_multiplier': 0.11481458675563812, 'num_layers': 2, 'initialization_multiplier': 0.0017256703834985232}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 31 final loss: 0.00180078
Trial 32:
  Learning Rate: 0.0029537825769076187
  Sigma Multiplier: 0.27692328238237535
  Initialization Multiplier: 0.28513420344094886
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:47<00:00,  5.26it/s, loss=0.005687, elapsed time=0.21, total time=48.2]
[I 2025-06-02 14:08:37,074] Trial 32 finished with value: 0.005687356910577611 and parameters: {'learning_rate': 0.0029537825769076187, 'sigma_multiplier': 0.27692328238237535, 'num_layers': 2, 'initialization_multiplier': 0.28513420344094886}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 32 final loss: 0.00568736
Trial 33:
  Learning Rate: 0.00040978919734662995
  Sigma Multiplier: 0.5405728603164593
  Initialization Multiplier: 0.4274280157424315
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:43<00:00,  5.77it/s, loss=0.017430, elapsed time=0.18, total time=43.9]
[I 2025-06-02 14:09:21,117] Trial 33 finished with value: 0.017429814796095417 and parameters: {'learning_rate': 0.00040978919734662995, 'sigma_multiplier': 0.5405728603164593, 'num_layers': 2, 'initialization_multiplier': 0.4274280157424315}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 33 final loss: 0.01742981
Trial 34:
  Learning Rate: 0.001440211159952513
  Sigma Multiplier: 0.3936353878848041
  Initialization Multiplier: 0.8287243648693841
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:48<00:00,  5.20it/s, loss=0.017058, elapsed time=0.16, total time=48.5]
[I 2025-06-02 14:10:09,756] Trial 34 finished with value: 0.017058248081825066 and parameters: {'learning_rate': 0.001440211159952513, 'sigma_multiplier': 0.3936353878848041, 'num_layers': 2, 'initialization_multiplier': 0.8287243648693841}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 34 final loss: 0.01705825
Trial 35:
  Learning Rate: 5.945717379598693e-05
  Sigma Multiplier: 0.6896626937416791
  Initialization Multiplier: 2.750486080103608
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:42<00:00,  5.89it/s, loss=0.164523, elapsed time=0.14, total time=43]  
[I 2025-06-02 14:10:52,837] Trial 35 finished with value: 0.16452269220539859 and parameters: {'learning_rate': 5.945717379598693e-05, 'sigma_multiplier': 0.6896626937416791, 'num_layers': 2, 'initialization_multiplier': 2.750486080103608}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 35 final loss: 0.16452269
Trial 36:
  Learning Rate: 0.0001923033285415814
  Sigma Multiplier: 0.11009594038825848
  Initialization Multiplier: 3.1136321304819368
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:49<00:00,  5.10it/s, loss=0.000015, elapsed time=0.18, total time=49.6] 
[I 2025-06-02 14:11:42,523] Trial 36 finished with value: 1.503705921597053e-05 and parameters: {'learning_rate': 0.0001923033285415814, 'sigma_multiplier': 0.11009594038825848, 'num_layers': 2, 'initialization_multiplier': 3.1136321304819368}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 36 final loss: 0.00001504
Trial 37:
  Learning Rate: 0.00019265749068432897
  Sigma Multiplier: 0.2391549694881588
  Initialization Multiplier: 3.1399695999548918
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:47<00:00,  5.30it/s, loss=0.004402, elapsed time=0.18, total time=47.7]
[I 2025-06-02 14:12:30,309] Trial 37 finished with value: 0.004401626124933161 and parameters: {'learning_rate': 0.00019265749068432897, 'sigma_multiplier': 0.2391549694881588, 'num_layers': 2, 'initialization_multiplier': 3.1399695999548918}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 37 final loss: 0.00440163
Trial 38:
  Learning Rate: 0.0005763326759636173
  Sigma Multiplier: 0.7912283784302491
  Initialization Multiplier: 2.2907700671528834
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:40<00:00,  6.22it/s, loss=0.044486, elapsed time=0.22, total time=40.8]
[I 2025-06-02 14:13:11,216] Trial 38 finished with value: 0.04448618677770022 and parameters: {'learning_rate': 0.0005763326759636173, 'sigma_multiplier': 0.7912283784302491, 'num_layers': 2, 'initialization_multiplier': 2.2907700671528834}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 38 final loss: 0.04448619
Trial 39:
  Learning Rate: 7.283230772280661e-05
  Sigma Multiplier: 0.2235653304202207
  Initialization Multiplier: 2.7429948855414343
  Number of Layers: 1


Training Progress: 100%|██████████| 250/250 [00:38<00:00,  6.52it/s, loss=0.002289, elapsed time=0.17, total time=38.9]
[I 2025-06-02 14:13:50,108] Trial 39 finished with value: 0.0022888814581733118 and parameters: {'learning_rate': 7.283230772280661e-05, 'sigma_multiplier': 0.2235653304202207, 'num_layers': 1, 'initialization_multiplier': 2.7429948855414343}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 39 final loss: 0.00228888
Trial 40:
  Learning Rate: 0.00013350252401493813
  Sigma Multiplier: 1.4770289614885237
  Initialization Multiplier: 1.9887308499082315
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:34<00:00,  7.35it/s, loss=0.044541, elapsed time=0.09, total time=34.6]
[I 2025-06-02 14:14:24,754] Trial 40 finished with value: 0.044540848267062484 and parameters: {'learning_rate': 0.00013350252401493813, 'sigma_multiplier': 1.4770289614885237, 'num_layers': 2, 'initialization_multiplier': 1.9887308499082315}. Best is trial 23 with value: -6.997161935930872e-05.


Training has not converged after 250 steps
Trial 40 final loss: 0.04454085
Trial 41:
  Learning Rate: 0.00022758281853282574
  Sigma Multiplier: 0.11447623985387137
  Initialization Multiplier: 0.3542086936170296
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:48<00:00,  5.19it/s, loss=-0.000077, elapsed time=0.15, total time=48.6]
[I 2025-06-02 14:15:13,414] Trial 41 finished with value: -7.705698844984952e-05 and parameters: {'learning_rate': 0.00022758281853282574, 'sigma_multiplier': 0.11447623985387137, 'num_layers': 2, 'initialization_multiplier': 0.3542086936170296}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 41 final loss: -0.00007706
Trial 42:
  Learning Rate: 0.00022022988634327935
  Sigma Multiplier: 0.33126977055326207
  Initialization Multiplier: 2.7004450219375777
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:47<00:00,  5.31it/s, loss=0.025654, elapsed time=0.23, total time=47.7]
[I 2025-06-02 14:16:01,200] Trial 42 finished with value: 0.025653862172690736 and parameters: {'learning_rate': 0.00022022988634327935, 'sigma_multiplier': 0.33126977055326207, 'num_layers': 2, 'initialization_multiplier': 2.7004450219375777}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 42 final loss: 0.02565386
Trial 43:
  Learning Rate: 3.9760375610828714e-05
  Sigma Multiplier: 0.20451803180019718
  Initialization Multiplier: 3.018444638135189
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:46<00:00,  5.42it/s, loss=0.001095, elapsed time=0.12, total time=46.8]
[I 2025-06-02 14:16:48,137] Trial 43 finished with value: 0.0010948175766858704 and parameters: {'learning_rate': 3.9760375610828714e-05, 'sigma_multiplier': 0.20451803180019718, 'num_layers': 2, 'initialization_multiplier': 3.018444638135189}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 43 final loss: 0.00109482
Trial 44:
  Learning Rate: 0.0005177281273261084
  Sigma Multiplier: 0.10270998902340822
  Initialization Multiplier: 0.2078236777232545
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:46<00:00,  5.36it/s, loss=0.000007, elapsed time=0.1, total time=47.3]  
[I 2025-06-02 14:17:35,481] Trial 44 finished with value: 6.790814732771576e-06 and parameters: {'learning_rate': 0.0005177281273261084, 'sigma_multiplier': 0.10270998902340822, 'num_layers': 2, 'initialization_multiplier': 0.2078236777232545}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 44 final loss: 0.00000679
Trial 45:
  Learning Rate: 0.0005469113230904252
  Sigma Multiplier: 0.47815171596382844
  Initialization Multiplier: 0.23294377216063586
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:44<00:00,  5.62it/s, loss=0.067594, elapsed time=0.24, total time=45.1]
[I 2025-06-02 14:18:20,661] Trial 45 finished with value: 0.06759420151009428 and parameters: {'learning_rate': 0.0005469113230904252, 'sigma_multiplier': 0.47815171596382844, 'num_layers': 2, 'initialization_multiplier': 0.23294377216063586}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 45 final loss: 0.06759420
Trial 46:
  Learning Rate: 0.000491798547172819
  Sigma Multiplier: 0.10274722745081276
  Initialization Multiplier: 0.03701906274782413
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:48<00:00,  5.16it/s, loss=0.006713, elapsed time=0.21, total time=49]  
[I 2025-06-02 14:19:09,761] Trial 46 finished with value: 0.00671324654429491 and parameters: {'learning_rate': 0.000491798547172819, 'sigma_multiplier': 0.10274722745081276, 'num_layers': 2, 'initialization_multiplier': 0.03701906274782413}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 46 final loss: 0.00671325
Trial 47:
  Learning Rate: 0.0009292924334336538
  Sigma Multiplier: 0.2099629239120814
  Initialization Multiplier: 0.9425564368331638
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:47<00:00,  5.23it/s, loss=0.001498, elapsed time=0.13, total time=48.5]
[I 2025-06-02 14:19:58,280] Trial 47 finished with value: 0.0014976413590315062 and parameters: {'learning_rate': 0.0009292924334336538, 'sigma_multiplier': 0.2099629239120814, 'num_layers': 2, 'initialization_multiplier': 0.9425564368331638}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 47 final loss: 0.00149764
Trial 48:
  Learning Rate: 0.0002588627284945882
  Sigma Multiplier: 1.1483419698114314
  Initialization Multiplier: 0.5446592487097367
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:37<00:00,  6.72it/s, loss=0.006106, elapsed time=0.16, total time=37.8]
[I 2025-06-02 14:20:36,199] Trial 48 finished with value: 0.00610609435024011 and parameters: {'learning_rate': 0.0002588627284945882, 'sigma_multiplier': 1.1483419698114314, 'num_layers': 2, 'initialization_multiplier': 0.5446592487097367}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 48 final loss: 0.00610609
Trial 49:
  Learning Rate: 0.00013939523966349963
  Sigma Multiplier: 1.6517616505833301
  Initialization Multiplier: 0.3825879314665842
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:32<00:00,  7.70it/s, loss=0.010440, elapsed time=0.17, total time=32.9]
[I 2025-06-02 14:21:09,195] Trial 49 finished with value: 0.010440186027404536 and parameters: {'learning_rate': 0.00013939523966349963, 'sigma_multiplier': 1.6517616505833301, 'num_layers': 2, 'initialization_multiplier': 0.3825879314665842}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 49 final loss: 0.01044019
Trial 50:
  Learning Rate: 0.004572624104325846
  Sigma Multiplier: 0.43960486647143737
  Initialization Multiplier: 0.699926767759785
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:45<00:00,  5.52it/s, loss=0.015948, elapsed time=0.22, total time=45.9]
[I 2025-06-02 14:21:55,150] Trial 50 finished with value: 0.01594796077702874 and parameters: {'learning_rate': 0.004572624104325846, 'sigma_multiplier': 0.43960486647143737, 'num_layers': 2, 'initialization_multiplier': 0.699926767759785}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 50 final loss: 0.01594796
Trial 51:
  Learning Rate: 5.387851672561043e-05
  Sigma Multiplier: 0.17917048846658967
  Initialization Multiplier: 0.17375847727649008
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:45<00:00,  5.53it/s, loss=0.003922, elapsed time=0.15, total time=45.7]
[I 2025-06-02 14:22:40,936] Trial 51 finished with value: 0.00392175900545764 and parameters: {'learning_rate': 5.387851672561043e-05, 'sigma_multiplier': 0.17917048846658967, 'num_layers': 2, 'initialization_multiplier': 0.17375847727649008}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 51 final loss: 0.00392176
Trial 52:
  Learning Rate: 9.386260785079694e-05
  Sigma Multiplier: 0.30399893634753883
  Initialization Multiplier: 1.2354730550949622
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:47<00:00,  5.27it/s, loss=0.018983, elapsed time=0.14, total time=48.1]
[I 2025-06-02 14:23:30,111] Trial 52 finished with value: 0.018983002538627135 and parameters: {'learning_rate': 9.386260785079694e-05, 'sigma_multiplier': 0.30399893634753883, 'num_layers': 2, 'initialization_multiplier': 1.2354730550949622}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 52 final loss: 0.01898300
Trial 53:
  Learning Rate: 0.0001748766222806584
  Sigma Multiplier: 0.10008405071938553
  Initialization Multiplier: 2.389306601247285
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:46<00:00,  5.34it/s, loss=-0.000001, elapsed time=0.19, total time=47.4]
[I 2025-06-02 14:24:17,670] Trial 53 finished with value: -8.069865620358163e-07 and parameters: {'learning_rate': 0.0001748766222806584, 'sigma_multiplier': 0.10008405071938553, 'num_layers': 2, 'initialization_multiplier': 2.389306601247285}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 53 final loss: -0.00000081
Trial 54:
  Learning Rate: 0.00028175924431465284
  Sigma Multiplier: 0.3735050312297993
  Initialization Multiplier: 2.365172873548844
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:50<00:00,  4.92it/s, loss=0.041467, elapsed time=0.19, total time=51.4]
[I 2025-06-02 14:25:09,117] Trial 54 finished with value: 0.04146654441420814 and parameters: {'learning_rate': 0.00028175924431465284, 'sigma_multiplier': 0.3735050312297993, 'num_layers': 2, 'initialization_multiplier': 2.365172873548844}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 54 final loss: 0.04146654
Trial 55:
  Learning Rate: 0.000737121076479564
  Sigma Multiplier: 0.17251380520038417
  Initialization Multiplier: 2.5761581453016027
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:49<00:00,  5.07it/s, loss=0.000173, elapsed time=0.2, total time=50]   
[I 2025-06-02 14:25:59,296] Trial 55 finished with value: 0.00017326945902270544 and parameters: {'learning_rate': 0.000737121076479564, 'sigma_multiplier': 0.17251380520038417, 'num_layers': 2, 'initialization_multiplier': 2.5761581453016027}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 55 final loss: 0.00017327
Trial 56:
  Learning Rate: 0.00018896595822825527
  Sigma Multiplier: 0.2720546866834496
  Initialization Multiplier: 0.14057953713798943
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:45<00:00,  5.44it/s, loss=0.038181, elapsed time=0.18, total time=46.5]
[I 2025-06-02 14:26:45,845] Trial 56 finished with value: 0.03818148787091854 and parameters: {'learning_rate': 0.00018896595822825527, 'sigma_multiplier': 0.2720546866834496, 'num_layers': 2, 'initialization_multiplier': 0.14057953713798943}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 56 final loss: 0.03818149
Trial 57:
  Learning Rate: 0.00044826546408348626
  Sigma Multiplier: 0.1008563231725168
  Initialization Multiplier: 2.0805194845894777
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:47<00:00,  5.24it/s, loss=-0.000063, elapsed time=0.2, total time=48.5] 
[I 2025-06-02 14:27:34,496] Trial 57 finished with value: -6.289454233656324e-05 and parameters: {'learning_rate': 0.00044826546408348626, 'sigma_multiplier': 0.1008563231725168, 'num_layers': 2, 'initialization_multiplier': 2.0805194845894777}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 57 final loss: -0.00006289
Trial 58:
  Learning Rate: 0.0003894029623496448
  Sigma Multiplier: 0.18458040653652125
  Initialization Multiplier: 2.120148063050428
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:47<00:00,  5.27it/s, loss=0.000608, elapsed time=0.17, total time=48.1]
[I 2025-06-02 14:28:22,656] Trial 58 finished with value: 0.0006084913155499568 and parameters: {'learning_rate': 0.0003894029623496448, 'sigma_multiplier': 0.18458040653652125, 'num_layers': 2, 'initialization_multiplier': 2.120148063050428}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 58 final loss: 0.00060849
Trial 59:
  Learning Rate: 0.0006641885690741936
  Sigma Multiplier: 0.3337832579813812
  Initialization Multiplier: 1.8821240545763018
  Number of Layers: 1


Training Progress: 100%|██████████| 250/250 [00:38<00:00,  6.57it/s, loss=0.020560, elapsed time=0.18, total time=38.6]
[I 2025-06-02 14:29:01,276] Trial 59 finished with value: 0.02056002078899283 and parameters: {'learning_rate': 0.0006641885690741936, 'sigma_multiplier': 0.3337832579813812, 'num_layers': 1, 'initialization_multiplier': 1.8821240545763018}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 59 final loss: 0.02056002
Trial 60:
  Learning Rate: 0.0010123162720804854
  Sigma Multiplier: 0.416622742007297
  Initialization Multiplier: 0.31127809445662635
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:44<00:00,  5.64it/s, loss=0.021568, elapsed time=0.25, total time=45]  
[I 2025-06-02 14:29:46,377] Trial 60 finished with value: 0.021567826942685966 and parameters: {'learning_rate': 0.0010123162720804854, 'sigma_multiplier': 0.416622742007297, 'num_layers': 2, 'initialization_multiplier': 0.31127809445662635}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 60 final loss: 0.02156783
Trial 61:
  Learning Rate: 0.0002458312378236889
  Sigma Multiplier: 0.1024198698488968
  Initialization Multiplier: 0.47992268774770785
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:47<00:00,  5.21it/s, loss=0.000000, elapsed time=0.18, total time=48.5] 
[I 2025-06-02 14:30:34,997] Trial 61 finished with value: 2.7124489471413994e-07 and parameters: {'learning_rate': 0.0002458312378236889, 'sigma_multiplier': 0.1024198698488968, 'num_layers': 2, 'initialization_multiplier': 0.47992268774770785}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 61 final loss: 0.00000027
Trial 62:
  Learning Rate: 0.00042667754722022034
  Sigma Multiplier: 0.16143562596296304
  Initialization Multiplier: 0.4511416816330794
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:46<00:00,  5.32it/s, loss=0.000183, elapsed time=0.18, total time=47.6] 
[I 2025-06-02 14:31:22,770] Trial 62 finished with value: 0.00018328765523000596 and parameters: {'learning_rate': 0.00042667754722022034, 'sigma_multiplier': 0.16143562596296304, 'num_layers': 2, 'initialization_multiplier': 0.4511416816330794}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 62 final loss: 0.00018329
Trial 63:
  Learning Rate: 0.0002671584190892313
  Sigma Multiplier: 0.24343342931569595
  Initialization Multiplier: 0.5916415193239902
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:45<00:00,  5.47it/s, loss=0.003628, elapsed time=0.22, total time=46.3]
[I 2025-06-02 14:32:09,163] Trial 63 finished with value: 0.0036284409584355602 and parameters: {'learning_rate': 0.0002671584190892313, 'sigma_multiplier': 0.24343342931569595, 'num_layers': 2, 'initialization_multiplier': 0.5916415193239902}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 63 final loss: 0.00362844
Trial 64:
  Learning Rate: 0.00015810847701641458
  Sigma Multiplier: 1.961728266987437
  Initialization Multiplier: 1.6181987284298596
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:31<00:00,  7.83it/s, loss=0.069677, elapsed time=0.1, total time=32.6] 
[I 2025-06-02 14:32:41,825] Trial 64 finished with value: 0.06967672304053614 and parameters: {'learning_rate': 0.00015810847701641458, 'sigma_multiplier': 1.961728266987437, 'num_layers': 2, 'initialization_multiplier': 1.6181987284298596}. Best is trial 41 with value: -7.705698844984952e-05.


Training has not converged after 250 steps
Trial 64 final loss: 0.06967672
Trial 65:
  Learning Rate: 0.0004934380602216942
  Sigma Multiplier: 0.10919685893044441
  Initialization Multiplier: 0.7445586427307485
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:46<00:00,  5.36it/s, loss=-0.000160, elapsed time=0.24, total time=47.5]
[I 2025-06-02 14:33:29,431] Trial 65 finished with value: -0.00016024724958283408 and parameters: {'learning_rate': 0.0004934380602216942, 'sigma_multiplier': 0.10919685893044441, 'num_layers': 2, 'initialization_multiplier': 0.7445586427307485}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 65 final loss: -0.00016025
Trial 66:
  Learning Rate: 0.0003256227111989637
  Sigma Multiplier: 0.2776355259859423
  Initialization Multiplier: 0.6929726395201087
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:45<00:00,  5.44it/s, loss=0.007982, elapsed time=0.21, total time=46.4]
[I 2025-06-02 14:34:15,914] Trial 66 finished with value: 0.007981862241576182 and parameters: {'learning_rate': 0.0003256227111989637, 'sigma_multiplier': 0.2776355259859423, 'num_layers': 2, 'initialization_multiplier': 0.6929726395201087}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 66 final loss: 0.00798186
Trial 67:
  Learning Rate: 0.00013191124850917422
  Sigma Multiplier: 0.5265448843825438
  Initialization Multiplier: 0.8658819635995917
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:40<00:00,  6.16it/s, loss=0.060490, elapsed time=0.11, total time=41.1]
[I 2025-06-02 14:34:57,123] Trial 67 finished with value: 0.060490425163071404 and parameters: {'learning_rate': 0.00013191124850917422, 'sigma_multiplier': 0.5265448843825438, 'num_layers': 2, 'initialization_multiplier': 0.8658819635995917}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 67 final loss: 0.06049043
Trial 68:
  Learning Rate: 0.0020296866604226993
  Sigma Multiplier: 0.16227979692800532
  Initialization Multiplier: 1.0687729631132774
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:40<00:00,  6.12it/s, loss=0.000213, elapsed time=0.14, total time=41.3] 
[I 2025-06-02 14:35:38,501] Trial 68 finished with value: 0.0002130439313398033 and parameters: {'learning_rate': 0.0020296866604226993, 'sigma_multiplier': 0.16227979692800532, 'num_layers': 2, 'initialization_multiplier': 1.0687729631132774}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 68 final loss: 0.00021304
Trial 69:
  Learning Rate: 8.696174776060717e-05
  Sigma Multiplier: 0.35952056615107
  Initialization Multiplier: 0.7269442154072879
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:40<00:00,  6.19it/s, loss=0.029526, elapsed time=0.18, total time=40.9]
[I 2025-06-02 14:36:19,445] Trial 69 finished with value: 0.029526398217231895 and parameters: {'learning_rate': 8.696174776060717e-05, 'sigma_multiplier': 0.35952056615107, 'num_layers': 2, 'initialization_multiplier': 0.7269442154072879}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 69 final loss: 0.02952640
Trial 70:
  Learning Rate: 0.0002234019319625696
  Sigma Multiplier: 0.21999874723281565
  Initialization Multiplier: 0.5238593824847105
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:40<00:00,  6.17it/s, loss=0.002115, elapsed time=0.14, total time=41]  
[I 2025-06-02 14:37:00,549] Trial 70 finished with value: 0.002115032128718597 and parameters: {'learning_rate': 0.0002234019319625696, 'sigma_multiplier': 0.21999874723281565, 'num_layers': 2, 'initialization_multiplier': 0.5238593824847105}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 70 final loss: 0.00211503
Trial 71:
  Learning Rate: 0.0004840820296388809
  Sigma Multiplier: 0.10015947965043426
  Initialization Multiplier: 0.35895835024105993
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:41<00:00,  6.02it/s, loss=-0.000033, elapsed time=0.13, total time=42]  
[I 2025-06-02 14:37:42,616] Trial 71 finished with value: -3.31372108104453e-05 and parameters: {'learning_rate': 0.0004840820296388809, 'sigma_multiplier': 0.10015947965043426, 'num_layers': 2, 'initialization_multiplier': 0.35895835024105993}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 71 final loss: -0.00003314
Trial 72:
  Learning Rate: 0.00033395779577054317
  Sigma Multiplier: 0.14820776569689384
  Initialization Multiplier: 2.1245172067784774
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:41<00:00,  6.04it/s, loss=0.000192, elapsed time=0.1, total time=42]    
[I 2025-06-02 14:38:24,636] Trial 72 finished with value: 0.00019239959240836298 and parameters: {'learning_rate': 0.00033395779577054317, 'sigma_multiplier': 0.14820776569689384, 'num_layers': 2, 'initialization_multiplier': 2.1245172067784774}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 72 final loss: 0.00019240
Trial 73:
  Learning Rate: 0.0004074657474042596
  Sigma Multiplier: 0.27812524663218563
  Initialization Multiplier: 0.3530450088807621
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:39<00:00,  6.39it/s, loss=0.007736, elapsed time=0.14, total time=39.5]
[I 2025-06-02 14:39:04,229] Trial 73 finished with value: 0.007736171635112189 and parameters: {'learning_rate': 0.0004074657474042596, 'sigma_multiplier': 0.27812524663218563, 'num_layers': 2, 'initialization_multiplier': 0.3530450088807621}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 73 final loss: 0.00773617
Trial 74:
  Learning Rate: 0.0006624963769879029
  Sigma Multiplier: 0.1444952723661477
  Initialization Multiplier: 0.9757758252494484
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:41<00:00,  6.02it/s, loss=0.000109, elapsed time=0.12, total time=41.8] 
[I 2025-06-02 14:39:46,074] Trial 74 finished with value: 0.00010894616123971007 and parameters: {'learning_rate': 0.0006624963769879029, 'sigma_multiplier': 0.1444952723661477, 'num_layers': 2, 'initialization_multiplier': 0.9757758252494484}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 74 final loss: 0.00010895
Trial 75:
  Learning Rate: 0.011918396244140319
  Sigma Multiplier: 0.10426535411846505
  Initialization Multiplier: 1.2373991460389289
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:40<00:00,  6.22it/s, loss=0.000110, elapsed time=0.14, total time=40.7] 
[I 2025-06-02 14:40:26,867] Trial 75 finished with value: 0.00010951623754014092 and parameters: {'learning_rate': 0.011918396244140319, 'sigma_multiplier': 0.10426535411846505, 'num_layers': 2, 'initialization_multiplier': 1.2373991460389289}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 75 final loss: 0.00010952
Trial 76:
  Learning Rate: 0.0012240364581473375
  Sigma Multiplier: 0.2182651035000303
  Initialization Multiplier: 0.4818692663395298
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:41<00:00,  6.05it/s, loss=0.001650, elapsed time=0.1, total time=41.8] 
[I 2025-06-02 14:41:08,744] Trial 76 finished with value: 0.001650007808873616 and parameters: {'learning_rate': 0.0012240364581473375, 'sigma_multiplier': 0.2182651035000303, 'num_layers': 2, 'initialization_multiplier': 0.4818692663395298}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 76 final loss: 0.00165001
Trial 77:
  Learning Rate: 0.00023903495509522044
  Sigma Multiplier: 0.3097001472116993
  Initialization Multiplier: 2.453931920555994
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:40<00:00,  6.17it/s, loss=0.018505, elapsed time=0.14, total time=41.1]
[I 2025-06-02 14:41:49,890] Trial 77 finished with value: 0.0185049126936779 and parameters: {'learning_rate': 0.00023903495509522044, 'sigma_multiplier': 0.3097001472116993, 'num_layers': 2, 'initialization_multiplier': 2.453931920555994}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 77 final loss: 0.01850491
Trial 78:
  Learning Rate: 0.0004782146067363031
  Sigma Multiplier: 0.1842187650033514
  Initialization Multiplier: 0.09064052550033097
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:39<00:00,  6.40it/s, loss=0.004662, elapsed time=0.14, total time=39.4]
[I 2025-06-02 14:42:29,357] Trial 78 finished with value: 0.004661592523374486 and parameters: {'learning_rate': 0.0004782146067363031, 'sigma_multiplier': 0.1842187650033514, 'num_layers': 2, 'initialization_multiplier': 0.09064052550033097}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 78 final loss: 0.00466159
Trial 79:
  Learning Rate: 6.927375868625798e-05
  Sigma Multiplier: 0.8871649015325613
  Initialization Multiplier: 0.5918723527604929
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:32<00:00,  7.80it/s, loss=0.029226, elapsed time=0.09, total time=32.5]
[I 2025-06-02 14:43:01,962] Trial 79 finished with value: 0.029225707045705672 and parameters: {'learning_rate': 6.927375868625798e-05, 'sigma_multiplier': 0.8871649015325613, 'num_layers': 2, 'initialization_multiplier': 0.5918723527604929}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 79 final loss: 0.02922571
Trial 80:
  Learning Rate: 4.439304555534926e-05
  Sigma Multiplier: 0.6273476816685759
  Initialization Multiplier: 0.42576739818616455
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:34<00:00,  7.33it/s, loss=0.033077, elapsed time=0.11, total time=34.5]
[I 2025-06-02 14:43:36,509] Trial 80 finished with value: 0.033077130719280944 and parameters: {'learning_rate': 4.439304555534926e-05, 'sigma_multiplier': 0.6273476816685759, 'num_layers': 2, 'initialization_multiplier': 0.42576739818616455}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 80 final loss: 0.03307713
Trial 81:
  Learning Rate: 0.0005555560126789988
  Sigma Multiplier: 0.10083871403988327
  Initialization Multiplier: 0.27135335554615797
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:39<00:00,  6.33it/s, loss=-0.000157, elapsed time=0.22, total time=39.9]
[I 2025-06-02 14:44:16,478] Trial 81 finished with value: -0.00015650293619180636 and parameters: {'learning_rate': 0.0005555560126789988, 'sigma_multiplier': 0.10083871403988327, 'num_layers': 2, 'initialization_multiplier': 0.27135335554615797}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 81 final loss: -0.00015650
Trial 82:
  Learning Rate: 0.000983431769417657
  Sigma Multiplier: 0.1507908537953837
  Initialization Multiplier: 0.25986961300165323
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:39<00:00,  6.40it/s, loss=0.000044, elapsed time=0.16, total time=39.5] 
[I 2025-06-02 14:44:56,028] Trial 82 finished with value: 4.382740997378134e-05 and parameters: {'learning_rate': 0.000983431769417657, 'sigma_multiplier': 0.1507908537953837, 'num_layers': 2, 'initialization_multiplier': 0.25986961300165323}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 82 final loss: 0.00004383
Trial 83:
  Learning Rate: 0.000612001265634411
  Sigma Multiplier: 0.24015600086445307
  Initialization Multiplier: 0.35156980509142266
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:39<00:00,  6.40it/s, loss=0.003509, elapsed time=0.13, total time=39.5]
[I 2025-06-02 14:45:35,568] Trial 83 finished with value: 0.003509210399342991 and parameters: {'learning_rate': 0.000612001265634411, 'sigma_multiplier': 0.24015600086445307, 'num_layers': 2, 'initialization_multiplier': 0.35156980509142266}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 83 final loss: 0.00350921
Trial 84:
  Learning Rate: 0.0003015077871316421
  Sigma Multiplier: 0.1080822249970184
  Initialization Multiplier: 0.7828279084446716
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:41<00:00,  6.01it/s, loss=-0.000096, elapsed time=0.16, total time=42]  
[I 2025-06-02 14:46:17,651] Trial 84 finished with value: -9.581247004602239e-05 and parameters: {'learning_rate': 0.0003015077871316421, 'sigma_multiplier': 0.1080822249970184, 'num_layers': 2, 'initialization_multiplier': 0.7828279084446716}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 84 final loss: -0.00009581
Trial 85:
  Learning Rate: 0.0008056408872179125
  Sigma Multiplier: 0.19902193849228061
  Initialization Multiplier: 0.8955946274561344
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:41<00:00,  5.96it/s, loss=0.001329, elapsed time=0.2, total time=42.4] 
[I 2025-06-02 14:47:00,156] Trial 85 finished with value: 0.0013287889212997928 and parameters: {'learning_rate': 0.0008056408872179125, 'sigma_multiplier': 0.19902193849228061, 'num_layers': 2, 'initialization_multiplier': 0.8955946274561344}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 85 final loss: 0.00132879
Trial 86:
  Learning Rate: 0.0001152088420177505
  Sigma Multiplier: 0.14743817618364713
  Initialization Multiplier: 0.6497205550217238
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:38<00:00,  6.45it/s, loss=0.000074, elapsed time=0.15, total time=39.4] 
[I 2025-06-02 14:47:39,613] Trial 86 finished with value: 7.35518690495183e-05 and parameters: {'learning_rate': 0.0001152088420177505, 'sigma_multiplier': 0.14743817618364713, 'num_layers': 2, 'initialization_multiplier': 0.6497205550217238}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 86 final loss: 0.00007355
Trial 87:
  Learning Rate: 0.0003236030499961824
  Sigma Multiplier: 0.2610178823394462
  Initialization Multiplier: 1.3996671002677272
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:35<00:00,  7.14it/s, loss=0.008493, elapsed time=0.12, total time=35.5]
[I 2025-06-02 14:48:15,192] Trial 87 finished with value: 0.00849318544595386 and parameters: {'learning_rate': 0.0003236030499961824, 'sigma_multiplier': 0.2610178823394462, 'num_layers': 2, 'initialization_multiplier': 1.3996671002677272}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 87 final loss: 0.00849319
Trial 88:
  Learning Rate: 0.00016988352456176475
  Sigma Multiplier: 0.10243741149083195
  Initialization Multiplier: 0.7485322857182461
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:34<00:00,  7.26it/s, loss=0.000097, elapsed time=0.09, total time=34.9] 
[I 2025-06-02 14:48:50,155] Trial 88 finished with value: 9.707193696499042e-05 and parameters: {'learning_rate': 0.00016988352456176475, 'sigma_multiplier': 0.10243741149083195, 'num_layers': 2, 'initialization_multiplier': 0.7485322857182461}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 88 final loss: 0.00009707
Trial 89:
  Learning Rate: 0.00044818444323525174
  Sigma Multiplier: 0.306280219620099
  Initialization Multiplier: 2.650332530146974
  Number of Layers: 1


Training Progress: 100%|██████████| 250/250 [00:28<00:00,  8.81it/s, loss=0.021346, elapsed time=0.1, total time=28.7] 
[I 2025-06-02 14:49:18,937] Trial 89 finished with value: 0.021345741875223007 and parameters: {'learning_rate': 0.00044818444323525174, 'sigma_multiplier': 0.306280219620099, 'num_layers': 1, 'initialization_multiplier': 2.650332530146974}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 89 final loss: 0.02134574
Trial 90:
  Learning Rate: 0.001571872597148199
  Sigma Multiplier: 0.206200155719083
  Initialization Multiplier: 0.7860267388439889
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:33<00:00,  7.54it/s, loss=0.001052, elapsed time=0.08, total time=33.6]
[I 2025-06-02 14:49:52,562] Trial 90 finished with value: 0.0010523601534106293 and parameters: {'learning_rate': 0.001571872597148199, 'sigma_multiplier': 0.206200155719083, 'num_layers': 2, 'initialization_multiplier': 0.7860267388439889}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 90 final loss: 0.00105236
Trial 91:
  Learning Rate: 0.0002997383500050416
  Sigma Multiplier: 0.1011619356374892
  Initialization Multiplier: 0.5536439212216102
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:30<00:00,  8.16it/s, loss=-0.000058, elapsed time=0.1, total time=31]   
[I 2025-06-02 14:50:23,608] Trial 91 finished with value: -5.763720073077833e-05 and parameters: {'learning_rate': 0.0002997383500050416, 'sigma_multiplier': 0.1011619356374892, 'num_layers': 2, 'initialization_multiplier': 0.5536439212216102}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 91 final loss: -0.00005764
Trial 92:
  Learning Rate: 0.00021409538427544282
  Sigma Multiplier: 0.1450485793339208
  Initialization Multiplier: 2.8647493892420393
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:27<00:00,  9.07it/s, loss=0.000104, elapsed time=0.1, total time=27.8]  
[I 2025-06-02 14:50:51,507] Trial 92 finished with value: 0.00010407627333945247 and parameters: {'learning_rate': 0.00021409538427544282, 'sigma_multiplier': 0.1450485793339208, 'num_layers': 2, 'initialization_multiplier': 2.8647493892420393}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 92 final loss: 0.00010408
Trial 93:
  Learning Rate: 0.0005470890052219948
  Sigma Multiplier: 0.19359860152908548
  Initialization Multiplier: 0.5386245155222048
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:26<00:00,  9.40it/s, loss=0.000710, elapsed time=0.08, total time=26.9]
[I 2025-06-02 14:51:18,458] Trial 93 finished with value: 0.0007095219711358764 and parameters: {'learning_rate': 0.0005470890052219948, 'sigma_multiplier': 0.19359860152908548, 'num_layers': 2, 'initialization_multiplier': 0.5386245155222048}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 93 final loss: 0.00070952
Trial 94:
  Learning Rate: 0.0003835090310221903
  Sigma Multiplier: 0.14224806829739894
  Initialization Multiplier: 0.27763510742337194
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:28<00:00,  8.92it/s, loss=0.000064, elapsed time=0.14, total time=28.3] 
[I 2025-06-02 14:51:46,813] Trial 94 finished with value: 6.365610926306309e-05 and parameters: {'learning_rate': 0.0003835090310221903, 'sigma_multiplier': 0.14224806829739894, 'num_layers': 2, 'initialization_multiplier': 0.27763510742337194}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 94 final loss: 0.00006366
Trial 95:
  Learning Rate: 0.00030014473826126654
  Sigma Multiplier: 0.2421709872057139
  Initialization Multiplier: 0.09048376278843026
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:27<00:00,  8.95it/s, loss=0.033119, elapsed time=0.1, total time=28.2] 
[I 2025-06-02 14:52:15,048] Trial 95 finished with value: 0.033118551913249104 and parameters: {'learning_rate': 0.00030014473826126654, 'sigma_multiplier': 0.2421709872057139, 'num_layers': 2, 'initialization_multiplier': 0.09048376278843026}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 95 final loss: 0.03311855
Trial 96:
  Learning Rate: 0.0001544394636087411
  Sigma Multiplier: 0.1355923859726877
  Initialization Multiplier: 0.6705085542526878
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:37<00:00,  6.73it/s, loss=0.000082, elapsed time=0.12, total time=37.4] 
[I 2025-06-02 14:52:52,529] Trial 96 finished with value: 8.185051396321652e-05 and parameters: {'learning_rate': 0.0001544394636087411, 'sigma_multiplier': 0.1355923859726877, 'num_layers': 2, 'initialization_multiplier': 0.6705085542526878}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 96 final loss: 0.00008185
Trial 97:
  Learning Rate: 0.0007249791321099133
  Sigma Multiplier: 0.35018586575529964
  Initialization Multiplier: 1.853047693116239
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:29<00:00,  8.57it/s, loss=0.024691, elapsed time=0.08, total time=29.6]
[I 2025-06-02 14:53:22,183] Trial 97 finished with value: 0.024691175670029708 and parameters: {'learning_rate': 0.0007249791321099133, 'sigma_multiplier': 0.35018586575529964, 'num_layers': 2, 'initialization_multiplier': 1.853047693116239}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 97 final loss: 0.02469118
Trial 98:
  Learning Rate: 0.00027690749895958617
  Sigma Multiplier: 0.18450952784276214
  Initialization Multiplier: 0.40049272690469995
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:27<00:00,  9.05it/s, loss=0.000427, elapsed time=0.09, total time=27.9]
[I 2025-06-02 14:53:50,137] Trial 98 finished with value: 0.00042685514187806563 and parameters: {'learning_rate': 0.00027690749895958617, 'sigma_multiplier': 0.18450952784276214, 'num_layers': 2, 'initialization_multiplier': 0.40049272690469995}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 98 final loss: 0.00042686
Trial 99:
  Learning Rate: 0.0011236523169525575
  Sigma Multiplier: 0.39688773101369895
  Initialization Multiplier: 0.5862026855521589
  Number of Layers: 2


Training Progress: 100%|██████████| 250/250 [00:21<00:00, 11.73it/s, loss=0.016625, elapsed time=0.07, total time=21.6]
[I 2025-06-02 14:54:11,728] Trial 99 finished with value: 0.016624684242950834 and parameters: {'learning_rate': 0.0011236523169525575, 'sigma_multiplier': 0.39688773101369895, 'num_layers': 2, 'initialization_multiplier': 0.5862026855521589}. Best is trial 65 with value: -0.00016024724958283408.


Training has not converged after 250 steps
Trial 99 final loss: 0.01662468


In [9]:
# Pull the winning trial's objective value and parameter dict out of the study
# returned by run_hpo, then report them.
best_loss_value = study.best_value
best_hyperparams = study.best_params

for summary_line in (
    "\nOptimization Finished!",
    f"Best hyperparameters found: {best_hyperparams}",
    f"Best loss value: {best_loss_value}",
):
    print(summary_line)


Optimization Finished!
Best hyperparameters found: {'learning_rate': 0.0004934380602216942, 'sigma_multiplier': 0.10919685893044441, 'num_layers': 2, 'initialization_multiplier': 0.7445586427307485}
Best loss value: -0.00016024724958283408


In [10]:
# Promote the tuned values to notebook-level constants for the final training run.
# Note: this rebinds NUM_LAYERS, which was already assigned earlier in the notebook.
LR, SIGMA_M, NUM_LAYERS, INIT_M = (
    best_hyperparams[param_name]
    for param_name in (
        'learning_rate',
        'sigma_multiplier',
        'num_layers',
        'initialization_multiplier',
    )
)

In [11]:
def train_on_dataset(dataset=train_ds, n_iters=2000, n_ops=2000, n_samples=2000, seed=42):
    """Train an IQP circuit on ``dataset`` using the tuned hyperparameters.

    The circuit layout and optimizer settings are read from the notebook-level
    names ``NODES``, ``NUM_LAYERS``, ``INIT_M``, ``LR`` and ``SIGMA_M`` at call
    time, so this must run after the cell that assigns them.

    Args:
        dataset: Samples used as the loss "ground_truth", for the
            data-dependent parameter initialization, and for the median
            heuristic. The default is the ``train_ds`` object that existed
            when this function was defined.
        n_iters: Number of iterations passed to ``Trainer.train``.
        n_ops: Forwarded to ``mmd_loss_iqp`` as ``n_ops``.
        n_samples: Forwarded to ``mmd_loss_iqp`` as ``n_samples``.
        seed: Integer seed for the ``jax.random.PRNGKey`` handed to the loss.

    Returns:
        ``trainer.final_params`` after training finishes.
    """
    grid_conn = aachen_connectivity()
    # Number of unordered node pairs (presumably one qubit per possible edge).
    num_qubits = NODES * (NODES - 1) // 2
    gates = efficient_connectivity_gates(grid_conn, num_qubits, NUM_LAYERS)

    circuit = iqp.IqpSimulator(num_qubits, gates, device="lightning.qubit")

    # Data-dependent starting point, rescaled by the tuned multiplier.
    initial_params = initialize_from_data(gates, dataset) * INIT_M
    # Kernel bandwidth: median heuristic on the data, rescaled by the tuned multiplier.
    sigma = median_heuristic(dataset) * SIGMA_M

    loss_kwargs = {
        "params": initial_params,
        "iqp_circuit": circuit,
        "ground_truth": dataset,
        "sigma": [sigma],
        "n_ops": n_ops,
        "n_samples": n_samples,
        "key": jax.random.PRNGKey(seed),
    }

    trainer = iqp.Trainer("Adam", iqp.gen_qml.mmd_loss_iqp, stepsize=LR)
    trainer.train(n_iters=n_iters, loss_kwargs=loss_kwargs, turbo=1)

    return trainer.final_params

In [12]:
# Final training run on the full training set with the tuned hyperparameters.
params = train_on_dataset(dataset=train_ds)

Training Progress: 100%|██████████| 2000/2000 [02:56<00:00, 11.32it/s, loss=-0.000086, elapsed time=0.09, total time=177] 

Training has not converged after 2000 steps





In [13]:
import os
import numpy as np

# Persist the trained parameters; the file name encodes the dataset and the
# hyperparameters that produced them.
params_path = f'./results/params/params_{NODES}N_{TYPE}_{CONN}_LR{LR}_SIGMA{SIGMA_M}_INIT{INIT_M}_NUMLAYERS{NUM_LAYERS}.npy'
# np.save does not create missing directories; make sure the target folder exists
# so a long training run is not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(params_path), exist_ok=True)
np.save(params_path, params)