# Hyperparameter Optimization (HPO)

In [14]:
import iqpopt as iqp
from iqpopt.utils import initialize_from_data, local_gates
import iqpopt.gen_qml as genq
from iqpopt.gen_qml.utils import median_heuristic
import optuna
import pennylane as qml
import jax
from jax import numpy as jnp
from utils.nisq import aachen_connectivity, efficient_connectivity_gates
from datasets.bipartites import BipartiteGraphDataset
from datasets.er import ErdosRenyiGraphDataset
import numpy as np

In [15]:
# Experiment configuration. NODES, TYPE and CONN together select the dataset
# file name used when loading the training data.
NODES, TYPE, CONN = 8, "Bipartite", "Dense"
NUM_LAYERS = 1
# QUBITS equals the number of unordered node pairs, NODES * (NODES - 1) / 2
# (presumably one qubit per possible edge of the graph — confirm).
QUBITS = (NODES * NODES - NODES) // 2

In [16]:
# Pickled dataset file, named after the configured node count, graph type and
# connectivity.
ds_path = f'./datasets/raw_data/{NODES}N_{TYPE}_{CONN}.pkl'

# NOTE(review): nodes=1 / edge_prob=0.1 look like placeholder constructor
# arguments, presumably superseded by whatever from_file loads — confirm.
graph_ds = BipartiteGraphDataset(nodes=1, edge_prob=0.1).from_file(ds_path)

# Copy the dataset's vectors before converting them to a JAX array.
train_ds = jnp.array(graph_ds.vectors.copy())

[Dataset] Loaded 261 samples from ./datasets/raw_data/8N_Bipartite_Dense.pkl
  Created: 2025-05-30T13:15:24.181251
  Unique graphs: 261
  Version: 1.0


In [17]:
# Root JAX PRNG key built from a fixed seed.
base_key = jax.random.PRNGKey(42)

# Connectivity returned by aachen_connectivity() and the gate list built from
# it for QUBITS qubits.
grid_conn = aachen_connectivity()
# NOTE(review): the literal 1 is presumably the layer count and may be meant to
# track NUM_LAYERS — confirm against utils.nisq before replacing it.
gates = efficient_connectivity_gates(grid_conn, QUBITS, 1)

# IQP simulator over those gates, using the 'lightning.qubit' device.
circ = iqp.IqpSimulator(QUBITS, gates, device="lightning.qubit")

In [18]:
# Baseline sigma obtained by applying the median heuristic to the training set.
# It is handed to run_hpo below; the trial logs report a "Sigma Multiplier",
# presumably applied to this value — confirm in utils.hpo.
base_sigma = median_heuristic(train_ds)

In [19]:
from utils.hpo import run_hpo

In [20]:
# Keyword settings forwarded to run_hpo. n_trials is the requested number of
# trials, and n_iters_hpo matches the 150 training steps each trial reports in
# the progress output.
hpo_kwargs = {
    "train_ds": train_ds,
    "n_trials": 100,
    "n_iters_hpo": 150,
    "n_ops": 2000,
    "n_samples": 2000,
}

# Connectivity, qubit count and baseline sigma are passed positionally, in the
# same order as before; the returned study is kept for later inspection.
study = run_hpo(grid_conn, QUBITS, base_sigma, **hpo_kwargs)

[I 2025-06-07 22:03:35,056] A new study created in memory with name: no-name-caceac5a-f838-43aa-ac74-167e4a12b958


Trial 0:
  Learning Rate: 0.00020920610965706498
  Sigma Multiplier: 0.9267376474182981
  Initialization Multiplier: 0.13506239907877726
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.59it/s, loss=0.114332, elapsed time=0.02, total time=12.1]
[I 2025-06-07 22:03:47,872] Trial 0 finished with value: 0.11433172891666163 and parameters: {'learning_rate': 0.00020920610965706498, 'sigma_multiplier': 0.9267376474182981, 'num_layers': 3, 'initialization_multiplier': 0.13506239907877726}. Best is trial 0 with value: 0.11433172891666163.


Training has not converged after 150 steps
Trial 0 final loss: 0.11433173
Trial 1:
  Learning Rate: 7.51420071708812e-05
  Sigma Multiplier: 1.5294635965663048
  Initialization Multiplier: 1.1435756946141078
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.12it/s, loss=0.018020, elapsed time=0.02, total time=5.85]
[I 2025-06-07 22:03:53,736] Trial 1 finished with value: 0.018020203110270947 and parameters: {'learning_rate': 7.51420071708812e-05, 'sigma_multiplier': 1.5294635965663048, 'num_layers': 1, 'initialization_multiplier': 1.1435756946141078}. Best is trial 1 with value: 0.018020203110270947.


Training has not converged after 150 steps
Trial 1 final loss: 0.01802020
Trial 2:
  Learning Rate: 0.04000568971210246
  Sigma Multiplier: 0.5381078045165725
  Initialization Multiplier: 1.370773077704032
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.07it/s, loss=0.004363, elapsed time=0.05, total time=9.62]
[I 2025-06-07 22:04:03,383] Trial 2 finished with value: 0.004363404669375494 and parameters: {'learning_rate': 0.04000568971210246, 'sigma_multiplier': 0.5381078045165725, 'num_layers': 4, 'initialization_multiplier': 1.370773077704032}. Best is trial 2 with value: 0.004363404669375494.


Training has not converged after 150 steps
Trial 2 final loss: 0.00436340
Trial 3:
  Learning Rate: 0.00514086401122043
  Sigma Multiplier: 0.9925262129312334
  Initialization Multiplier: 0.7739460491380102
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.24it/s, loss=0.000369, elapsed time=0.04, total time=7.67]
[I 2025-06-07 22:04:11,081] Trial 3 finished with value: 0.0003688149532599076 and parameters: {'learning_rate': 0.00514086401122043, 'sigma_multiplier': 0.9925262129312334, 'num_layers': 4, 'initialization_multiplier': 0.7739460491380102}. Best is trial 3 with value: 0.0003688149532599076.


Training has not converged after 150 steps
Trial 3 final loss: 0.00036881
Trial 4:
  Learning Rate: 0.011160144371724835
  Sigma Multiplier: 0.822162679134401
  Initialization Multiplier: 0.9507635805228539
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.42it/s, loss=0.002079, elapsed time=0.04, total time=7.97]
[I 2025-06-07 22:04:19,084] Trial 4 finished with value: 0.002078611101040876 and parameters: {'learning_rate': 0.011160144371724835, 'sigma_multiplier': 0.822162679134401, 'num_layers': 4, 'initialization_multiplier': 0.9507635805228539}. Best is trial 3 with value: 0.0003688149532599076.


Training has not converged after 150 steps
Trial 4 final loss: 0.00207861
Trial 5:
  Learning Rate: 0.0002975799938363662
  Sigma Multiplier: 1.081358768696012
  Initialization Multiplier: 1.9160880901419215
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.18it/s, loss=0.038761, elapsed time=0.05, total time=7.66]
[I 2025-06-07 22:04:26,780] Trial 5 finished with value: 0.0387611396634556 and parameters: {'learning_rate': 0.0002975799938363662, 'sigma_multiplier': 1.081358768696012, 'num_layers': 4, 'initialization_multiplier': 1.9160880901419215}. Best is trial 3 with value: 0.0003688149532599076.


Training has not converged after 150 steps
Trial 5 final loss: 0.03876114
Trial 6:
  Learning Rate: 0.012629329881867102
  Sigma Multiplier: 1.838514082887099
  Initialization Multiplier: 1.7939158083987674
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.71it/s, loss=-0.000336, elapsed time=0.03, total time=5.66]
[I 2025-06-07 22:04:32,463] Trial 6 finished with value: -0.0003361187228255863 and parameters: {'learning_rate': 0.012629329881867102, 'sigma_multiplier': 1.838514082887099, 'num_layers': 2, 'initialization_multiplier': 1.7939158083987674}. Best is trial 6 with value: -0.0003361187228255863.


Training has not converged after 150 steps
Trial 6 final loss: -0.00033612
Trial 7:
  Learning Rate: 0.001319043412210596
  Sigma Multiplier: 0.42277540425098276
  Initialization Multiplier: 1.3478970500431473
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.30it/s, loss=0.006207, elapsed time=0.05, total time=10.7]
[I 2025-06-07 22:04:43,244] Trial 7 finished with value: 0.006206554550001129 and parameters: {'learning_rate': 0.001319043412210596, 'sigma_multiplier': 0.42277540425098276, 'num_layers': 5, 'initialization_multiplier': 1.3478970500431473}. Best is trial 6 with value: -0.0003361187228255863.


Training has not converged after 150 steps
Trial 7 final loss: 0.00620655
Trial 8:
  Learning Rate: 0.0012507092089153599
  Sigma Multiplier: 1.3600556657259646
  Initialization Multiplier: 0.9640975915951372
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.43it/s, loss=0.011949, elapsed time=0.04, total time=5.93]
[I 2025-06-07 22:04:49,191] Trial 8 finished with value: 0.011948768373695495 and parameters: {'learning_rate': 0.0012507092089153599, 'sigma_multiplier': 1.3600556657259646, 'num_layers': 2, 'initialization_multiplier': 0.9640975915951372}. Best is trial 6 with value: -0.0003361187228255863.


Training has not converged after 150 steps
Trial 8 final loss: 0.01194877
Trial 9:
  Learning Rate: 0.022958211202527706
  Sigma Multiplier: 0.9544882658995565
  Initialization Multiplier: 0.12086150898256745
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.62it/s, loss=0.000648, elapsed time=0.04, total time=8.32]
[I 2025-06-07 22:04:57,541] Trial 9 finished with value: 0.0006483842793655133 and parameters: {'learning_rate': 0.022958211202527706, 'sigma_multiplier': 0.9544882658995565, 'num_layers': 3, 'initialization_multiplier': 0.12086150898256745}. Best is trial 6 with value: -0.0003361187228255863.


Training has not converged after 150 steps
Trial 9 final loss: 0.00064838
Trial 10:
  Learning Rate: 0.06891118461271599
  Sigma Multiplier: 1.9507511093848413
  Initialization Multiplier: 1.9389297331725006
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.03it/s, loss=-0.000290, elapsed time=0.04, total time=6.03]
[I 2025-06-07 22:05:03,659] Trial 10 finished with value: -0.00029012639934173024 and parameters: {'learning_rate': 0.06891118461271599, 'sigma_multiplier': 1.9507511093848413, 'num_layers': 1, 'initialization_multiplier': 1.9389297331725006}. Best is trial 6 with value: -0.0003361187228255863.


Training has not converged after 150 steps
Trial 10 final loss: -0.00029013
Trial 11:
  Learning Rate: 0.09387375438851604
  Sigma Multiplier: 1.9516127656581836
  Initialization Multiplier: 1.9806914385831318
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.44it/s, loss=-0.000346, elapsed time=0.05, total time=6.41]
[I 2025-06-07 22:05:10,095] Trial 11 finished with value: -0.00034612709592001466 and parameters: {'learning_rate': 0.09387375438851604, 'sigma_multiplier': 1.9516127656581836, 'num_layers': 1, 'initialization_multiplier': 1.9806914385831318}. Best is trial 11 with value: -0.00034612709592001466.


Training has not converged after 150 steps
Trial 11 final loss: -0.00034613
Trial 12:
  Learning Rate: 0.09785240191603475
  Sigma Multiplier: 1.997662075570952
  Initialization Multiplier: 1.639503865034247
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.30it/s, loss=-0.000133, elapsed time=0.04, total time=8.03]
[I 2025-06-07 22:05:18,158] Trial 12 finished with value: -0.0001334364690882209 and parameters: {'learning_rate': 0.09785240191603475, 'sigma_multiplier': 1.997662075570952, 'num_layers': 2, 'initialization_multiplier': 1.639503865034247}. Best is trial 11 with value: -0.00034612709592001466.


Training has not converged after 150 steps
Trial 12 final loss: -0.00013344
Trial 13:
  Learning Rate: 0.005526835486367488
  Sigma Multiplier: 1.698407230447402
  Initialization Multiplier: 1.6603271041292278
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.68it/s, loss=0.002302, elapsed time=0.03, total time=7.52]
[I 2025-06-07 22:05:25,713] Trial 13 finished with value: 0.002302252379908779 and parameters: {'learning_rate': 0.005526835486367488, 'sigma_multiplier': 1.698407230447402, 'num_layers': 2, 'initialization_multiplier': 1.6603271041292278}. Best is trial 11 with value: -0.00034612709592001466.


Training has not converged after 150 steps
Trial 13 final loss: 0.00230225
Trial 14:
  Learning Rate: 0.014849750345100034
  Sigma Multiplier: 1.6899694913332264
  Initialization Multiplier: 1.9926703102046042
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 32.76it/s, loss=-0.000120, elapsed time=0.05, total time=4.81]
[I 2025-06-07 22:05:30,542] Trial 14 finished with value: -0.00012033992403754947 and parameters: {'learning_rate': 0.014849750345100034, 'sigma_multiplier': 1.6899694913332264, 'num_layers': 1, 'initialization_multiplier': 1.9926703102046042}. Best is trial 11 with value: -0.00034612709592001466.


Training has not converged after 150 steps
Trial 14 final loss: -0.00012034
Trial 15:
  Learning Rate: 0.0042556613550545335
  Sigma Multiplier: 1.3274780419902708
  Initialization Multiplier: 1.6561734654175266
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.29it/s, loss=0.003316, elapsed time=0.03, total time=5.52]
[I 2025-06-07 22:05:36,088] Trial 15 finished with value: 0.0033156504700621622 and parameters: {'learning_rate': 0.0042556613550545335, 'sigma_multiplier': 1.3274780419902708, 'num_layers': 2, 'initialization_multiplier': 1.6561734654175266}. Best is trial 11 with value: -0.00034612709592001466.


Training has not converged after 150 steps
Trial 15 final loss: 0.00331565
Trial 16:
  Learning Rate: 0.0357986583088428
  Sigma Multiplier: 0.1781265177473519
  Initialization Multiplier: 0.5496153006997349
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.56it/s, loss=0.000361, elapsed time=0.04, total time=7.89] 
[I 2025-06-07 22:05:44,008] Trial 16 finished with value: 0.000361493676564525 and parameters: {'learning_rate': 0.0357986583088428, 'sigma_multiplier': 0.1781265177473519, 'num_layers': 1, 'initialization_multiplier': 0.5496153006997349}. Best is trial 11 with value: -0.00034612709592001466.


Training has not converged after 150 steps
Trial 16 final loss: 0.00036149
Trial 17:
  Learning Rate: 0.09994731840237957
  Sigma Multiplier: 1.7602808486134311
  Initialization Multiplier: 1.4332747964443837
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.10it/s, loss=-0.000367, elapsed time=0.03, total time=5.98]
[I 2025-06-07 22:05:50,017] Trial 17 finished with value: -0.00036698287928743525 and parameters: {'learning_rate': 0.09994731840237957, 'sigma_multiplier': 1.7602808486134311, 'num_layers': 2, 'initialization_multiplier': 1.4332747964443837}. Best is trial 17 with value: -0.00036698287928743525.


Training has not converged after 150 steps
Trial 17 final loss: -0.00036698
Trial 18:
  Learning Rate: 0.0868500559714475
  Sigma Multiplier: 1.4619911481613728
  Initialization Multiplier: 1.3920218928655408
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.95it/s, loss=-0.000145, elapsed time=0.03, total time=6.03]
[I 2025-06-07 22:05:56,080] Trial 18 finished with value: -0.00014507790997673155 and parameters: {'learning_rate': 0.0868500559714475, 'sigma_multiplier': 1.4619911481613728, 'num_layers': 1, 'initialization_multiplier': 1.3920218928655408}. Best is trial 17 with value: -0.00036698287928743525.


Training has not converged after 150 steps
Trial 18 final loss: -0.00014508
Trial 19:
  Learning Rate: 0.0006366808442579972
  Sigma Multiplier: 1.6517161597417962
  Initialization Multiplier: 1.520038372576414
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.86it/s, loss=0.007399, elapsed time=0.03, total time=7.44]
[I 2025-06-07 22:06:03,564] Trial 19 finished with value: 0.007399377933493516 and parameters: {'learning_rate': 0.0006366808442579972, 'sigma_multiplier': 1.6517161597417962, 'num_layers': 3, 'initialization_multiplier': 1.520038372576414}. Best is trial 17 with value: -0.00036698287928743525.


Training has not converged after 150 steps
Trial 19 final loss: 0.00739938
Trial 20:
  Learning Rate: 0.04351901696536566
  Sigma Multiplier: 1.22612870624712
  Initialization Multiplier: 1.1860780419139454
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.25it/s, loss=-0.000085, elapsed time=0.04, total time=7.05]
[I 2025-06-07 22:06:10,649] Trial 20 finished with value: -8.534886554171908e-05 and parameters: {'learning_rate': 0.04351901696536566, 'sigma_multiplier': 1.22612870624712, 'num_layers': 2, 'initialization_multiplier': 1.1860780419139454}. Best is trial 17 with value: -0.00036698287928743525.


Training has not converged after 150 steps
Trial 20 final loss: -0.00008535
Trial 21:
  Learning Rate: 0.013620871098161187
  Sigma Multiplier: 1.8485217070488034
  Initialization Multiplier: 1.7492182754962307
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.40it/s, loss=-0.000096, elapsed time=0.04, total time=7.65]
[I 2025-06-07 22:06:18,338] Trial 21 finished with value: -9.621573030069966e-05 and parameters: {'learning_rate': 0.013620871098161187, 'sigma_multiplier': 1.8485217070488034, 'num_layers': 2, 'initialization_multiplier': 1.7492182754962307}. Best is trial 17 with value: -0.00036698287928743525.


Training has not converged after 150 steps
Trial 21 final loss: -0.00009622
Trial 22:
  Learning Rate: 0.026359374288530257
  Sigma Multiplier: 1.8016854412787044
  Initialization Multiplier: 1.8412789371372236
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 21.14it/s, loss=-0.000454, elapsed time=0.04, total time=7.39]
[I 2025-06-07 22:06:25,768] Trial 22 finished with value: -0.0004539468517293779 and parameters: {'learning_rate': 0.026359374288530257, 'sigma_multiplier': 1.8016854412787044, 'num_layers': 2, 'initialization_multiplier': 1.8412789371372236}. Best is trial 22 with value: -0.0004539468517293779.


Training has not converged after 150 steps
Trial 22 final loss: -0.00045395
Trial 23:
  Learning Rate: 0.045861767103238374
  Sigma Multiplier: 1.80240894184778
  Initialization Multiplier: 1.8358221762851235
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.37it/s, loss=-0.000191, elapsed time=0.02, total time=7.06]
[I 2025-06-07 22:06:32,853] Trial 23 finished with value: -0.0001912272464445882 and parameters: {'learning_rate': 0.045861767103238374, 'sigma_multiplier': 1.80240894184778, 'num_layers': 1, 'initialization_multiplier': 1.8358221762851235}. Best is trial 22 with value: -0.0004539468517293779.


Training has not converged after 150 steps
Trial 23 final loss: -0.00019123
Trial 24:
  Learning Rate: 0.03503399141610891
  Sigma Multiplier: 1.546727116369354
  Initialization Multiplier: 1.5616535204189042
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.37it/s, loss=-0.000146, elapsed time=0.04, total time=7.79]
[I 2025-06-07 22:06:40,694] Trial 24 finished with value: -0.00014598326328430194 and parameters: {'learning_rate': 0.03503399141610891, 'sigma_multiplier': 1.546727116369354, 'num_layers': 3, 'initialization_multiplier': 1.5616535204189042}. Best is trial 22 with value: -0.0004539468517293779.


Training has not converged after 150 steps
Trial 24 final loss: -0.00014598
Trial 25:
  Learning Rate: 0.09233126682289218
  Sigma Multiplier: 1.9645005738179964
  Initialization Multiplier: 1.471281888537898
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.01it/s, loss=-0.000241, elapsed time=0.02, total time=5.87]
[I 2025-06-07 22:06:46,600] Trial 25 finished with value: -0.00024095794288473888 and parameters: {'learning_rate': 0.09233126682289218, 'sigma_multiplier': 1.9645005738179964, 'num_layers': 2, 'initialization_multiplier': 1.471281888537898}. Best is trial 22 with value: -0.0004539468517293779.


Training has not converged after 150 steps
Trial 25 final loss: -0.00024096
Trial 26:
  Learning Rate: 0.0029037648387455757
  Sigma Multiplier: 1.752581118301449
  Initialization Multiplier: 1.2296341075468127
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.18it/s, loss=-0.000101, elapsed time=0.03, total time=5.25]
[I 2025-06-07 22:06:51,877] Trial 26 finished with value: -0.00010056025494571478 and parameters: {'learning_rate': 0.0029037648387455757, 'sigma_multiplier': 1.752581118301449, 'num_layers': 1, 'initialization_multiplier': 1.2296341075468127}. Best is trial 22 with value: -0.0004539468517293779.


Training has not converged after 150 steps
Trial 26 final loss: -0.00010056
Trial 27:
  Learning Rate: 0.02167126748942985
  Sigma Multiplier: 1.5788824264871768
  Initialization Multiplier: 1.799034414697425
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.44it/s, loss=-0.000474, elapsed time=0.02, total time=5.93]
[I 2025-06-07 22:06:57,847] Trial 27 finished with value: -0.00047425718078470113 and parameters: {'learning_rate': 0.02167126748942985, 'sigma_multiplier': 1.5788824264871768, 'num_layers': 2, 'initialization_multiplier': 1.799034414697425}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 27 final loss: -0.00047426
Trial 28:
  Learning Rate: 0.021599807944814465
  Sigma Multiplier: 1.19740063222513
  Initialization Multiplier: 1.7596734601267159
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.95it/s, loss=0.000201, elapsed time=0.04, total time=7.46] 
[I 2025-06-07 22:07:05,396] Trial 28 finished with value: 0.00020147406446063452 and parameters: {'learning_rate': 0.021599807944814465, 'sigma_multiplier': 1.19740063222513, 'num_layers': 3, 'initialization_multiplier': 1.7596734601267159}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 28 final loss: 0.00020147
Trial 29:
  Learning Rate: 0.008255335474161168
  Sigma Multiplier: 1.5484565027056698
  Initialization Multiplier: 0.6970625491689337
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.48it/s, loss=-0.000068, elapsed time=0.05, total time=7.2] 
[I 2025-06-07 22:07:12,635] Trial 29 finished with value: -6.785228385802209e-05 and parameters: {'learning_rate': 0.008255335474161168, 'sigma_multiplier': 1.5484565027056698, 'num_layers': 3, 'initialization_multiplier': 0.6970625491689337}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 29 final loss: -0.00006785
Trial 30:
  Learning Rate: 0.023429631933878446
  Sigma Multiplier: 1.4205407929356983
  Initialization Multiplier: 0.48663792321888777
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.12it/s, loss=-0.000103, elapsed time=0.03, total time=6.49]
[I 2025-06-07 22:07:19,167] Trial 30 finished with value: -0.00010310648017322793 and parameters: {'learning_rate': 0.023429631933878446, 'sigma_multiplier': 1.4205407929356983, 'num_layers': 2, 'initialization_multiplier': 0.48663792321888777}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 30 final loss: -0.00010311
Trial 31:
  Learning Rate: 0.0592453284380825
  Sigma Multiplier: 1.8694815694184315
  Initialization Multiplier: 1.8747319601942816
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.32it/s, loss=-0.000425, elapsed time=0.03, total time=5.98]
[I 2025-06-07 22:07:25,183] Trial 31 finished with value: -0.0004245900355791742 and parameters: {'learning_rate': 0.0592453284380825, 'sigma_multiplier': 1.8694815694184315, 'num_layers': 2, 'initialization_multiplier': 1.8747319601942816}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 31 final loss: -0.00042459
Trial 32:
  Learning Rate: 0.0510764472685366
  Sigma Multiplier: 1.5906550437828584
  Initialization Multiplier: 1.8578605248225821
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.29it/s, loss=-0.000124, elapsed time=0.04, total time=6.25]
[I 2025-06-07 22:07:31,464] Trial 32 finished with value: -0.00012365361262614387 and parameters: {'learning_rate': 0.0510764472685366, 'sigma_multiplier': 1.5906550437828584, 'num_layers': 2, 'initialization_multiplier': 1.8578605248225821}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 32 final loss: -0.00012365
Trial 33:
  Learning Rate: 0.02507855627287595
  Sigma Multiplier: 1.8175819636218042
  Initialization Multiplier: 1.6969157859510455
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.72it/s, loss=-0.000242, elapsed time=0.05, total time=7.16]
[I 2025-06-07 22:07:38,670] Trial 33 finished with value: -0.00024213736645075398 and parameters: {'learning_rate': 0.02507855627287595, 'sigma_multiplier': 1.8175819636218042, 'num_layers': 3, 'initialization_multiplier': 1.6969157859510455}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 33 final loss: -0.00024214
Trial 34:
  Learning Rate: 4.542360596000181e-05
  Sigma Multiplier: 1.649788170039798
  Initialization Multiplier: 1.289559826456636
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.24it/s, loss=0.072158, elapsed time=0.06, total time=5.39]
[I 2025-06-07 22:07:44,099] Trial 34 finished with value: 0.0721575449859564 and parameters: {'learning_rate': 4.542360596000181e-05, 'sigma_multiplier': 1.649788170039798, 'num_layers': 2, 'initialization_multiplier': 1.289559826456636}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 34 final loss: 0.07215754
Trial 35:
  Learning Rate: 0.052265347977629883
  Sigma Multiplier: 1.8687404749334249
  Initialization Multiplier: 1.554769631467488
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.50it/s, loss=-0.000240, elapsed time=0.03, total time=5.93]
[I 2025-06-07 22:07:50,155] Trial 35 finished with value: -0.00024014617070025564 and parameters: {'learning_rate': 0.052265347977629883, 'sigma_multiplier': 1.8687404749334249, 'num_layers': 2, 'initialization_multiplier': 1.554769631467488}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 35 final loss: -0.00024015
Trial 36:
  Learning Rate: 0.007832848277154511
  Sigma Multiplier: 0.6539524304968511
  Initialization Multiplier: 1.1119983333713421
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.60it/s, loss=0.003539, elapsed time=0.06, total time=10]  
[I 2025-06-07 22:08:00,280] Trial 36 finished with value: 0.0035389589014103696 and parameters: {'learning_rate': 0.007832848277154511, 'sigma_multiplier': 0.6539524304968511, 'num_layers': 3, 'initialization_multiplier': 1.1119983333713421}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 36 final loss: 0.00353896
Trial 37:
  Learning Rate: 0.0026742381197305018
  Sigma Multiplier: 1.4973092113572497
  Initialization Multiplier: 1.4384267566681628
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.04it/s, loss=-0.000284, elapsed time=0.04, total time=9.12]
[I 2025-06-07 22:08:09,488] Trial 37 finished with value: -0.0002838263798032352 and parameters: {'learning_rate': 0.0026742381197305018, 'sigma_multiplier': 1.4973092113572497, 'num_layers': 5, 'initialization_multiplier': 1.4384267566681628}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 37 final loss: -0.00028383
Trial 38:
  Learning Rate: 9.393253785354055e-05
  Sigma Multiplier: 0.793207152562847
  Initialization Multiplier: 1.8528933256088305
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 18.95it/s, loss=0.111619, elapsed time=0.06, total time=8.22]
[I 2025-06-07 22:08:17,739] Trial 38 finished with value: 0.1116191899656015 and parameters: {'learning_rate': 9.393253785354055e-05, 'sigma_multiplier': 0.793207152562847, 'num_layers': 2, 'initialization_multiplier': 1.8528933256088305}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 38 final loss: 0.11161919
Trial 39:
  Learning Rate: 0.06129429132616737
  Sigma Multiplier: 1.7471568750704534
  Initialization Multiplier: 1.6043909603148614
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.52it/s, loss=-0.000144, elapsed time=0.05, total time=7.97]
[I 2025-06-07 22:08:25,756] Trial 39 finished with value: -0.00014431342928715275 and parameters: {'learning_rate': 0.06129429132616737, 'sigma_multiplier': 1.7471568750704534, 'num_layers': 4, 'initialization_multiplier': 1.6043909603148614}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 39 final loss: -0.00014431
Trial 40:
  Learning Rate: 0.027890060498484727
  Sigma Multiplier: 1.1297128118194177
  Initialization Multiplier: 1.7732183046497476
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.96it/s, loss=-0.000202, elapsed time=0.04, total time=7.12]
[I 2025-06-07 22:08:32,912] Trial 40 finished with value: -0.0002023788632002656 and parameters: {'learning_rate': 0.027890060498484727, 'sigma_multiplier': 1.1297128118194177, 'num_layers': 2, 'initialization_multiplier': 1.7732183046497476}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 40 final loss: -0.00020238
Trial 41:
  Learning Rate: 0.06857540640653849
  Sigma Multiplier: 1.8970076883927194
  Initialization Multiplier: 1.9471652134138777
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.33it/s, loss=-0.000257, elapsed time=0.03, total time=5.59]
[I 2025-06-07 22:08:38,525] Trial 41 finished with value: -0.0002566452303396292 and parameters: {'learning_rate': 0.06857540640653849, 'sigma_multiplier': 1.8970076883927194, 'num_layers': 1, 'initialization_multiplier': 1.9471652134138777}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 41 final loss: -0.00025665
Trial 42:
  Learning Rate: 0.01748920236126756
  Sigma Multiplier: 1.9350211660638401
  Initialization Multiplier: 1.8908124552320873
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.89it/s, loss=-0.000157, elapsed time=0.03, total time=5.65]
[I 2025-06-07 22:08:44,200] Trial 42 finished with value: -0.00015650553616400938 and parameters: {'learning_rate': 0.01748920236126756, 'sigma_multiplier': 1.9350211660638401, 'num_layers': 1, 'initialization_multiplier': 1.8908124552320873}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 42 final loss: -0.00015651
Trial 43:
  Learning Rate: 0.035325395219902855
  Sigma Multiplier: 1.7551722172489408
  Initialization Multiplier: 1.9915712815611992
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.18it/s, loss=-0.000213, elapsed time=0.03, total time=5.79]
[I 2025-06-07 22:08:50,028] Trial 43 finished with value: -0.00021293315636557118 and parameters: {'learning_rate': 0.035325395219902855, 'sigma_multiplier': 1.7551722172489408, 'num_layers': 1, 'initialization_multiplier': 1.9915712815611992}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 43 final loss: -0.00021293
Trial 44:
  Learning Rate: 0.0980158637004381
  Sigma Multiplier: 1.9932176936201396
  Initialization Multiplier: 0.24454824790456497
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.75it/s, loss=-0.000207, elapsed time=0.02, total time=6.61]
[I 2025-06-07 22:08:56,680] Trial 44 finished with value: -0.00020658290659830733 and parameters: {'learning_rate': 0.0980158637004381, 'sigma_multiplier': 1.9932176936201396, 'num_layers': 2, 'initialization_multiplier': 0.24454824790456497}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 44 final loss: -0.00020658
Trial 45:
  Learning Rate: 0.009856470408459536
  Sigma Multiplier: 1.6352418541969003
  Initialization Multiplier: 1.7340029640749062
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.33it/s, loss=0.000029, elapsed time=0.04, total time=5.95] 
[I 2025-06-07 22:09:02,666] Trial 45 finished with value: 2.887356120386702e-05 and parameters: {'learning_rate': 0.009856470408459536, 'sigma_multiplier': 1.6352418541969003, 'num_layers': 1, 'initialization_multiplier': 1.7340029640749062}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 45 final loss: 0.00002887
Trial 46:
  Learning Rate: 0.07241686475951102
  Sigma Multiplier: 1.8927838871536724
  Initialization Multiplier: 1.0933049105430312
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.25it/s, loss=-0.000091, elapsed time=0.04, total time=6.47]
[I 2025-06-07 22:09:09,181] Trial 46 finished with value: -9.144891854317671e-05 and parameters: {'learning_rate': 0.07241686475951102, 'sigma_multiplier': 1.8927838871536724, 'num_layers': 2, 'initialization_multiplier': 1.0933049105430312}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 46 final loss: -0.00009145
Trial 47:
  Learning Rate: 0.01756554940161855
  Sigma Multiplier: 1.3036271260600787
  Initialization Multiplier: 1.9163395356915527
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.02it/s, loss=-0.000320, elapsed time=0.05, total time=8.16]
[I 2025-06-07 22:09:17,383] Trial 47 finished with value: -0.0003198314896564871 and parameters: {'learning_rate': 0.01756554940161855, 'sigma_multiplier': 1.3036271260600787, 'num_layers': 3, 'initialization_multiplier': 1.9163395356915527}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 47 final loss: -0.00031983
Trial 48:
  Learning Rate: 0.0004677158656954059
  Sigma Multiplier: 1.718112303597083
  Initialization Multiplier: 1.828557125186225
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.68it/s, loss=0.037528, elapsed time=0.03, total time=6.14]
[I 2025-06-07 22:09:23,557] Trial 48 finished with value: 0.0375282206881929 and parameters: {'learning_rate': 0.0004677158656954059, 'sigma_multiplier': 1.718112303597083, 'num_layers': 1, 'initialization_multiplier': 1.828557125186225}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 48 final loss: 0.03752822
Trial 49:
  Learning Rate: 0.05873915622440126
  Sigma Multiplier: 1.797850501261806
  Initialization Multiplier: 1.698046242175229
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.10it/s, loss=-0.000119, elapsed time=0.04, total time=6.85]
[I 2025-06-07 22:09:30,445] Trial 49 finished with value: -0.00011888983249356086 and parameters: {'learning_rate': 0.05873915622440126, 'sigma_multiplier': 1.797850501261806, 'num_layers': 2, 'initialization_multiplier': 1.698046242175229}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 49 final loss: -0.00011889
Trial 50:
  Learning Rate: 0.030572994478039132
  Sigma Multiplier: 1.4254154298557948
  Initialization Multiplier: 1.6258529230924772
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.82it/s, loss=-0.000410, elapsed time=0.05, total time=8.83]
[I 2025-06-07 22:09:39,329] Trial 50 finished with value: -0.000410021881850518 and parameters: {'learning_rate': 0.030572994478039132, 'sigma_multiplier': 1.4254154298557948, 'num_layers': 4, 'initialization_multiplier': 1.6258529230924772}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 50 final loss: -0.00041002
Trial 51:
  Learning Rate: 0.03163023483604532
  Sigma Multiplier: 1.4464564318848205
  Initialization Multiplier: 1.645853591602018
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.89it/s, loss=-0.000232, elapsed time=0.04, total time=8.65]
[I 2025-06-07 22:09:48,025] Trial 51 finished with value: -0.00023160621091444385 and parameters: {'learning_rate': 0.03163023483604532, 'sigma_multiplier': 1.4464564318848205, 'num_layers': 4, 'initialization_multiplier': 1.645853591602018}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 51 final loss: -0.00023161
Trial 52:
  Learning Rate: 0.040704582151166936
  Sigma Multiplier: 1.5892980736802667
  Initialization Multiplier: 1.9764607236215066
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.28it/s, loss=-0.000262, elapsed time=0.06, total time=10.7]
[I 2025-06-07 22:09:58,853] Trial 52 finished with value: -0.0002617291470634223 and parameters: {'learning_rate': 0.040704582151166936, 'sigma_multiplier': 1.5892980736802667, 'num_layers': 5, 'initialization_multiplier': 1.9764607236215066}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 52 final loss: -0.00026173
Trial 53:
  Learning Rate: 0.07134512311895524
  Sigma Multiplier: 1.3700364203368376
  Initialization Multiplier: 1.494694396627461
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.44it/s, loss=0.000043, elapsed time=0.14, total time=10.1] 
[I 2025-06-07 22:10:09,021] Trial 53 finished with value: 4.2598419374518695e-05 and parameters: {'learning_rate': 0.07134512311895524, 'sigma_multiplier': 1.3700364203368376, 'num_layers': 4, 'initialization_multiplier': 1.494694396627461}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 53 final loss: 0.00004260
Trial 54:
  Learning Rate: 0.011848907404342886
  Sigma Multiplier: 1.8594728777163367
  Initialization Multiplier: 0.8775789566249511
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.95it/s, loss=-0.000266, elapsed time=0.05, total time=10.4]
[I 2025-06-07 22:10:19,425] Trial 54 finished with value: -0.0002663726756645543 and parameters: {'learning_rate': 0.011848907404342886, 'sigma_multiplier': 1.8594728777163367, 'num_layers': 4, 'initialization_multiplier': 0.8775789566249511}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 54 final loss: -0.00026637
Trial 55:
  Learning Rate: 0.006129267309218792
  Sigma Multiplier: 1.7006586442517804
  Initialization Multiplier: 1.363873955660007
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.41it/s, loss=-0.000003, elapsed time=0.06, total time=12.5]
[I 2025-06-07 22:10:32,019] Trial 55 finished with value: -3.1762081059188015e-06 and parameters: {'learning_rate': 0.006129267309218792, 'sigma_multiplier': 1.7006586442517804, 'num_layers': 3, 'initialization_multiplier': 1.363873955660007}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 55 final loss: -0.00000318
Trial 56:
  Learning Rate: 0.04746092935598124
  Sigma Multiplier: 1.6152997351240845
  Initialization Multiplier: 1.7901239003525307
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.21it/s, loss=-0.000418, elapsed time=0.04, total time=9.56]
[I 2025-06-07 22:10:41,672] Trial 56 finished with value: -0.00041812107993382294 and parameters: {'learning_rate': 0.04746092935598124, 'sigma_multiplier': 1.6152997351240845, 'num_layers': 2, 'initialization_multiplier': 1.7901239003525307}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 56 final loss: -0.00041812
Trial 57:
  Learning Rate: 0.017427945879660473
  Sigma Multiplier: 1.6086039861205075
  Initialization Multiplier: 1.5677497730519503
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 18.84it/s, loss=-0.000080, elapsed time=0.04, total time=8.27]
[I 2025-06-07 22:10:49,978] Trial 57 finished with value: -8.047593943355148e-05 and parameters: {'learning_rate': 0.017427945879660473, 'sigma_multiplier': 1.6086039861205075, 'num_layers': 2, 'initialization_multiplier': 1.5677497730519503}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 57 final loss: -0.00008048
Trial 58:
  Learning Rate: 0.04133540513623128
  Sigma Multiplier: 1.5232113692450726
  Initialization Multiplier: 1.8111137829633281
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.15it/s, loss=-0.000244, elapsed time=0.06, total time=8.19]
[I 2025-06-07 22:10:58,205] Trial 58 finished with value: -0.00024352833213934724 and parameters: {'learning_rate': 0.04133540513623128, 'sigma_multiplier': 1.5232113692450726, 'num_layers': 2, 'initialization_multiplier': 1.8111137829633281}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 58 final loss: -0.00024353
Trial 59:
  Learning Rate: 0.02642664524837777
  Sigma Multiplier: 1.264790250967699
  Initialization Multiplier: 1.6407492984292578
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.62it/s, loss=0.000210, elapsed time=0.03, total time=7.25] 
[I 2025-06-07 22:11:05,489] Trial 59 finished with value: 0.000209996624061583 and parameters: {'learning_rate': 0.02642664524837777, 'sigma_multiplier': 1.264790250967699, 'num_layers': 2, 'initialization_multiplier': 1.6407492984292578}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 59 final loss: 0.00021000
Trial 60:
  Learning Rate: 0.0012493563981450654
  Sigma Multiplier: 1.0295028912208453
  Initialization Multiplier: 1.4202450007855245
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 18.77it/s, loss=0.006812, elapsed time=0.05, total time=8.3] 
[I 2025-06-07 22:11:13,831] Trial 60 finished with value: 0.006811879187682794 and parameters: {'learning_rate': 0.0012493563981450654, 'sigma_multiplier': 1.0295028912208453, 'num_layers': 3, 'initialization_multiplier': 1.4202450007855245}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 60 final loss: 0.00681188
Trial 61:
  Learning Rate: 0.08172082696539787
  Sigma Multiplier: 1.7645995913476145
  Initialization Multiplier: 1.9032627366646204
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.69it/s, loss=-0.000188, elapsed time=0.03, total time=6.39]
[I 2025-06-07 22:11:20,251] Trial 61 finished with value: -0.00018797048185115596 and parameters: {'learning_rate': 0.08172082696539787, 'sigma_multiplier': 1.7645995913476145, 'num_layers': 2, 'initialization_multiplier': 1.9032627366646204}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 61 final loss: -0.00018797
Trial 62:
  Learning Rate: 0.09975630265075709
  Sigma Multiplier: 1.9247206038043627
  Initialization Multiplier: 1.783049636445993
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.80it/s, loss=-0.000203, elapsed time=0.05, total time=6.34]
[I 2025-06-07 22:11:26,623] Trial 62 finished with value: -0.00020258547117336118 and parameters: {'learning_rate': 0.09975630265075709, 'sigma_multiplier': 1.9247206038043627, 'num_layers': 2, 'initialization_multiplier': 1.783049636445993}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 62 final loss: -0.00020259
Trial 63:
  Learning Rate: 0.05279438151488817
  Sigma Multiplier: 1.6658408186381726
  Initialization Multiplier: 1.7083674572556862
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.04it/s, loss=-0.000096, elapsed time=0.03, total time=6.27]
[I 2025-06-07 22:11:32,931] Trial 63 finished with value: -9.62948892751458e-05 and parameters: {'learning_rate': 0.05279438151488817, 'sigma_multiplier': 1.6658408186381726, 'num_layers': 1, 'initialization_multiplier': 1.7083674572556862}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 63 final loss: -0.00009629
Trial 64:
  Learning Rate: 0.03167152707811468
  Sigma Multiplier: 0.22263902299867833
  Initialization Multiplier: 1.8872799409788026
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.62it/s, loss=0.000643, elapsed time=0.08, total time=11.4]
[I 2025-06-07 22:11:44,377] Trial 64 finished with value: 0.0006431216300553471 and parameters: {'learning_rate': 0.03167152707811468, 'sigma_multiplier': 0.22263902299867833, 'num_layers': 2, 'initialization_multiplier': 1.8872799409788026}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 64 final loss: 0.00064312
Trial 65:
  Learning Rate: 0.04654201064184498
  Sigma Multiplier: 1.818790708586034
  Initialization Multiplier: 1.9977649129507018
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.11it/s, loss=-0.000176, elapsed time=0.04, total time=7.75]
[I 2025-06-07 22:11:52,171] Trial 65 finished with value: -0.00017645463469771717 and parameters: {'learning_rate': 0.04654201064184498, 'sigma_multiplier': 1.818790708586034, 'num_layers': 3, 'initialization_multiplier': 1.9977649129507018}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 65 final loss: -0.00017645
Trial 66:
  Learning Rate: 0.019660560402592273
  Sigma Multiplier: 1.379780608750491
  Initialization Multiplier: 1.801459118107482
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.48it/s, loss=-0.000295, elapsed time=0.07, total time=10.7]
[I 2025-06-07 22:12:02,958] Trial 66 finished with value: -0.0002945770555231525 and parameters: {'learning_rate': 0.019660560402592273, 'sigma_multiplier': 1.379780608750491, 'num_layers': 5, 'initialization_multiplier': 1.801459118107482}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 66 final loss: -0.00029458
Trial 67:
  Learning Rate: 0.06488767736757338
  Sigma Multiplier: 1.4859691317873698
  Initialization Multiplier: 1.2963772392316921
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.84it/s, loss=-0.000335, elapsed time=0.06, total time=8.71]
[I 2025-06-07 22:12:11,719] Trial 67 finished with value: -0.0003353690925175674 and parameters: {'learning_rate': 0.06488767736757338, 'sigma_multiplier': 1.4859691317873698, 'num_layers': 3, 'initialization_multiplier': 1.2963772392316921}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 67 final loss: -0.00033537
Trial 68:
  Learning Rate: 0.013291851095853728
  Sigma Multiplier: 1.577407052214372
  Initialization Multiplier: 1.6127306615145311
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.49it/s, loss=0.000003, elapsed time=0.05, total time=7.61] 
[I 2025-06-07 22:12:19,377] Trial 68 finished with value: 2.899518696924369e-06 and parameters: {'learning_rate': 0.013291851095853728, 'sigma_multiplier': 1.577407052214372, 'num_layers': 2, 'initialization_multiplier': 1.6127306615145311}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 68 final loss: 0.00000290
Trial 69:
  Learning Rate: 0.04023331600565887
  Sigma Multiplier: 1.9878373772623255
  Initialization Multiplier: 1.5178058852867256
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.83it/s, loss=-0.000239, elapsed time=0.03, total time=6.15]
[I 2025-06-07 22:12:25,561] Trial 69 finished with value: -0.0002394060748751112 and parameters: {'learning_rate': 0.04023331600565887, 'sigma_multiplier': 1.9878373772623255, 'num_layers': 1, 'initialization_multiplier': 1.5178058852867256}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 69 final loss: -0.00023941
Trial 70:
  Learning Rate: 0.00017328158138110383
  Sigma Multiplier: 1.8097698721870255
  Initialization Multiplier: 1.7232253676729454
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.84it/s, loss=0.032089, elapsed time=0.05, total time=9.18]
[I 2025-06-07 22:12:34,800] Trial 70 finished with value: 0.03208913090223839 and parameters: {'learning_rate': 0.00017328158138110383, 'sigma_multiplier': 1.8097698721870255, 'num_layers': 4, 'initialization_multiplier': 1.7232253676729454}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 70 final loss: 0.03208913
Trial 71:
  Learning Rate: 0.027162739294348127
  Sigma Multiplier: 1.6810450904145575
  Initialization Multiplier: 1.8514844971217779
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.56it/s, loss=-0.000104, elapsed time=0.05, total time=7.58]
[I 2025-06-07 22:12:42,423] Trial 71 finished with value: -0.00010358746478931466 and parameters: {'learning_rate': 0.027162739294348127, 'sigma_multiplier': 1.6810450904145575, 'num_layers': 2, 'initialization_multiplier': 1.8514844971217779}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 71 final loss: -0.00010359
Trial 72:
  Learning Rate: 0.058256064289291275
  Sigma Multiplier: 1.8656552730747247
  Initialization Multiplier: 1.9226804811389857
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.51it/s, loss=-0.000176, elapsed time=0.04, total time=7.32]
[I 2025-06-07 22:12:49,782] Trial 72 finished with value: -0.00017636715655907509 and parameters: {'learning_rate': 0.058256064289291275, 'sigma_multiplier': 1.8656552730747247, 'num_layers': 2, 'initialization_multiplier': 1.9226804811389857}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 72 final loss: -0.00017637
Trial 73:
  Learning Rate: 0.02190441277645796
  Sigma Multiplier: 1.944216782910633
  Initialization Multiplier: 1.668763959607582
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.72it/s, loss=0.000119, elapsed time=0.03, total time=7.24] 
[I 2025-06-07 22:12:57,054] Trial 73 finished with value: 0.00011907601183458378 and parameters: {'learning_rate': 0.02190441277645796, 'sigma_multiplier': 1.944216782910633, 'num_layers': 2, 'initialization_multiplier': 1.668763959607582}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 73 final loss: 0.00011908
Trial 74:
  Learning Rate: 0.08192495579694715
  Sigma Multiplier: 1.7701231354966986
  Initialization Multiplier: 1.775796964208485
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 21.01it/s, loss=-0.000072, elapsed time=0.04, total time=7.51]
[I 2025-06-07 22:13:04,642] Trial 74 finished with value: -7.199637142557443e-05 and parameters: {'learning_rate': 0.08192495579694715, 'sigma_multiplier': 1.7701231354966986, 'num_layers': 2, 'initialization_multiplier': 1.775796964208485}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 74 final loss: -0.00007200
Trial 75:
  Learning Rate: 0.0495116698727127
  Sigma Multiplier: 1.7234133654495243
  Initialization Multiplier: 1.5774178869589892
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.54it/s, loss=-0.000007, elapsed time=0.05, total time=7.62]
[I 2025-06-07 22:13:12,302] Trial 75 finished with value: -6.858433220558166e-06 and parameters: {'learning_rate': 0.0495116698727127, 'sigma_multiplier': 1.7234133654495243, 'num_layers': 2, 'initialization_multiplier': 1.5774178869589892}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 75 final loss: -0.00000686
Trial 76:
  Learning Rate: 0.015392511344347807
  Sigma Multiplier: 0.8795183903375874
  Initialization Multiplier: 1.855500441375747
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.04it/s, loss=0.001264, elapsed time=0.03, total time=8.15]
[I 2025-06-07 22:13:20,485] Trial 76 finished with value: 0.0012641918163020169 and parameters: {'learning_rate': 0.015392511344347807, 'sigma_multiplier': 0.8795183903375874, 'num_layers': 1, 'initialization_multiplier': 1.855500441375747}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 76 final loss: 0.00126419
Trial 77:
  Learning Rate: 0.009691045950539448
  Sigma Multiplier: 1.907697069800095
  Initialization Multiplier: 1.937554318925241
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.70it/s, loss=-0.000170, elapsed time=0.06, total time=7.12]
[I 2025-06-07 22:13:27,658] Trial 77 finished with value: -0.00017048766582467188 and parameters: {'learning_rate': 0.009691045950539448, 'sigma_multiplier': 1.907697069800095, 'num_layers': 2, 'initialization_multiplier': 1.937554318925241}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 77 final loss: -0.00017049
Trial 78:
  Learning Rate: 0.004311920558865493
  Sigma Multiplier: 1.8435565277820678
  Initialization Multiplier: 1.741160098963658
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 18.78it/s, loss=-0.000311, elapsed time=0.03, total time=8.3] 
[I 2025-06-07 22:13:35,998] Trial 78 finished with value: -0.0003105811832953188 and parameters: {'learning_rate': 0.004311920558865493, 'sigma_multiplier': 1.8435565277820678, 'num_layers': 3, 'initialization_multiplier': 1.741160098963658}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 78 final loss: -0.00031058
Trial 79:
  Learning Rate: 0.03300167442491218
  Sigma Multiplier: 1.6279014921038732
  Initialization Multiplier: 1.8089507247802457
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.64it/s, loss=-0.000048, elapsed time=0.03, total time=7.25]
[I 2025-06-07 22:13:43,281] Trial 79 finished with value: -4.785613241199123e-05 and parameters: {'learning_rate': 0.03300167442491218, 'sigma_multiplier': 1.6279014921038732, 'num_layers': 1, 'initialization_multiplier': 1.8089507247802457}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 79 final loss: -0.00004786
Trial 80:
  Learning Rate: 0.07199368315295626
  Sigma Multiplier: 1.523435508663694
  Initialization Multiplier: 1.6853858489223021
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.65it/s, loss=0.000006, elapsed time=0.04, total time=7.58] 
[I 2025-06-07 22:13:50,898] Trial 80 finished with value: 5.711108758201248e-06 and parameters: {'learning_rate': 0.07199368315295626, 'sigma_multiplier': 1.523435508663694, 'num_layers': 2, 'initialization_multiplier': 1.6853858489223021}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 80 final loss: 0.00000571
Trial 81:
  Learning Rate: 0.06211510433265009
  Sigma Multiplier: 1.43682078485947
  Initialization Multiplier: 1.2919897533769555
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.43it/s, loss=0.000260, elapsed time=0.06, total time=8.98] 
[I 2025-06-07 22:13:59,920] Trial 81 finished with value: 0.0002602493489196517 and parameters: {'learning_rate': 0.06211510433265009, 'sigma_multiplier': 1.43682078485947, 'num_layers': 3, 'initialization_multiplier': 1.2919897533769555}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 81 final loss: 0.00026025
Trial 82:
  Learning Rate: 0.08571185801014838
  Sigma Multiplier: 1.554191617437629
  Initialization Multiplier: 1.321942944294464
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.67it/s, loss=-0.000199, elapsed time=0.05, total time=8.45]
[I 2025-06-07 22:14:08,419] Trial 82 finished with value: -0.00019905778528085845 and parameters: {'learning_rate': 0.08571185801014838, 'sigma_multiplier': 1.554191617437629, 'num_layers': 3, 'initialization_multiplier': 1.321942944294464}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 82 final loss: -0.00019906
Trial 83:
  Learning Rate: 0.04689120736878944
  Sigma Multiplier: 1.4927155262722849
  Initialization Multiplier: 1.4570392111342405
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.22it/s, loss=-0.000026, elapsed time=0.05, total time=7.79]
[I 2025-06-07 22:14:16,242] Trial 83 finished with value: -2.6030873230600062e-05 and parameters: {'learning_rate': 0.04689120736878944, 'sigma_multiplier': 1.4927155262722849, 'num_layers': 2, 'initialization_multiplier': 1.4570392111342405}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 83 final loss: -0.00002603
Trial 84:
  Learning Rate: 0.03596265190449891
  Sigma Multiplier: 1.6870956192073387
  Initialization Multiplier: 0.9948917216517178
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.13it/s, loss=-0.000286, elapsed time=0.06, total time=9.08]
[I 2025-06-07 22:14:25,387] Trial 84 finished with value: -0.000286335418588381 and parameters: {'learning_rate': 0.03596265190449891, 'sigma_multiplier': 1.6870956192073387, 'num_layers': 4, 'initialization_multiplier': 0.9948917216517178}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 84 final loss: -0.00028634
Trial 85:
  Learning Rate: 0.06244009024715363
  Sigma Multiplier: 1.1392617346536447
  Initialization Multiplier: 0.0032607436962678316
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.12it/s, loss=-0.000021, elapsed time=0.05, total time=9.05]
[I 2025-06-07 22:14:34,504] Trial 85 finished with value: -2.1237891867075123e-05 and parameters: {'learning_rate': 0.06244009024715363, 'sigma_multiplier': 1.1392617346536447, 'num_layers': 3, 'initialization_multiplier': 0.0032607436962678316}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 85 final loss: -0.00002124
Trial 86:
  Learning Rate: 0.028164924676318842
  Sigma Multiplier: 1.7922077013877578
  Initialization Multiplier: 1.953429094491557
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.39it/s, loss=-0.000153, elapsed time=0.04, total time=6.98]
[I 2025-06-07 22:14:41,513] Trial 86 finished with value: -0.0001532615872555898 and parameters: {'learning_rate': 0.028164924676318842, 'sigma_multiplier': 1.7922077013877578, 'num_layers': 2, 'initialization_multiplier': 1.953429094491557}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 86 final loss: -0.00015326
Trial 87:
  Learning Rate: 0.022872642202796622
  Sigma Multiplier: 1.4044530738163339
  Initialization Multiplier: 1.1877380263836947
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.32it/s, loss=-0.000256, elapsed time=0.04, total time=7.63]
[I 2025-06-07 22:14:49,175] Trial 87 finished with value: -0.00025607627895180235 and parameters: {'learning_rate': 0.022872642202796622, 'sigma_multiplier': 1.4044530738163339, 'num_layers': 2, 'initialization_multiplier': 1.1877380263836947}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 87 final loss: -0.00025608
Trial 88:
  Learning Rate: 0.09989733047770494
  Sigma Multiplier: 1.469662371434616
  Initialization Multiplier: 1.5420814314123168
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.62it/s, loss=-0.000047, elapsed time=0.04, total time=6.68]
[I 2025-06-07 22:14:55,881] Trial 88 finished with value: -4.657083968518515e-05 and parameters: {'learning_rate': 0.09989733047770494, 'sigma_multiplier': 1.469662371434616, 'num_layers': 1, 'initialization_multiplier': 1.5420814314123168}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 88 final loss: -0.00004657
Trial 89:
  Learning Rate: 0.07585406053011139
  Sigma Multiplier: 1.7174402390563577
  Initialization Multiplier: 1.056216366255758
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 21.04it/s, loss=-0.000276, elapsed time=0.03, total time=7.47]
[I 2025-06-07 22:15:03,391] Trial 89 finished with value: -0.00027648730396646003 and parameters: {'learning_rate': 0.07585406053011139, 'sigma_multiplier': 1.7174402390563577, 'num_layers': 2, 'initialization_multiplier': 1.056216366255758}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 89 final loss: -0.00027649
Trial 90:
  Learning Rate: 0.04136174984844723
  Sigma Multiplier: 1.6195075552129028
  Initialization Multiplier: 1.2368628726264244
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 21.12it/s, loss=-0.000308, elapsed time=0.04, total time=7.37]
[I 2025-06-07 22:15:10,799] Trial 90 finished with value: -0.0003083329513299154 and parameters: {'learning_rate': 0.04136174984844723, 'sigma_multiplier': 1.6195075552129028, 'num_layers': 2, 'initialization_multiplier': 1.2368628726264244}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 90 final loss: -0.00030833
Trial 91:
  Learning Rate: 0.019272112105533857
  Sigma Multiplier: 1.3007612965567663
  Initialization Multiplier: 1.8869470466528115
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.46it/s, loss=-0.000196, elapsed time=0.05, total time=8.37]
[I 2025-06-07 22:15:19,206] Trial 91 finished with value: -0.00019572086878849726 and parameters: {'learning_rate': 0.019272112105533857, 'sigma_multiplier': 1.3007612965567663, 'num_layers': 3, 'initialization_multiplier': 1.8869470466528115}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 91 final loss: -0.00019572
Trial 92:
  Learning Rate: 0.007113139254216233
  Sigma Multiplier: 1.2374263312368892
  Initialization Multiplier: 1.9098768237994816
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.18it/s, loss=-0.000052, elapsed time=0.04, total time=8.61]
[I 2025-06-07 22:15:27,856] Trial 92 finished with value: -5.188850415356144e-05 and parameters: {'learning_rate': 0.007113139254216233, 'sigma_multiplier': 1.2374263312368892, 'num_layers': 3, 'initialization_multiplier': 1.9098768237994816}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 92 final loss: -0.00005189
Trial 93:
  Learning Rate: 0.01572011032102045
  Sigma Multiplier: 1.356076191154326
  Initialization Multiplier: 1.7488003363098965
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.53it/s, loss=0.000131, elapsed time=0.05, total time=8.46] 
[I 2025-06-07 22:15:36,353] Trial 93 finished with value: 0.00013141579043722174 and parameters: {'learning_rate': 0.01572011032102045, 'sigma_multiplier': 1.356076191154326, 'num_layers': 3, 'initialization_multiplier': 1.7488003363098965}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 93 final loss: 0.00013142
Trial 94:
  Learning Rate: 0.054811570880095965
  Sigma Multiplier: 1.3111706545583046
  Initialization Multiplier: 1.8219423740140779
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 18.76it/s, loss=0.000209, elapsed time=0.04, total time=8.26] 
[I 2025-06-07 22:15:44,652] Trial 94 finished with value: 0.00020929658927333673 and parameters: {'learning_rate': 0.054811570880095965, 'sigma_multiplier': 1.3111706545583046, 'num_layers': 3, 'initialization_multiplier': 1.8219423740140779}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 94 final loss: 0.00020930
Trial 95:
  Learning Rate: 0.01152949459053567
  Sigma Multiplier: 1.9613160737054467
  Initialization Multiplier: 1.6071889363287606
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.04it/s, loss=-0.000151, elapsed time=0.06, total time=9.05]
[I 2025-06-07 22:15:53,752] Trial 95 finished with value: -0.00015077194850325655 and parameters: {'learning_rate': 0.01152949459053567, 'sigma_multiplier': 1.9613160737054467, 'num_layers': 4, 'initialization_multiplier': 1.6071889363287606}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 95 final loss: -0.00015077
Trial 96:
  Learning Rate: 0.002024953847980762
  Sigma Multiplier: 1.8822128191699552
  Initialization Multiplier: 1.4017910456512914
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.81it/s, loss=0.001459, elapsed time=0.06, total time=9.25]
[I 2025-06-07 22:16:03,065] Trial 96 finished with value: 0.0014594912734652485 and parameters: {'learning_rate': 0.002024953847980762, 'sigma_multiplier': 1.8822128191699552, 'num_layers': 4, 'initialization_multiplier': 1.4017910456512914}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 96 final loss: 0.00145949
Trial 97:
  Learning Rate: 0.03139565349442192
  Sigma Multiplier: 1.5627346111731033
  Initialization Multiplier: 1.8777347274190936
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.45it/s, loss=-0.000206, elapsed time=0.04, total time=7.62]
[I 2025-06-07 22:16:10,729] Trial 97 finished with value: -0.00020586225034842725 and parameters: {'learning_rate': 0.03139565349442192, 'sigma_multiplier': 1.5627346111731033, 'num_layers': 2, 'initialization_multiplier': 1.8777347274190936}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 97 final loss: -0.00020586
Trial 98:
  Learning Rate: 0.02485282260747327
  Sigma Multiplier: 1.481228928450545
  Initialization Multiplier: 1.9715520167214606
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.28it/s, loss=-0.000351, elapsed time=0.04, total time=8.47]
[I 2025-06-07 22:16:19,240] Trial 98 finished with value: -0.00035082656698641153 and parameters: {'learning_rate': 0.02485282260747327, 'sigma_multiplier': 1.481228928450545, 'num_layers': 3, 'initialization_multiplier': 1.9715520167214606}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 98 final loss: -0.00035083
Trial 99:
  Learning Rate: 0.06686125067211865
  Sigma Multiplier: 1.7385674997693565
  Initialization Multiplier: 1.9613485933236798
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 21.29it/s, loss=-0.000133, elapsed time=0.03, total time=7.4] 
[I 2025-06-07 22:16:26,676] Trial 99 finished with value: -0.00013263673188893216 and parameters: {'learning_rate': 0.06686125067211865, 'sigma_multiplier': 1.7385674997693565, 'num_layers': 2, 'initialization_multiplier': 1.9613485933236798}. Best is trial 27 with value: -0.00047425718078470113.


Training has not converged after 150 steps
Trial 99 final loss: -0.00013264


In [21]:
# Pull the winning configuration and its objective value out of the finished study.
best_hyperparams, best_loss_value = study.best_params, study.best_value

# Single print call; the newline separator reproduces the three-line report.
print(
    "\nOptimization Finished!",
    f"Best hyperparameters found: {best_hyperparams}",
    f"Best loss value: {best_loss_value}",
    sep="\n",
)


Optimization Finished!
Best hyperparameters found: {'learning_rate': 0.02167126748942985, 'sigma_multiplier': 1.5788824264871768, 'num_layers': 2, 'initialization_multiplier': 1.799034414697425}
Best loss value: -0.00047425718078470113


In [22]:
# Promote the tuned hyperparameters to notebook-level constants used by the
# final training run. Note that NUM_LAYERS replaces the value set in the
# configuration cell at the top of the notebook.
LR, SIGMA_M, NUM_LAYERS, INIT_M = (
    best_hyperparams["learning_rate"],
    best_hyperparams["sigma_multiplier"],
    best_hyperparams["num_layers"],
    best_hyperparams["initialization_multiplier"],
)

In [23]:
def train_on_dataset(dataset=train_ds, n_iters=2000, n_ops=2000, n_samples=2000, seed=42):
    """Train an IQP circuit on ``dataset`` using the tuned hyperparameters.

    The circuit layout and optimiser settings are read from the notebook-level
    constants ``NODES``, ``NUM_LAYERS``, ``LR``, ``SIGMA_M`` and ``INIT_M`` at
    call time, so those cells must have been executed first.

    Args:
        dataset: Training samples handed to the loss as ``ground_truth`` and
            used for both the data-driven initialisation and the kernel
            bandwidth. Defaults to the ``train_ds`` object that existed when
            this cell was executed.
        n_iters: Number of optimisation steps passed to ``Trainer.train``.
        n_ops: Forwarded to the loss as ``n_ops``.
        n_samples: Forwarded to the loss as ``n_samples``.
        seed: Integer seed for the ``jax.random.PRNGKey`` given to the loss.

    Returns:
        The trainer's ``final_params`` after training.
    """
    grid_conn = aachen_connectivity()
    # One qubit per possible edge of an undirected graph on NODES vertices.
    num_qubits = NODES * (NODES - 1) // 2
    gates = efficient_connectivity_gates(grid_conn, num_qubits, NUM_LAYERS)

    circuit = iqp.IqpSimulator(num_qubits, gates, device="lightning.qubit")

    # Data-driven starting point, rescaled by the tuned multiplier.
    initial_params = initialize_from_data(gates, dataset) * INIT_M
    loss = iqp.gen_qml.mmd_loss_iqp
    # Bandwidth: median heuristic on the data, rescaled by the tuned multiplier.
    sigma = median_heuristic(dataset) * SIGMA_M

    loss_kwarg = {
        "params": initial_params,
        "iqp_circuit": circuit,
        "ground_truth": dataset,
        "sigma": [sigma],
        "n_ops": n_ops,
        "n_samples": n_samples,
        "key": jax.random.PRNGKey(seed),
    }

    trainer = iqp.Trainer("Adam", loss, stepsize=LR)
    trainer.train(n_iters=n_iters, loss_kwargs=loss_kwarg, turbo=1)

    return trainer.final_params

In [24]:
# Final training run on the full training set with the tuned hyperparameters.
params = train_on_dataset(dataset=train_ds)

Training Progress: 100%|██████████| 2000/2000 [01:25<00:00, 23.44it/s, loss=-0.000423, elapsed time=0.04, total time=85.7]

Training has not converged after 2000 steps





In [25]:
import os

import numpy as np

# Persist the trained parameters; the file name encodes the run configuration.
# NOTE(review): the MAX_WEIGHT field carries NUM_LAYERS -- label kept unchanged
# so existing consumers of these files keep working; confirm the intended name.
params_path = f'./results/params/params_{NODES}N_{TYPE}_{CONN}_LR{LR}_SIGMA{SIGMA_M}_INIT{INIT_M}_MAX_WEIGHT{NUM_LAYERS}.npy'
# np.save does not create missing parent directories; make sure the target
# folder exists so a long training run is not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(params_path), exist_ok=True)
np.save(params_path, params)