# Hyperparameter Optimization (HPO)

In [1]:
import iqpopt as iqp
from iqpopt.utils import initialize_from_data, local_gates
import iqpopt.gen_qml as genq
from iqpopt.gen_qml.utils import median_heuristic
import optuna
import pennylane as qml
import jax
from jax import numpy as jnp
from utils.nisq import aachen_connectivity, efficient_connectivity_gates
from datasets.bipartites import BipartiteGraphDataset
from datasets.er import ErdosRenyiGraphDataset
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Experiment configuration -------------------------------------------
# Graph size, and the register size derived from it: NODES * (NODES - 1) // 2
# is the number of unordered node pairs (presumably one qubit per possible
# edge -- confirm against the dataset encoding).
NODES = 10
QUBITS = (NODES * (NODES - 1)) // 2

# Dataset family and density tags; both are interpolated into the dataset
# file name when the training data is loaded.
TYPE = "Bipartite"
CONN = "Sparse"

# Default circuit depth.
# NOTE(review): not referenced by name in the cells visible here -- confirm it is still needed.
NUM_LAYERS = 1

In [3]:
# Path of the pickled dataset, assembled from the configuration constants.
ds_path = f'./datasets/raw_data/{NODES}N_{TYPE}_{CONN}.pkl'

# Load the stored graphs first, then convert a copy of their vector
# representation into a JAX array used as the training set.
# NOTE(review): the constructor arguments (nodes=1, edge_prob=0.1) do not match
# NODES; presumably from_file() replaces them with the file's contents -- confirm.
_loaded_ds = BipartiteGraphDataset(nodes=1, edge_prob=0.1).from_file(ds_path)
train_ds = jnp.array(_loaded_ds.vectors.copy())

[Dataset] Loaded 473 samples from ./datasets/raw_data/10N_Bipartite_Sparse.pkl
  Created: 2025-05-30T13:15:39.349125
  Unique graphs: 473
  Version: 1.0


In [4]:
# Build the gate list from the layout returned by aachen_connectivity() and
# wrap it in an IQP simulator over QUBITS qubits.
grid_conn = aachen_connectivity()
# NOTE(review): the literal 1 is presumably the number of layers; if so, pass
# NUM_LAYERS instead of a magic number -- confirm against utils.nisq.
gates = efficient_connectivity_gates(grid_conn, QUBITS, 1) 
circ = iqp.IqpSimulator(QUBITS, gates, device='lightning.qubit')

# Base PRNG key created from a fixed seed (42).
base_key = jax.random.PRNGKey(42)

In [5]:
# Reference sigma computed from the training data by median_heuristic(); it is
# passed to run_hpo() below.
# NOTE(review): presumably a kernel bandwidth that each trial's "sigma multiplier"
# rescales -- confirm in utils.hpo.
base_sigma = median_heuristic(train_ds)

In [6]:
from utils.hpo import run_hpo

In [7]:
# Search budget, kept in one place so it is easy to shrink for a quick run.
# n_iters_hpo matches the 150-step training progress bar shown for each trial.
# NOTE(review): n_ops / n_samples semantics are defined in utils.hpo -- confirm there.
hpo_budget = {
    "n_trials": 1000,
    "n_iters_hpo": 150,
    "n_ops": 2000,
    "n_samples": 2000,
}

# Launch the hyperparameter search; the returned study object is kept for inspection.
study = run_hpo(grid_conn, QUBITS, base_sigma, train_ds=train_ds, **hpo_budget)

[I 2025-06-07 12:35:39,396] A new study created in memory with name: no-name-b129ab4d-6a9f-419c-943a-539c7fbb9e1a


Trial 0:
  Learning Rate: 6.935503216398411e-05
  Sigma Multiplier: 0.4412836533228413
  Initialization Multiplier: 1.3825780687684395
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.00it/s, loss=0.085865, elapsed time=0.06, total time=13.8]
[I 2025-06-07 12:35:53,795] Trial 0 finished with value: 0.08586518143064445 and parameters: {'learning_rate': 6.935503216398411e-05, 'sigma_multiplier': 0.4412836533228413, 'num_layers': 2, 'initialization_multiplier': 1.3825780687684395}. Best is trial 0 with value: 0.08586518143064445.


Training has not converged after 150 steps
Trial 0 final loss: 0.08586518
Trial 1:
  Learning Rate: 0.0004503969869841552
  Sigma Multiplier: 0.6893098576089395
  Initialization Multiplier: 0.1635306173615728
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.91it/s, loss=0.045908, elapsed time=0.06, total time=10.3]
[I 2025-06-07 12:36:04,182] Trial 1 finished with value: 0.04590808221316834 and parameters: {'learning_rate': 0.0004503969869841552, 'sigma_multiplier': 0.6893098576089395, 'num_layers': 2, 'initialization_multiplier': 0.1635306173615728}. Best is trial 1 with value: 0.04590808221316834.


Training has not converged after 150 steps
Trial 1 final loss: 0.04590808
Trial 2:
  Learning Rate: 8.729806907676517e-05
  Sigma Multiplier: 0.1096369182249704
  Initialization Multiplier: 1.8475919564360936
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:18<00:00,  8.32it/s, loss=0.000003, elapsed time=0.12, total time=18.3] 
[I 2025-06-07 12:36:22,694] Trial 2 finished with value: 2.930511707352784e-06 and parameters: {'learning_rate': 8.729806907676517e-05, 'sigma_multiplier': 0.1096369182249704, 'num_layers': 4, 'initialization_multiplier': 1.8475919564360936}. Best is trial 2 with value: 2.930511707352784e-06.


Training has not converged after 150 steps
Trial 2 final loss: 0.00000293
Trial 3:
  Learning Rate: 0.03259557303412773
  Sigma Multiplier: 0.17564814509798837
  Initialization Multiplier: 1.7779550803245565
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:20<00:00,  7.40it/s, loss=-0.000067, elapsed time=0.12, total time=20.8]
[I 2025-06-07 12:36:43,636] Trial 3 finished with value: -6.728708824589416e-05 and parameters: {'learning_rate': 0.03259557303412773, 'sigma_multiplier': 0.17564814509798837, 'num_layers': 5, 'initialization_multiplier': 1.7779550803245565}. Best is trial 3 with value: -6.728708824589416e-05.


Training has not converged after 150 steps
Trial 3 final loss: -0.00006729
Trial 4:
  Learning Rate: 0.0020988318194136916
  Sigma Multiplier: 0.9816040453773254
  Initialization Multiplier: 0.6470935599256601
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.94it/s, loss=-0.000274, elapsed time=0.06, total time=9.81]
[I 2025-06-07 12:36:53,532] Trial 4 finished with value: -0.0002739699475439886 and parameters: {'learning_rate': 0.0020988318194136916, 'sigma_multiplier': 0.9816040453773254, 'num_layers': 1, 'initialization_multiplier': 0.6470935599256601}. Best is trial 4 with value: -0.0002739699475439886.


Training has not converged after 150 steps
Trial 4 final loss: -0.00027397
Trial 5:
  Learning Rate: 0.012961055154117739
  Sigma Multiplier: 0.7537358141801683
  Initialization Multiplier: 1.894349747886663
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.00it/s, loss=0.000079, elapsed time=0.05, total time=11]  
[I 2025-06-07 12:37:04,573] Trial 5 finished with value: 7.932762713785821e-05 and parameters: {'learning_rate': 0.012961055154117739, 'sigma_multiplier': 0.7537358141801683, 'num_layers': 1, 'initialization_multiplier': 1.894349747886663}. Best is trial 4 with value: -0.0002739699475439886.


Training has not converged after 150 steps
Trial 5 final loss: 0.00007933
Trial 6:
  Learning Rate: 0.001540571904798215
  Sigma Multiplier: 1.472753281514465
  Initialization Multiplier: 0.8279204339529765
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.23it/s, loss=0.011734, elapsed time=0.08, total time=15]  
[I 2025-06-07 12:37:19,632] Trial 6 finished with value: 0.011734189946271468 and parameters: {'learning_rate': 0.001540571904798215, 'sigma_multiplier': 1.472753281514465, 'num_layers': 4, 'initialization_multiplier': 0.8279204339529765}. Best is trial 4 with value: -0.0002739699475439886.


Training has not converged after 150 steps
Trial 6 final loss: 0.01173419
Trial 7:
  Learning Rate: 6.909710333814888e-05
  Sigma Multiplier: 1.4955849574834732
  Initialization Multiplier: 0.03783925898591827
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.36it/s, loss=0.083156, elapsed time=0.07, total time=10.1]
[I 2025-06-07 12:37:29,724] Trial 7 finished with value: 0.08315623430177585 and parameters: {'learning_rate': 6.909710333814888e-05, 'sigma_multiplier': 1.4955849574834732, 'num_layers': 2, 'initialization_multiplier': 0.03783925898591827}. Best is trial 4 with value: -0.0002739699475439886.


Training has not converged after 150 steps
Trial 7 final loss: 0.08315623
Trial 8:
  Learning Rate: 0.00018490879919956768
  Sigma Multiplier: 0.7582374424488165
  Initialization Multiplier: 1.9129659518436648
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:17<00:00,  8.78it/s, loss=0.138716, elapsed time=0.11, total time=17.4]
[I 2025-06-07 12:37:47,195] Trial 8 finished with value: 0.13871582003327854 and parameters: {'learning_rate': 0.00018490879919956768, 'sigma_multiplier': 0.7582374424488165, 'num_layers': 4, 'initialization_multiplier': 1.9129659518436648}. Best is trial 4 with value: -0.0002739699475439886.


Training has not converged after 150 steps
Trial 8 final loss: 0.13871582
Trial 9:
  Learning Rate: 0.010443273466725597
  Sigma Multiplier: 1.278693691321831
  Initialization Multiplier: 0.09674668654126828
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.33it/s, loss=-0.000227, elapsed time=0.06, total time=12.6]
[I 2025-06-07 12:37:59,980] Trial 9 finished with value: -0.00022656437555830294 and parameters: {'learning_rate': 0.010443273466725597, 'sigma_multiplier': 1.278693691321831, 'num_layers': 3, 'initialization_multiplier': 0.09674668654126828}. Best is trial 4 with value: -0.0002739699475439886.


Training has not converged after 150 steps
Trial 9 final loss: -0.00022656
Trial 10:
  Learning Rate: 0.0023414740349941704
  Sigma Multiplier: 1.8160719886395822
  Initialization Multiplier: 0.6773856986937976
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.70it/s, loss=-0.000146, elapsed time=0.07, total time=8.32]
[I 2025-06-07 12:38:08,334] Trial 10 finished with value: -0.00014609374324446828 and parameters: {'learning_rate': 0.0023414740349941704, 'sigma_multiplier': 1.8160719886395822, 'num_layers': 1, 'initialization_multiplier': 0.6773856986937976}. Best is trial 4 with value: -0.0002739699475439886.


Training has not converged after 150 steps
Trial 10 final loss: -0.00014609
Trial 11:
  Learning Rate: 0.00601628639667007
  Sigma Multiplier: 1.2280720951257398
  Initialization Multiplier: 0.46141554017996406
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.68it/s, loss=-0.000404, elapsed time=0.08, total time=13.2]
[I 2025-06-07 12:38:21,529] Trial 11 finished with value: -0.00040403594016146156 and parameters: {'learning_rate': 0.00601628639667007, 'sigma_multiplier': 1.2280720951257398, 'num_layers': 3, 'initialization_multiplier': 0.46141554017996406}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 11 final loss: -0.00040404
Trial 12:
  Learning Rate: 0.0031043252958263623
  Sigma Multiplier: 1.091922720235438
  Initialization Multiplier: 0.49259637275970314
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.18it/s, loss=-0.000323, elapsed time=0.11, total time=13.7]
[I 2025-06-07 12:38:35,293] Trial 12 finished with value: -0.00032295688029716774 and parameters: {'learning_rate': 0.0031043252958263623, 'sigma_multiplier': 1.091922720235438, 'num_layers': 3, 'initialization_multiplier': 0.49259637275970314}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 12 final loss: -0.00032296
Trial 13:
  Learning Rate: 0.0788773545858736
  Sigma Multiplier: 1.3052103512228208
  Initialization Multiplier: 0.342572899647249
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.71it/s, loss=-0.000209, elapsed time=0.06, total time=13.1]
[I 2025-06-07 12:38:48,457] Trial 13 finished with value: -0.0002087590084874756 and parameters: {'learning_rate': 0.0788773545858736, 'sigma_multiplier': 1.3052103512228208, 'num_layers': 3, 'initialization_multiplier': 0.342572899647249}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 13 final loss: -0.00020876
Trial 14:
  Learning Rate: 0.006531540639123406
  Sigma Multiplier: 1.9600734474114039
  Initialization Multiplier: 1.1540066773221667
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.02it/s, loss=-0.000186, elapsed time=0.08, total time=12.8]
[I 2025-06-07 12:39:01,287] Trial 14 finished with value: -0.00018645537345529463 and parameters: {'learning_rate': 0.006531540639123406, 'sigma_multiplier': 1.9600734474114039, 'num_layers': 3, 'initialization_multiplier': 1.1540066773221667}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 14 final loss: -0.00018646
Trial 15:
  Learning Rate: 0.0007048334204669984
  Sigma Multiplier: 1.1101875061675435
  Initialization Multiplier: 0.43589733609211123
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:18<00:00,  8.20it/s, loss=0.018043, elapsed time=0.1, total time=18.5] 
[I 2025-06-07 12:39:19,887] Trial 15 finished with value: 0.018042540736518092 and parameters: {'learning_rate': 0.0007048334204669984, 'sigma_multiplier': 1.1101875061675435, 'num_layers': 5, 'initialization_multiplier': 0.43589733609211123}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 15 final loss: 0.01804254
Trial 16:
  Learning Rate: 0.0052520322411228415
  Sigma Multiplier: 1.6843969047841196
  Initialization Multiplier: 1.0362315688955381
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.28it/s, loss=-0.000155, elapsed time=0.09, total time=14.9]
[I 2025-06-07 12:39:34,810] Trial 16 finished with value: -0.0001547980462264047 and parameters: {'learning_rate': 0.0052520322411228415, 'sigma_multiplier': 1.6843969047841196, 'num_layers': 4, 'initialization_multiplier': 1.0362315688955381}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 16 final loss: -0.00015480
Trial 17:
  Learning Rate: 0.025534332389243415
  Sigma Multiplier: 0.9628062704707541
  Initialization Multiplier: 0.39294181503442877
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.41it/s, loss=-0.000307, elapsed time=0.07, total time=11.4]
[I 2025-06-07 12:39:46,304] Trial 17 finished with value: -0.00030677684405406235 and parameters: {'learning_rate': 0.025534332389243415, 'sigma_multiplier': 0.9628062704707541, 'num_layers': 2, 'initialization_multiplier': 0.39294181503442877}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 17 final loss: -0.00030678
Trial 18:
  Learning Rate: 0.0006411625603632361
  Sigma Multiplier: 1.1897144024640798
  Initialization Multiplier: 0.6165926935313628
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.83it/s, loss=0.036659, elapsed time=0.07, total time=12.9]
[I 2025-06-07 12:39:59,290] Trial 18 finished with value: 0.0366585088183388 and parameters: {'learning_rate': 0.0006411625603632361, 'sigma_multiplier': 1.1897144024640798, 'num_layers': 3, 'initialization_multiplier': 0.6165926935313628}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 18 final loss: 0.03665851
Trial 19:
  Learning Rate: 0.003950766486571736
  Sigma Multiplier: 1.5405221234866646
  Initialization Multiplier: 1.371375137590764
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.84it/s, loss=-0.000235, elapsed time=0.08, total time=13]  
[I 2025-06-07 12:40:12,364] Trial 19 finished with value: -0.00023514756789297343 and parameters: {'learning_rate': 0.003950766486571736, 'sigma_multiplier': 1.5405221234866646, 'num_layers': 3, 'initialization_multiplier': 1.371375137590764}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 19 final loss: -0.00023515
Trial 20:
  Learning Rate: 0.020224874120187128
  Sigma Multiplier: 0.5448051855454472
  Initialization Multiplier: 0.25481052007570726
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:17<00:00,  8.44it/s, loss=0.000123, elapsed time=0.09, total time=18.1]
[I 2025-06-07 12:40:30,545] Trial 20 finished with value: 0.00012299931126798623 and parameters: {'learning_rate': 0.020224874120187128, 'sigma_multiplier': 0.5448051855454472, 'num_layers': 4, 'initialization_multiplier': 0.25481052007570726}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 20 final loss: 0.00012300
Trial 21:
  Learning Rate: 0.03618575064067154
  Sigma Multiplier: 0.9852373280759166
  Initialization Multiplier: 0.4649000571177765
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.77it/s, loss=-0.000284, elapsed time=0.06, total time=12.1]
[I 2025-06-07 12:40:42,642] Trial 21 finished with value: -0.00028412622606069495 and parameters: {'learning_rate': 0.03618575064067154, 'sigma_multiplier': 0.9852373280759166, 'num_layers': 2, 'initialization_multiplier': 0.4649000571177765}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 21 final loss: -0.00028413
Trial 22:
  Learning Rate: 0.04859109593949148
  Sigma Multiplier: 0.9064556106199673
  Initialization Multiplier: 0.8467884287537462
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.55it/s, loss=-0.000065, elapsed time=0.08, total time=12.2]
[I 2025-06-07 12:40:54,922] Trial 22 finished with value: -6.527172042888055e-05 and parameters: {'learning_rate': 0.04859109593949148, 'sigma_multiplier': 0.9064556106199673, 'num_layers': 2, 'initialization_multiplier': 0.8467884287537462}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 22 final loss: -0.00006527
Trial 23:
  Learning Rate: 0.0139678687744889
  Sigma Multiplier: 1.3464788125874865
  Initialization Multiplier: 0.5355061134964305
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.35it/s, loss=-0.000243, elapsed time=0.07, total time=10.7]
[I 2025-06-07 12:41:05,683] Trial 23 finished with value: -0.00024262488217650525 and parameters: {'learning_rate': 0.0139678687744889, 'sigma_multiplier': 1.3464788125874865, 'num_layers': 2, 'initialization_multiplier': 0.5355061134964305}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 23 final loss: -0.00024262
Trial 24:
  Learning Rate: 0.09921228097444976
  Sigma Multiplier: 1.117584119735634
  Initialization Multiplier: 0.2715099983565834
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.36it/s, loss=0.000034, elapsed time=0.15, total time=13.5] 
[I 2025-06-07 12:41:19,256] Trial 24 finished with value: 3.387575873658467e-05 and parameters: {'learning_rate': 0.09921228097444976, 'sigma_multiplier': 1.117584119735634, 'num_layers': 3, 'initialization_multiplier': 0.2715099983565834}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 24 final loss: 0.00003388
Trial 25:
  Learning Rate: 0.0012231710805051103
  Sigma Multiplier: 0.8896824045873469
  Initialization Multiplier: 0.7520938226250309
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.98it/s, loss=0.010910, elapsed time=0.06, total time=11.9]
[I 2025-06-07 12:41:31,205] Trial 25 finished with value: 0.010910261210857045 and parameters: {'learning_rate': 0.0012231710805051103, 'sigma_multiplier': 0.8896824045873469, 'num_layers': 2, 'initialization_multiplier': 0.7520938226250309}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 25 final loss: 0.01091026
Trial 26:
  Learning Rate: 0.0032775414420593493
  Sigma Multiplier: 0.6047243281404783
  Initialization Multiplier: 0.38142888822229903
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:15<00:00,  9.85it/s, loss=0.000197, elapsed time=0.09, total time=15.6] 
[I 2025-06-07 12:41:46,832] Trial 26 finished with value: 0.00019699188606328303 and parameters: {'learning_rate': 0.0032775414420593493, 'sigma_multiplier': 0.6047243281404783, 'num_layers': 3, 'initialization_multiplier': 0.38142888822229903}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 26 final loss: 0.00019699
Trial 27:
  Learning Rate: 0.008406930090508581
  Sigma Multiplier: 0.3742174638099932
  Initialization Multiplier: 0.9256013610432375
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.07it/s, loss=0.000535, elapsed time=0.06, total time=12.7]
[I 2025-06-07 12:41:59,588] Trial 27 finished with value: 0.0005354823576969233 and parameters: {'learning_rate': 0.008406930090508581, 'sigma_multiplier': 0.3742174638099932, 'num_layers': 1, 'initialization_multiplier': 0.9256013610432375}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 27 final loss: 0.00053548
Trial 28:
  Learning Rate: 0.02231684357074658
  Sigma Multiplier: 1.1896006677690656
  Initialization Multiplier: 0.20611172294311747
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.54it/s, loss=-0.000233, elapsed time=0.05, total time=11.5]
[I 2025-06-07 12:42:11,089] Trial 28 finished with value: -0.00023316602909782887 and parameters: {'learning_rate': 0.02231684357074658, 'sigma_multiplier': 1.1896006677690656, 'num_layers': 2, 'initialization_multiplier': 0.20611172294311747}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 28 final loss: -0.00023317
Trial 29:
  Learning Rate: 0.0050270762657906864
  Sigma Multiplier: 0.8999545320651758
  Initialization Multiplier: 1.2597142540442507
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.56it/s, loss=0.000137, elapsed time=0.09, total time=14.5] 
[I 2025-06-07 12:42:25,665] Trial 29 finished with value: 0.00013729817685592826 and parameters: {'learning_rate': 0.0050270762657906864, 'sigma_multiplier': 0.8999545320651758, 'num_layers': 3, 'initialization_multiplier': 1.2597142540442507}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 29 final loss: 0.00013730
Trial 30:
  Learning Rate: 0.019460473600169448
  Sigma Multiplier: 0.42103324474157255
  Initialization Multiplier: 0.5320692759951572
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:16<00:00,  9.15it/s, loss=0.000792, elapsed time=0.09, total time=16.7]
[I 2025-06-07 12:42:42,437] Trial 30 finished with value: 0.0007922599249176321 and parameters: {'learning_rate': 0.019460473600169448, 'sigma_multiplier': 0.42103324474157255, 'num_layers': 3, 'initialization_multiplier': 0.5320692759951572}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 30 final loss: 0.00079226
Trial 31:
  Learning Rate: 0.04503596944115113
  Sigma Multiplier: 0.9861974266493174
  Initialization Multiplier: 0.41168385708773525
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.55it/s, loss=-0.000226, elapsed time=0.06, total time=11.4]
[I 2025-06-07 12:42:53,892] Trial 31 finished with value: -0.00022568021275996532 and parameters: {'learning_rate': 0.04503596944115113, 'sigma_multiplier': 0.9861974266493174, 'num_layers': 2, 'initialization_multiplier': 0.41168385708773525}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 31 final loss: -0.00022568
Trial 32:
  Learning Rate: 0.038776057352315124
  Sigma Multiplier: 1.0316835719637782
  Initialization Multiplier: 0.4937894224824023
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.50it/s, loss=-0.000276, elapsed time=0.06, total time=11.4]
[I 2025-06-07 12:43:05,367] Trial 32 finished with value: -0.000275726243885327 and parameters: {'learning_rate': 0.038776057352315124, 'sigma_multiplier': 1.0316835719637782, 'num_layers': 2, 'initialization_multiplier': 0.4937894224824023}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 32 final loss: -0.00027573
Trial 33:
  Learning Rate: 0.02621923044708749
  Sigma Multiplier: 0.7440483047399687
  Initialization Multiplier: 0.13678965912908647
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.94it/s, loss=0.000038, elapsed time=0.06, total time=12.8] 
[I 2025-06-07 12:43:18,256] Trial 33 finished with value: 3.8307643145158296e-05 and parameters: {'learning_rate': 0.02621923044708749, 'sigma_multiplier': 0.7440483047399687, 'num_layers': 2, 'initialization_multiplier': 0.13678965912908647}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 33 final loss: 0.00003831
Trial 34:
  Learning Rate: 3.555053602322373e-05
  Sigma Multiplier: 1.3772570018864156
  Initialization Multiplier: 1.6695262076289783
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.84it/s, loss=0.102114, elapsed time=0.04, total time=8.65]
[I 2025-06-07 12:43:26,931] Trial 34 finished with value: 0.10211409664683399 and parameters: {'learning_rate': 3.555053602322373e-05, 'sigma_multiplier': 1.3772570018864156, 'num_layers': 1, 'initialization_multiplier': 1.6695262076289783}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 34 final loss: 0.10211410
Trial 35:
  Learning Rate: 0.06402484540785403
  Sigma Multiplier: 0.835955375387138
  Initialization Multiplier: 0.6016180784441945
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:16<00:00,  8.89it/s, loss=0.000396, elapsed time=0.1, total time=17.2]  
[I 2025-06-07 12:43:44,189] Trial 35 finished with value: 0.0003961378147011599 and parameters: {'learning_rate': 0.06402484540785403, 'sigma_multiplier': 0.835955375387138, 'num_layers': 4, 'initialization_multiplier': 0.6016180784441945}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 35 final loss: 0.00039614
Trial 36:
  Learning Rate: 0.00845387737899802
  Sigma Multiplier: 1.1678603376572692
  Initialization Multiplier: 0.32651278039555076
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.06it/s, loss=-0.000284, elapsed time=0.05, total time=11]  
[I 2025-06-07 12:43:55,182] Trial 36 finished with value: -0.00028421633099531645 and parameters: {'learning_rate': 0.00845387737899802, 'sigma_multiplier': 1.1678603376572692, 'num_layers': 2, 'initialization_multiplier': 0.32651278039555076}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 36 final loss: -0.00028422
Trial 37:
  Learning Rate: 0.0032117528008769737
  Sigma Multiplier: 1.201217402518478
  Initialization Multiplier: 0.015346810953481727
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.70it/s, loss=0.000799, elapsed time=0.04, total time=8.83]
[I 2025-06-07 12:44:04,043] Trial 37 finished with value: 0.0007988971466899698 and parameters: {'learning_rate': 0.0032117528008769737, 'sigma_multiplier': 1.201217402518478, 'num_layers': 1, 'initialization_multiplier': 0.015346810953481727}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 37 final loss: 0.00079890
Trial 38:
  Learning Rate: 0.0011125070046469868
  Sigma Multiplier: 1.4280865065326913
  Initialization Multiplier: 0.2704492008632767
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.86it/s, loss=-0.000195, elapsed time=0.09, total time=11.1]
[I 2025-06-07 12:44:15,214] Trial 38 finished with value: -0.00019481235293969864 and parameters: {'learning_rate': 0.0011125070046469868, 'sigma_multiplier': 1.4280865065326913, 'num_layers': 2, 'initialization_multiplier': 0.2704492008632767}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 38 final loss: -0.00019481
Trial 39:
  Learning Rate: 0.008014603505281315
  Sigma Multiplier: 1.5846212722828636
  Initialization Multiplier: 0.3367077203691803
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.54it/s, loss=-0.000292, elapsed time=0.07, total time=12.2]
[I 2025-06-07 12:44:27,466] Trial 39 finished with value: -0.00029199890245680104 and parameters: {'learning_rate': 0.008014603505281315, 'sigma_multiplier': 1.5846212722828636, 'num_layers': 3, 'initialization_multiplier': 0.3367077203691803}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 39 final loss: -0.00029200
Trial 40:
  Learning Rate: 0.012682147148384565
  Sigma Multiplier: 1.6062866008620102
  Initialization Multiplier: 0.7295943667243358
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.29it/s, loss=-0.000284, elapsed time=0.07, total time=14.8]
[I 2025-06-07 12:44:42,377] Trial 40 finished with value: -0.00028372597440580097 and parameters: {'learning_rate': 0.012682147148384565, 'sigma_multiplier': 1.6062866008620102, 'num_layers': 4, 'initialization_multiplier': 0.7295943667243358}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 40 final loss: -0.00028373
Trial 41:
  Learning Rate: 0.00881457698462545
  Sigma Multiplier: 1.1079652992542992
  Initialization Multiplier: 0.15764330349428374
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.49it/s, loss=-0.000245, elapsed time=0.06, total time=13.4]
[I 2025-06-07 12:44:55,849] Trial 41 finished with value: -0.00024516048346907154 and parameters: {'learning_rate': 0.00881457698462545, 'sigma_multiplier': 1.1079652992542992, 'num_layers': 3, 'initialization_multiplier': 0.15764330349428374}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 41 final loss: -0.00024516
Trial 42:
  Learning Rate: 0.0022125841555668967
  Sigma Multiplier: 1.7059973596798281
  Initialization Multiplier: 0.32513818980926656
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.22it/s, loss=-0.000245, elapsed time=0.08, total time=12.6]
[I 2025-06-07 12:45:08,487] Trial 42 finished with value: -0.0002454552839371866 and parameters: {'learning_rate': 0.0022125841555668967, 'sigma_multiplier': 1.7059973596798281, 'num_layers': 3, 'initialization_multiplier': 0.32513818980926656}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 42 final loss: -0.00024546
Trial 43:
  Learning Rate: 0.00622866914471922
  Sigma Multiplier: 1.184639771460511
  Initialization Multiplier: 0.5559365999850404
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:17<00:00,  8.53it/s, loss=-0.000143, elapsed time=0.11, total time=18.1]
[I 2025-06-07 12:45:26,680] Trial 43 finished with value: -0.0001429195314988308 and parameters: {'learning_rate': 0.00622866914471922, 'sigma_multiplier': 1.184639771460511, 'num_layers': 5, 'initialization_multiplier': 0.5559365999850404}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 43 final loss: -0.00014292
Trial 44:
  Learning Rate: 0.015014637774892646
  Sigma Multiplier: 1.2743781515831547
  Initialization Multiplier: 0.37902422416559356
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.04it/s, loss=-0.000376, elapsed time=0.08, total time=12.8]
[I 2025-06-07 12:45:39,539] Trial 44 finished with value: -0.00037551081724091775 and parameters: {'learning_rate': 0.015014637774892646, 'sigma_multiplier': 1.2743781515831547, 'num_layers': 3, 'initialization_multiplier': 0.37902422416559356}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 44 final loss: -0.00037551
Trial 45:
  Learning Rate: 0.01565069153303194
  Sigma Multiplier: 1.2732601876941163
  Initialization Multiplier: 0.6893140580283861
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.12it/s, loss=-0.000288, elapsed time=0.07, total time=12.6]
[I 2025-06-07 12:45:52,236] Trial 45 finished with value: -0.0002882106041697411 and parameters: {'learning_rate': 0.01565069153303194, 'sigma_multiplier': 1.2732601876941163, 'num_layers': 3, 'initialization_multiplier': 0.6893140580283861}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 45 final loss: -0.00028821
Trial 46:
  Learning Rate: 0.00020684656788286888
  Sigma Multiplier: 1.8486750010484585
  Initialization Multiplier: 0.12035317943802842
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.65it/s, loss=0.004572, elapsed time=0.1, total time=14.4] 
[I 2025-06-07 12:46:06,725] Trial 46 finished with value: 0.004572023097272563 and parameters: {'learning_rate': 0.00020684656788286888, 'sigma_multiplier': 1.8486750010484585, 'num_layers': 4, 'initialization_multiplier': 0.12035317943802842}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 46 final loss: 0.00457202
Trial 47:
  Learning Rate: 0.003833388172213931
  Sigma Multiplier: 1.545841139006155
  Initialization Multiplier: 0.817716366200639
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.09it/s, loss=0.000461, elapsed time=0.06, total time=12.8]
[I 2025-06-07 12:46:19,533] Trial 47 finished with value: 0.00046076695614497156 and parameters: {'learning_rate': 0.003833388172213931, 'sigma_multiplier': 1.545841139006155, 'num_layers': 3, 'initialization_multiplier': 0.817716366200639}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 47 final loss: 0.00046077
Trial 48:
  Learning Rate: 0.0014867463132049415
  Sigma Multiplier: 1.4337378679738382
  Initialization Multiplier: 0.43921892083433095
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.60it/s, loss=0.000005, elapsed time=0.11, total time=14.5]
[I 2025-06-07 12:46:34,100] Trial 48 finished with value: 5.006415680179538e-06 and parameters: {'learning_rate': 0.0014867463132049415, 'sigma_multiplier': 1.4337378679738382, 'num_layers': 4, 'initialization_multiplier': 0.43921892083433095}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 48 final loss: 0.00000501
Trial 49:
  Learning Rate: 0.010672324498379418
  Sigma Multiplier: 1.2501843811772302
  Initialization Multiplier: 0.23302171063211363
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.15it/s, loss=-0.000254, elapsed time=0.09, total time=12.7]
[I 2025-06-07 12:46:46,822] Trial 49 finished with value: -0.0002540138130441341 and parameters: {'learning_rate': 0.010672324498379418, 'sigma_multiplier': 1.2501843811772302, 'num_layers': 3, 'initialization_multiplier': 0.23302171063211363}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 49 final loss: -0.00025401
Trial 50:
  Learning Rate: 0.028042792519536276
  Sigma Multiplier: 1.0427552020915947
  Initialization Multiplier: 0.05494467885840232
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.05it/s, loss=-0.000234, elapsed time=0.06, total time=13.8]
[I 2025-06-07 12:47:00,706] Trial 50 finished with value: -0.00023403083316274903 and parameters: {'learning_rate': 0.028042792519536276, 'sigma_multiplier': 1.0427552020915947, 'num_layers': 3, 'initialization_multiplier': 0.05494467885840232}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 50 final loss: -0.00023403
Trial 51:
  Learning Rate: 0.014092198464333301
  Sigma Multiplier: 1.2851377337082015
  Initialization Multiplier: 0.6670538137055075
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.84it/s, loss=-0.000238, elapsed time=0.06, total time=12.9]
[I 2025-06-07 12:47:13,667] Trial 51 finished with value: -0.00023824685070455772 and parameters: {'learning_rate': 0.014092198464333301, 'sigma_multiplier': 1.2851377337082015, 'num_layers': 3, 'initialization_multiplier': 0.6670538137055075}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 51 final loss: -0.00023825
Trial 52:
  Learning Rate: 0.01886420321421628
  Sigma Multiplier: 1.3742807256879286
  Initialization Multiplier: 0.36818765277346904
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.94it/s, loss=-0.000308, elapsed time=0.07, total time=12.9]
[I 2025-06-07 12:47:26,665] Trial 52 finished with value: -0.0003081348975383179 and parameters: {'learning_rate': 0.01886420321421628, 'sigma_multiplier': 1.3742807256879286, 'num_layers': 3, 'initialization_multiplier': 0.36818765277346904}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 52 final loss: -0.00030813
Trial 53:
  Learning Rate: 0.006812596397754811
  Sigma Multiplier: 1.3766896044408419
  Initialization Multiplier: 0.38780720861683954
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.13it/s, loss=-0.000279, elapsed time=0.09, total time=12.8]
[I 2025-06-07 12:47:39,461] Trial 53 finished with value: -0.00027855420608745984 and parameters: {'learning_rate': 0.006812596397754811, 'sigma_multiplier': 1.3766896044408419, 'num_layers': 3, 'initialization_multiplier': 0.38780720861683954}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 53 final loss: -0.00027855
Trial 54:
  Learning Rate: 0.004748008783268205
  Sigma Multiplier: 1.6420788489175202
  Initialization Multiplier: 0.310182727845027
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.25it/s, loss=-0.000277, elapsed time=0.11, total time=14.9]
[I 2025-06-07 12:47:54,449] Trial 54 finished with value: -0.00027718842130039614 and parameters: {'learning_rate': 0.004748008783268205, 'sigma_multiplier': 1.6420788489175202, 'num_layers': 4, 'initialization_multiplier': 0.310182727845027}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 54 final loss: -0.00027719
Trial 55:
  Learning Rate: 0.002513617978295668
  Sigma Multiplier: 1.445669164853109
  Initialization Multiplier: 0.6145925599354591
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.25it/s, loss=-0.000096, elapsed time=0.1, total time=12.5] 
[I 2025-06-07 12:48:07,034] Trial 55 finished with value: -9.631565643485914e-05 and parameters: {'learning_rate': 0.002513617978295668, 'sigma_multiplier': 1.445669164853109, 'num_layers': 3, 'initialization_multiplier': 0.6145925599354591}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 55 final loss: -0.00009632
Trial 56:
  Learning Rate: 0.017331593021658293
  Sigma Multiplier: 1.523295468623846
  Initialization Multiplier: 0.48686018309954204
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.36it/s, loss=-0.000168, elapsed time=0.09, total time=12.4]
[I 2025-06-07 12:48:19,532] Trial 56 finished with value: -0.00016833612191222968 and parameters: {'learning_rate': 0.017331593021658293, 'sigma_multiplier': 1.523295468623846, 'num_layers': 3, 'initialization_multiplier': 0.48686018309954204}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 56 final loss: -0.00016834
Trial 57:
  Learning Rate: 0.053175976196239844
  Sigma Multiplier: 1.7744826409455885
  Initialization Multiplier: 0.20109247057596163
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.20it/s, loss=-0.000112, elapsed time=0.09, total time=12.6]
[I 2025-06-07 12:48:32,133] Trial 57 finished with value: -0.00011186127844038069 and parameters: {'learning_rate': 0.053175976196239844, 'sigma_multiplier': 1.7744826409455885, 'num_layers': 3, 'initialization_multiplier': 0.20109247057596163}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 57 final loss: -0.00011186
Trial 58:
  Learning Rate: 0.025752939655338168
  Sigma Multiplier: 1.081940771850219
  Initialization Multiplier: 1.5967707866771899
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:15<00:00,  9.95it/s, loss=-0.000175, elapsed time=0.11, total time=15.4]
[I 2025-06-07 12:48:47,601] Trial 58 finished with value: -0.0001751921023991026 and parameters: {'learning_rate': 0.025752939655338168, 'sigma_multiplier': 1.081940771850219, 'num_layers': 4, 'initialization_multiplier': 1.5967707866771899}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 58 final loss: -0.00017519
Trial 59:
  Learning Rate: 0.007249818472407206
  Sigma Multiplier: 0.9322421196039871
  Initialization Multiplier: 0.40693055345803375
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.34it/s, loss=-0.000306, elapsed time=0.11, total time=13.6]
[I 2025-06-07 12:49:01,233] Trial 59 finished with value: -0.0003061478827549741 and parameters: {'learning_rate': 0.007249818472407206, 'sigma_multiplier': 0.9322421196039871, 'num_layers': 3, 'initialization_multiplier': 0.40693055345803375}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 59 final loss: -0.00030615
Trial 60:
  Learning Rate: 0.01115586560481812
  Sigma Multiplier: 0.829394848846822
  Initialization Multiplier: 0.44054574495729365
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.83it/s, loss=-0.000192, elapsed time=0.06, total time=12]  
[I 2025-06-07 12:49:13,236] Trial 60 finished with value: -0.00019207126014509043 and parameters: {'learning_rate': 0.01115586560481812, 'sigma_multiplier': 0.829394848846822, 'num_layers': 2, 'initialization_multiplier': 0.44054574495729365}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 60 final loss: -0.00019207
Trial 61:
  Learning Rate: 0.006891538680178833
  Sigma Multiplier: 0.9321642466100014
  Initialization Multiplier: 0.36317671307193006
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.26it/s, loss=-0.000376, elapsed time=0.09, total time=13.7]
[I 2025-06-07 12:49:26,949] Trial 61 finished with value: -0.0003759212929444612 and parameters: {'learning_rate': 0.006891538680178833, 'sigma_multiplier': 0.9321642466100014, 'num_layers': 3, 'initialization_multiplier': 0.36317671307193006}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 61 final loss: -0.00037592
Trial 62:
  Learning Rate: 0.002804348539577881
  Sigma Multiplier: 0.9392864077044984
  Initialization Multiplier: 0.5485568723659696
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 10.99it/s, loss=-0.000281, elapsed time=0.09, total time=14]  
[I 2025-06-07 12:49:41,024] Trial 62 finished with value: -0.0002806942277757086 and parameters: {'learning_rate': 0.002804348539577881, 'sigma_multiplier': 0.9392864077044984, 'num_layers': 3, 'initialization_multiplier': 0.5485568723659696}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 62 final loss: -0.00028069
Trial 63:
  Learning Rate: 0.00540676606932078
  Sigma Multiplier: 0.8187934796856888
  Initialization Multiplier: 0.35920386301558704
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 10.97it/s, loss=-0.000295, elapsed time=0.1, total time=14]   
[I 2025-06-07 12:49:55,087] Trial 63 finished with value: -0.000294836419708281 and parameters: {'learning_rate': 0.00540676606932078, 'sigma_multiplier': 0.8187934796856888, 'num_layers': 3, 'initialization_multiplier': 0.35920386301558704}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 63 final loss: -0.00029484
Trial 64:
  Learning Rate: 0.001919082709550501
  Sigma Multiplier: 0.9922814261912639
  Initialization Multiplier: 0.48417754204105057
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.33it/s, loss=-0.000216, elapsed time=0.09, total time=13.6]
[I 2025-06-07 12:50:08,703] Trial 64 finished with value: -0.0002159487778356426 and parameters: {'learning_rate': 0.001919082709550501, 'sigma_multiplier': 0.9922814261912639, 'num_layers': 3, 'initialization_multiplier': 0.48417754204105057}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 64 final loss: -0.00021595
Trial 65:
  Learning Rate: 0.004343022196008278
  Sigma Multiplier: 0.608239877882254
  Initialization Multiplier: 1.0999397778604567
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:15<00:00,  9.59it/s, loss=0.000882, elapsed time=0.1, total time=15.9] 
[I 2025-06-07 12:50:24,672] Trial 65 finished with value: 0.0008822318041677884 and parameters: {'learning_rate': 0.004343022196008278, 'sigma_multiplier': 0.608239877882254, 'num_layers': 3, 'initialization_multiplier': 1.0999397778604567}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 65 final loss: 0.00088223
Trial 66:
  Learning Rate: 0.01930860486793095
  Sigma Multiplier: 0.686889447273987
  Initialization Multiplier: 0.20716915010338993
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:16<00:00,  9.24it/s, loss=0.000116, elapsed time=0.1, total time=16.5]  
[I 2025-06-07 12:50:41,256] Trial 66 finished with value: 0.00011637808379932924 and parameters: {'learning_rate': 0.01930860486793095, 'sigma_multiplier': 0.686889447273987, 'num_layers': 4, 'initialization_multiplier': 0.20716915010338993}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 66 final loss: 0.00011638
Trial 67:
  Learning Rate: 0.031339802480535556
  Sigma Multiplier: 1.1354421120792708
  Initialization Multiplier: 0.5712933504829955
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.25it/s, loss=-0.000192, elapsed time=0.08, total time=12.5]
[I 2025-06-07 12:50:53,798] Trial 67 finished with value: -0.000191717616181651 and parameters: {'learning_rate': 0.031339802480535556, 'sigma_multiplier': 1.1354421120792708, 'num_layers': 3, 'initialization_multiplier': 0.5712933504829955}. Best is trial 11 with value: -0.00040403594016146156.


Training has not converged after 150 steps
Trial 67 final loss: -0.00019172
Trial 68:
  Learning Rate: 0.006995060776731667
  Sigma Multiplier: 0.9448800111325177
  Initialization Multiplier: 0.39133378957110393
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.84it/s, loss=-0.000422, elapsed time=0.09, total time=11.1]
[I 2025-06-07 12:51:04,949] Trial 68 finished with value: -0.00042194392083188094 and parameters: {'learning_rate': 0.006995060776731667, 'sigma_multiplier': 0.9448800111325177, 'num_layers': 2, 'initialization_multiplier': 0.39133378957110393}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 68 final loss: -0.00042194
Trial 69:
  Learning Rate: 0.01097565461723887
  Sigma Multiplier: 1.3362292684461674
  Initialization Multiplier: 0.27661818876339395
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.44it/s, loss=-0.000353, elapsed time=0.05, total time=10]  
[I 2025-06-07 12:51:15,010] Trial 69 finished with value: -0.000353446888859723 and parameters: {'learning_rate': 0.01097565461723887, 'sigma_multiplier': 1.3362292684461674, 'num_layers': 2, 'initialization_multiplier': 0.27661818876339395}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 69 final loss: -0.00035345
Trial 70:
  Learning Rate: 0.010190000553925944
  Sigma Multiplier: 1.077590489169792
  Initialization Multiplier: 0.2644359819951408
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.54it/s, loss=-0.000413, elapsed time=0.06, total time=11.4]
[I 2025-06-07 12:51:26,417] Trial 70 finished with value: -0.00041270066262819353 and parameters: {'learning_rate': 0.010190000553925944, 'sigma_multiplier': 1.077590489169792, 'num_layers': 2, 'initialization_multiplier': 0.2644359819951408}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 70 final loss: -0.00041270
Trial 71:
  Learning Rate: 0.011312933852425043
  Sigma Multiplier: 1.2187504257582256
  Initialization Multiplier: 0.2736283082483747
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.81it/s, loss=-0.000297, elapsed time=0.06, total time=11.1]
[I 2025-06-07 12:51:37,600] Trial 71 finished with value: -0.0002966708807717484 and parameters: {'learning_rate': 0.011312933852425043, 'sigma_multiplier': 1.2187504257582256, 'num_layers': 2, 'initialization_multiplier': 0.2736283082483747}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 71 final loss: -0.00029667
Trial 72:
  Learning Rate: 0.005467645888559063
  Sigma Multiplier: 1.3341064350960348
  Initialization Multiplier: 0.06606132638851414
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.03it/s, loss=-0.000210, elapsed time=0.05, total time=10.4]
[I 2025-06-07 12:51:47,997] Trial 72 finished with value: -0.00021017384393113816 and parameters: {'learning_rate': 0.005467645888559063, 'sigma_multiplier': 1.3341064350960348, 'num_layers': 2, 'initialization_multiplier': 0.06606132638851414}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 72 final loss: -0.00021017
Trial 73:
  Learning Rate: 0.009591338686540557
  Sigma Multiplier: 1.0374299729475243
  Initialization Multiplier: 0.28481513218559856
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.55it/s, loss=-0.000287, elapsed time=0.06, total time=11.4]
[I 2025-06-07 12:51:59,407] Trial 73 finished with value: -0.0002872488165147127 and parameters: {'learning_rate': 0.009591338686540557, 'sigma_multiplier': 1.0374299729475243, 'num_layers': 2, 'initialization_multiplier': 0.28481513218559856}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 73 final loss: -0.00028725
Trial 74:
  Learning Rate: 0.003720476680265146
  Sigma Multiplier: 1.379428568456352
  Initialization Multiplier: 0.16769102897350996
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 18.89it/s, loss=0.000002, elapsed time=0.04, total time=8.2] 
[I 2025-06-07 12:52:07,636] Trial 74 finished with value: 1.909606060659595e-06 and parameters: {'learning_rate': 0.003720476680265146, 'sigma_multiplier': 1.379428568456352, 'num_layers': 1, 'initialization_multiplier': 0.16769102897350996}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 74 final loss: 0.00000191
Trial 75:
  Learning Rate: 0.015680274013307363
  Sigma Multiplier: 1.1369749254906434
  Initialization Multiplier: 0.3619839602380344
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.92it/s, loss=-0.000338, elapsed time=0.06, total time=11.1]
[I 2025-06-07 12:52:18,753] Trial 75 finished with value: -0.0003377881221787719 and parameters: {'learning_rate': 0.015680274013307363, 'sigma_multiplier': 1.1369749254906434, 'num_layers': 2, 'initialization_multiplier': 0.3619839602380344}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 75 final loss: -0.00033779
Trial 76:
  Learning Rate: 0.006747141644804938
  Sigma Multiplier: 1.079079429069544
  Initialization Multiplier: 0.501776542883589
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.03it/s, loss=-0.000417, elapsed time=0.07, total time=11]  
[I 2025-06-07 12:52:29,829] Trial 76 finished with value: -0.0004166588131721579 and parameters: {'learning_rate': 0.006747141644804938, 'sigma_multiplier': 1.079079429069544, 'num_layers': 2, 'initialization_multiplier': 0.501776542883589}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 76 final loss: -0.00041666
Trial 77:
  Learning Rate: 0.012177359311680038
  Sigma Multiplier: 0.8720366627034208
  Initialization Multiplier: 0.09907863506677345
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.74it/s, loss=0.000027, elapsed time=0.1, total time=13.2]  
[I 2025-06-07 12:52:43,044] Trial 77 finished with value: 2.7012437522046718e-05 and parameters: {'learning_rate': 0.012177359311680038, 'sigma_multiplier': 0.8720366627034208, 'num_layers': 2, 'initialization_multiplier': 0.09907863506677345}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 77 final loss: 0.00002701
Trial 78:
  Learning Rate: 0.015247916141308215
  Sigma Multiplier: 1.1276008088086416
  Initialization Multiplier: 0.22910610333617643
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.05it/s, loss=-0.000421, elapsed time=0.11, total time=11.9]
[I 2025-06-07 12:52:55,005] Trial 78 finished with value: -0.0004206568713232515 and parameters: {'learning_rate': 0.015247916141308215, 'sigma_multiplier': 1.1276008088086416, 'num_layers': 2, 'initialization_multiplier': 0.22910610333617643}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 78 final loss: -0.00042066
Trial 79:
  Learning Rate: 0.006408903978684393
  Sigma Multiplier: 1.0468300219496904
  Initialization Multiplier: 0.2332497968803172
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.02it/s, loss=-0.000276, elapsed time=0.04, total time=9.83]
[I 2025-06-07 12:53:04,866] Trial 79 finished with value: -0.000276233266926373 and parameters: {'learning_rate': 0.006408903978684393, 'sigma_multiplier': 1.0468300219496904, 'num_layers': 1, 'initialization_multiplier': 0.2332497968803172}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 79 final loss: -0.00027623
Trial 80:
  Learning Rate: 0.008173085828337522
  Sigma Multiplier: 1.2455588930022552
  Initialization Multiplier: 0.005482497315404178
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.40it/s, loss=-0.000228, elapsed time=0.06, total time=10.8]
[I 2025-06-07 12:53:15,647] Trial 80 finished with value: -0.0002276838732652825 and parameters: {'learning_rate': 0.008173085828337522, 'sigma_multiplier': 1.2455588930022552, 'num_layers': 2, 'initialization_multiplier': 0.005482497315404178}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 80 final loss: -0.00022768
Trial 81:
  Learning Rate: 0.015298685779728441
  Sigma Multiplier: 1.1460363739050972
  Initialization Multiplier: 0.31140073742763175
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.38it/s, loss=-0.000287, elapsed time=0.06, total time=11.5]
[I 2025-06-07 12:53:27,234] Trial 81 finished with value: -0.00028668030862238095 and parameters: {'learning_rate': 0.015298685779728441, 'sigma_multiplier': 1.1460363739050972, 'num_layers': 2, 'initialization_multiplier': 0.31140073742763175}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 81 final loss: -0.00028668
Trial 82:
  Learning Rate: 0.02235518620345133
  Sigma Multiplier: 1.0853648098405835
  Initialization Multiplier: 0.16348443925635392
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.77it/s, loss=-0.000259, elapsed time=0.06, total time=11.2]
[I 2025-06-07 12:53:38,452] Trial 82 finished with value: -0.0002586914016188834 and parameters: {'learning_rate': 0.02235518620345133, 'sigma_multiplier': 1.0853648098405835, 'num_layers': 2, 'initialization_multiplier': 0.16348443925635392}. Best is trial 68 with value: -0.00042194392083188094.


Training has not converged after 150 steps
Trial 82 final loss: -0.00025869
Trial 83:
  Learning Rate: 0.01034060271052533
  Sigma Multiplier: 1.002927561339537
  Initialization Multiplier: 0.4997361142121278
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.30it/s, loss=-0.000436, elapsed time=0.04, total time=11.5]
[I 2025-06-07 12:53:50,026] Trial 83 finished with value: -0.0004358480575217573 and parameters: {'learning_rate': 0.01034060271052533, 'sigma_multiplier': 1.002927561339537, 'num_layers': 2, 'initialization_multiplier': 0.4997361142121278}. Best is trial 83 with value: -0.0004358480575217573.


Training has not converged after 150 steps
Trial 83 final loss: -0.00043585
Trial 84:
  Learning Rate: 0.009254146510582331
  Sigma Multiplier: 0.9945679795759385
  Initialization Multiplier: 0.5229809130709924
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.52it/s, loss=-0.000461, elapsed time=0.06, total time=11.3]
[I 2025-06-07 12:54:01,377] Trial 84 finished with value: -0.00046138953332195543 and parameters: {'learning_rate': 0.009254146510582331, 'sigma_multiplier': 0.9945679795759385, 'num_layers': 2, 'initialization_multiplier': 0.5229809130709924}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 84 final loss: -0.00046139
Trial 85:
  Learning Rate: 0.006067826046929111
  Sigma Multiplier: 1.0033971338226577
  Initialization Multiplier: 1.9858642050541906
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.53it/s, loss=-0.000198, elapsed time=0.08, total time=12.4]
[I 2025-06-07 12:54:13,826] Trial 85 finished with value: -0.00019772702646600444 and parameters: {'learning_rate': 0.006067826046929111, 'sigma_multiplier': 1.0033971338226577, 'num_layers': 2, 'initialization_multiplier': 1.9858642050541906}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 85 final loss: -0.00019773
Trial 86:
  Learning Rate: 0.009133108505542383
  Sigma Multiplier: 0.761566120814016
  Initialization Multiplier: 0.5036084835229179
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.83it/s, loss=-0.000317, elapsed time=0.06, total time=12]  
[I 2025-06-07 12:54:25,839] Trial 86 finished with value: -0.0003173802935722265 and parameters: {'learning_rate': 0.009133108505542383, 'sigma_multiplier': 0.761566120814016, 'num_layers': 2, 'initialization_multiplier': 0.5036084835229179}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 86 final loss: -0.00031738
Trial 87:
  Learning Rate: 0.0045318725785337905
  Sigma Multiplier: 0.9408885986020492
  Initialization Multiplier: 0.7346011328713532
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.01it/s, loss=-0.000337, elapsed time=0.06, total time=11.8]
[I 2025-06-07 12:54:37,674] Trial 87 finished with value: -0.00033738445250070903 and parameters: {'learning_rate': 0.0045318725785337905, 'sigma_multiplier': 0.9408885986020492, 'num_layers': 2, 'initialization_multiplier': 0.7346011328713532}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 87 final loss: -0.00033738
Trial 88:
  Learning Rate: 0.007236034742048763
  Sigma Multiplier: 0.8679838692897354
  Initialization Multiplier: 0.6341521083111059
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.39it/s, loss=-0.000179, elapsed time=0.06, total time=10.1]
[I 2025-06-07 12:54:47,767] Trial 88 finished with value: -0.00017944139536046718 and parameters: {'learning_rate': 0.007236034742048763, 'sigma_multiplier': 0.8679838692897354, 'num_layers': 1, 'initialization_multiplier': 0.6341521083111059}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 88 final loss: -0.00017944
Trial 89:
  Learning Rate: 0.013405964698252799
  Sigma Multiplier: 1.080427839124186
  Initialization Multiplier: 0.4489610346527031
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.19it/s, loss=-0.000382, elapsed time=0.06, total time=10.9]
[I 2025-06-07 12:54:58,737] Trial 89 finished with value: -0.000381576933765169 and parameters: {'learning_rate': 0.013405964698252799, 'sigma_multiplier': 1.080427839124186, 'num_layers': 2, 'initialization_multiplier': 0.4489610346527031}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 89 final loss: -0.00038158
Trial 90:
  Learning Rate: 0.012963231849760278
  Sigma Multiplier: 0.2134784508721681
  Initialization Multiplier: 0.455983117099927
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.15it/s, loss=0.000006, elapsed time=0.09, total time=15.1] 
[I 2025-06-07 12:55:13,863] Trial 90 finished with value: 6.4473206627984334e-06 and parameters: {'learning_rate': 0.012963231849760278, 'sigma_multiplier': 0.2134784508721681, 'num_layers': 2, 'initialization_multiplier': 0.455983117099927}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 90 final loss: 0.00000645
Trial 91:
  Learning Rate: 0.009752997829079313
  Sigma Multiplier: 0.9759050001393603
  Initialization Multiplier: 0.41188322269564615
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.45it/s, loss=-0.000318, elapsed time=0.07, total time=11.5]
[I 2025-06-07 12:55:25,382] Trial 91 finished with value: -0.00031837910233559907 and parameters: {'learning_rate': 0.009752997829079313, 'sigma_multiplier': 0.9759050001393603, 'num_layers': 2, 'initialization_multiplier': 0.41188322269564615}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 91 final loss: -0.00031838
Trial 92:
  Learning Rate: 0.022016189093109848
  Sigma Multiplier: 1.0750420948133144
  Initialization Multiplier: 0.5703287531640742
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.62it/s, loss=-0.000345, elapsed time=0.05, total time=11.3]
[I 2025-06-07 12:55:36,697] Trial 92 finished with value: -0.0003448788150196307 and parameters: {'learning_rate': 0.022016189093109848, 'sigma_multiplier': 1.0750420948133144, 'num_layers': 2, 'initialization_multiplier': 0.5703287531640742}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 92 final loss: -0.00034488
Trial 93:
  Learning Rate: 0.01478112659703949
  Sigma Multiplier: 1.2121298404648946
  Initialization Multiplier: 0.4388325530751623
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.12it/s, loss=-0.000332, elapsed time=0.05, total time=11]  
[I 2025-06-07 12:55:47,727] Trial 93 finished with value: -0.00033168635284483574 and parameters: {'learning_rate': 0.01478112659703949, 'sigma_multiplier': 1.2121298404648946, 'num_layers': 2, 'initialization_multiplier': 0.4388325530751623}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 93 final loss: -0.00033169
Trial 94:
  Learning Rate: 0.0075134502681018
  Sigma Multiplier: 1.1551931122732655
  Initialization Multiplier: 0.5172755900419516
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.29it/s, loss=-0.000255, elapsed time=0.06, total time=10.8]
[I 2025-06-07 12:55:58,563] Trial 94 finished with value: -0.00025481703634649944 and parameters: {'learning_rate': 0.0075134502681018, 'sigma_multiplier': 1.1551931122732655, 'num_layers': 2, 'initialization_multiplier': 0.5172755900419516}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 94 final loss: -0.00025482
Trial 95:
  Learning Rate: 0.03663468318178892
  Sigma Multiplier: 1.0081275359659947
  Initialization Multiplier: 0.6981271445203362
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.88it/s, loss=-0.000207, elapsed time=0.06, total time=12]  
[I 2025-06-07 12:56:10,597] Trial 95 finished with value: -0.00020666814611722687 and parameters: {'learning_rate': 0.03663468318178892, 'sigma_multiplier': 1.0081275359659947, 'num_layers': 2, 'initialization_multiplier': 0.6981271445203362}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 95 final loss: -0.00020667
Trial 96:
  Learning Rate: 0.003554669762006773
  Sigma Multiplier: 1.0969519255072815
  Initialization Multiplier: 0.6027545577195912
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.08it/s, loss=-0.000413, elapsed time=0.05, total time=11]  
[I 2025-06-07 12:56:21,654] Trial 96 finished with value: -0.0004131770970476987 and parameters: {'learning_rate': 0.003554669762006773, 'sigma_multiplier': 1.0969519255072815, 'num_layers': 2, 'initialization_multiplier': 0.6027545577195912}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 96 final loss: -0.00041318
Trial 97:
  Learning Rate: 0.0036467075813148334
  Sigma Multiplier: 1.0879409863343354
  Initialization Multiplier: 0.7821668623998594
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.69it/s, loss=-0.000183, elapsed time=0.1, total time=11.2] 
[I 2025-06-07 12:56:32,946] Trial 97 finished with value: -0.00018313764447451961 and parameters: {'learning_rate': 0.0036467075813148334, 'sigma_multiplier': 1.0879409863343354, 'num_layers': 2, 'initialization_multiplier': 0.7821668623998594}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 97 final loss: -0.00018314
Trial 98:
  Learning Rate: 0.00562221044225364
  Sigma Multiplier: 0.9109256232276735
  Initialization Multiplier: 0.5873412763345793
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.82it/s, loss=-0.000268, elapsed time=0.07, total time=12]  
[I 2025-06-07 12:56:44,997] Trial 98 finished with value: -0.0002675906045665627 and parameters: {'learning_rate': 0.00562221044225364, 'sigma_multiplier': 0.9109256232276735, 'num_layers': 2, 'initialization_multiplier': 0.5873412763345793}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 98 final loss: -0.00026759
Trial 99:
  Learning Rate: 0.004554810440243455
  Sigma Multiplier: 0.7886222992457967
  Initialization Multiplier: 0.8780848986258634
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.84it/s, loss=0.000011, elapsed time=0.07, total time=13]  
[I 2025-06-07 12:56:58,000] Trial 99 finished with value: 1.0687164004176679e-05 and parameters: {'learning_rate': 0.004554810440243455, 'sigma_multiplier': 0.7886222992457967, 'num_layers': 2, 'initialization_multiplier': 0.8780848986258634}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 99 final loss: 0.00001069
Trial 100:
  Learning Rate: 0.008677635940661463
  Sigma Multiplier: 1.0207688610979329
  Initialization Multiplier: 0.4686990075859617
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.08it/s, loss=-0.000340, elapsed time=0.06, total time=9.62]
[I 2025-06-07 12:57:07,658] Trial 100 finished with value: -0.00033985142181461737 and parameters: {'learning_rate': 0.008677635940661463, 'sigma_multiplier': 1.0207688610979329, 'num_layers': 1, 'initialization_multiplier': 0.4686990075859617}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 100 final loss: -0.00033985
Trial 101:
  Learning Rate: 0.012914797470423626
  Sigma Multiplier: 1.2358314411132758
  Initialization Multiplier: 0.3999260723860827
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.18it/s, loss=-0.000381, elapsed time=0.06, total time=10.9]
[I 2025-06-07 12:57:18,553] Trial 101 finished with value: -0.00038116690863198823 and parameters: {'learning_rate': 0.012914797470423626, 'sigma_multiplier': 1.2358314411132758, 'num_layers': 2, 'initialization_multiplier': 0.3999260723860827}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 101 final loss: -0.00038117
Trial 102:
  Learning Rate: 0.010186031738683838
  Sigma Multiplier: 0.9643750810595577
  Initialization Multiplier: 0.6485217964443553
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.41it/s, loss=-0.000390, elapsed time=0.06, total time=11.5]
[I 2025-06-07 12:57:30,059] Trial 102 finished with value: -0.0003899440163234097 and parameters: {'learning_rate': 0.010186031738683838, 'sigma_multiplier': 0.9643750810595577, 'num_layers': 2, 'initialization_multiplier': 0.6485217964443553}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 102 final loss: -0.00038994
Trial 103:
  Learning Rate: 0.012977653147937179
  Sigma Multiplier: 1.1824080142280262
  Initialization Multiplier: 0.6585579393878418
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.76it/s, loss=-0.000215, elapsed time=0.07, total time=11.2]
[I 2025-06-07 12:57:41,349] Trial 103 finished with value: -0.00021493738743859876 and parameters: {'learning_rate': 0.012977653147937179, 'sigma_multiplier': 1.1824080142280262, 'num_layers': 2, 'initialization_multiplier': 0.6585579393878418}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 103 final loss: -0.00021494
Trial 104:
  Learning Rate: 0.01814404423134324
  Sigma Multiplier: 1.0650096410761454
  Initialization Multiplier: 0.5140322475189562
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.71it/s, loss=-0.000321, elapsed time=0.05, total time=11.2]
[I 2025-06-07 12:57:52,638] Trial 104 finished with value: -0.0003205710275571777 and parameters: {'learning_rate': 0.01814404423134324, 'sigma_multiplier': 1.0650096410761454, 'num_layers': 2, 'initialization_multiplier': 0.5140322475189562}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 104 final loss: -0.00032057
Trial 105:
  Learning Rate: 0.002843904616590867
  Sigma Multiplier: 1.109038694963662
  Initialization Multiplier: 0.6266156601417815
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.76it/s, loss=-0.000386, elapsed time=0.06, total time=11.2]
[I 2025-06-07 12:58:03,899] Trial 105 finished with value: -0.00038584325939448904 and parameters: {'learning_rate': 0.002843904616590867, 'sigma_multiplier': 1.109038694963662, 'num_layers': 2, 'initialization_multiplier': 0.6266156601417815}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 105 final loss: -0.00038584
Trial 106:
  Learning Rate: 0.002438402620341387
  Sigma Multiplier: 1.1131080488766565
  Initialization Multiplier: 0.9596471236162019
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.87it/s, loss=0.012557, elapsed time=0.05, total time=11.1]
[I 2025-06-07 12:58:15,079] Trial 106 finished with value: 0.012556883090963864 and parameters: {'learning_rate': 0.002438402620341387, 'sigma_multiplier': 1.1131080488766565, 'num_layers': 2, 'initialization_multiplier': 0.9596471236162019}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 106 final loss: 0.01255688
Trial 107:
  Learning Rate: 0.001630035761075183
  Sigma Multiplier: 0.9581391141357644
  Initialization Multiplier: 0.6054751484223495
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.72it/s, loss=-0.000057, elapsed time=0.05, total time=11.2]
[I 2025-06-07 12:58:26,269] Trial 107 finished with value: -5.749936991523352e-05 and parameters: {'learning_rate': 0.001630035761075183, 'sigma_multiplier': 0.9581391141357644, 'num_layers': 2, 'initialization_multiplier': 0.6054751484223495}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 107 final loss: -0.00005750
Trial 108:
  Learning Rate: 0.0010184509661522638
  Sigma Multiplier: 1.1181742538405113
  Initialization Multiplier: 0.53559674090194
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.65it/s, loss=0.002347, elapsed time=0.07, total time=11.3]
[I 2025-06-07 12:58:37,586] Trial 108 finished with value: 0.002347216844761741 and parameters: {'learning_rate': 0.0010184509661522638, 'sigma_multiplier': 1.1181742538405113, 'num_layers': 2, 'initialization_multiplier': 0.53559674090194}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 108 final loss: 0.00234722
Trial 109:
  Learning Rate: 0.0033239421281114007
  Sigma Multiplier: 0.8820044836069157
  Initialization Multiplier: 0.6741025059321561
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.20it/s, loss=-0.000030, elapsed time=0.06, total time=11.7]
[I 2025-06-07 12:58:49,322] Trial 109 finished with value: -2.9663466265314434e-05 and parameters: {'learning_rate': 0.0033239421281114007, 'sigma_multiplier': 0.8820044836069157, 'num_layers': 2, 'initialization_multiplier': 0.6741025059321561}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 109 final loss: -0.00002966
Trial 110:
  Learning Rate: 0.0027532297003252893
  Sigma Multiplier: 1.0429357721590067
  Initialization Multiplier: 0.7744253945403478
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.78it/s, loss=-0.000132, elapsed time=0.08, total time=11.2]
[I 2025-06-07 12:59:00,620] Trial 110 finished with value: -0.00013163008577665023 and parameters: {'learning_rate': 0.0027532297003252893, 'sigma_multiplier': 1.0429357721590067, 'num_layers': 2, 'initialization_multiplier': 0.7744253945403478}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 110 final loss: -0.00013163
Trial 111:
  Learning Rate: 0.006013132197117702
  Sigma Multiplier: 1.230047183160232
  Initialization Multiplier: 0.4068418911162422
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.30it/s, loss=-0.000316, elapsed time=0.07, total time=12.5]
[I 2025-06-07 12:59:13,139] Trial 111 finished with value: -0.0003157852387870899 and parameters: {'learning_rate': 0.006013132197117702, 'sigma_multiplier': 1.230047183160232, 'num_layers': 2, 'initialization_multiplier': 0.4068418911162422}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 111 final loss: -0.00031579
Trial 112:
  Learning Rate: 0.009893307365027868
  Sigma Multiplier: 1.3003767200734226
  Initialization Multiplier: 0.6316924382911594
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.73it/s, loss=-0.000341, elapsed time=0.08, total time=11.3]
[I 2025-06-07 12:59:24,470] Trial 112 finished with value: -0.00034084029321844065 and parameters: {'learning_rate': 0.009893307365027868, 'sigma_multiplier': 1.3003767200734226, 'num_layers': 2, 'initialization_multiplier': 0.6316924382911594}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 112 final loss: -0.00034084
Trial 113:
  Learning Rate: 0.010818528036094701
  Sigma Multiplier: 1.1772849918756816
  Initialization Multiplier: 0.47069722622345456
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.61it/s, loss=-0.000352, elapsed time=0.05, total time=11.3]
[I 2025-06-07 12:59:35,866] Trial 113 finished with value: -0.0003523586632644904 and parameters: {'learning_rate': 0.010818528036094701, 'sigma_multiplier': 1.1772849918756816, 'num_layers': 2, 'initialization_multiplier': 0.47069722622345456}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 113 final loss: -0.00035236
Trial 114:
  Learning Rate: 0.005083685559735996
  Sigma Multiplier: 0.9722840875822073
  Initialization Multiplier: 0.5615326058787473
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.54it/s, loss=-0.000340, elapsed time=0.09, total time=11.4]
[I 2025-06-07 12:59:47,327] Trial 114 finished with value: -0.00034037528528538 and parameters: {'learning_rate': 0.005083685559735996, 'sigma_multiplier': 0.9722840875822073, 'num_layers': 2, 'initialization_multiplier': 0.5615326058787473}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 114 final loss: -0.00034038
Trial 115:
  Learning Rate: 0.004197549473649547
  Sigma Multiplier: 1.0143617022617717
  Initialization Multiplier: 0.34669575964609695
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.61it/s, loss=-0.000328, elapsed time=0.05, total time=11.3]
[I 2025-06-07 12:59:58,665] Trial 115 finished with value: -0.00032791216247539157 and parameters: {'learning_rate': 0.004197549473649547, 'sigma_multiplier': 1.0143617022617717, 'num_layers': 2, 'initialization_multiplier': 0.34669575964609695}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 115 final loss: -0.00032791
Trial 116:
  Learning Rate: 0.007943855461719735
  Sigma Multiplier: 1.0984789555957823
  Initialization Multiplier: 0.7152516223935625
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.53it/s, loss=-0.000254, elapsed time=0.06, total time=11.4]
[I 2025-06-07 13:00:10,127] Trial 116 finished with value: -0.0002540326382487349 and parameters: {'learning_rate': 0.007943855461719735, 'sigma_multiplier': 1.0984789555957823, 'num_layers': 2, 'initialization_multiplier': 0.7152516223935625}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 116 final loss: -0.00025403
Trial 117:
  Learning Rate: 0.01704232872691364
  Sigma Multiplier: 1.1626723657852789
  Initialization Multiplier: 0.4095058679988388
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.80it/s, loss=-0.000330, elapsed time=0.06, total time=11.1]
[I 2025-06-07 13:00:21,306] Trial 117 finished with value: -0.00032960084680489524 and parameters: {'learning_rate': 0.01704232872691364, 'sigma_multiplier': 1.1626723657852789, 'num_layers': 2, 'initialization_multiplier': 0.4095058679988388}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 117 final loss: -0.00032960
Trial 118:
  Learning Rate: 0.011570133185592892
  Sigma Multiplier: 1.061270817812024
  Initialization Multiplier: 0.22396088486436222
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.13it/s, loss=-0.000232, elapsed time=0.07, total time=10.9]
[I 2025-06-07 13:00:32,231] Trial 118 finished with value: -0.0002319545592097548 and parameters: {'learning_rate': 0.011570133185592892, 'sigma_multiplier': 1.061270817812024, 'num_layers': 2, 'initialization_multiplier': 0.22396088486436222}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 118 final loss: -0.00023195
Trial 119:
  Learning Rate: 0.013173709115107396
  Sigma Multiplier: 1.229291778185644
  Initialization Multiplier: 0.5951111385451988
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.41it/s, loss=-0.000280, elapsed time=0.04, total time=8.85]
[I 2025-06-07 13:00:41,115] Trial 119 finished with value: -0.00027971736057881314 and parameters: {'learning_rate': 0.013173709115107396, 'sigma_multiplier': 1.229291778185644, 'num_layers': 1, 'initialization_multiplier': 0.5951111385451988}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 119 final loss: -0.00027972
Trial 120:
  Learning Rate: 0.007229967833861746
  Sigma Multiplier: 0.8454030812850829
  Initialization Multiplier: 0.3028171570921243
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.88it/s, loss=-0.000281, elapsed time=0.09, total time=12]  
[I 2025-06-07 13:00:53,134] Trial 120 finished with value: -0.00028069923516507625 and parameters: {'learning_rate': 0.007229967833861746, 'sigma_multiplier': 0.8454030812850829, 'num_layers': 2, 'initialization_multiplier': 0.3028171570921243}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 120 final loss: -0.00028070
Trial 121:
  Learning Rate: 0.006279370641707235
  Sigma Multiplier: 0.9273285887723399
  Initialization Multiplier: 0.5083267442451966
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.27it/s, loss=-0.000282, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:01:04,756] Trial 121 finished with value: -0.0002824534249501237 and parameters: {'learning_rate': 0.006279370641707235, 'sigma_multiplier': 0.9273285887723399, 'num_layers': 2, 'initialization_multiplier': 0.5083267442451966}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 121 final loss: -0.00028245
Trial 122:
  Learning Rate: 0.00929093913376486
  Sigma Multiplier: 0.9731578706804389
  Initialization Multiplier: 0.3410585280981402
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.43it/s, loss=-0.000381, elapsed time=0.08, total time=11.4]
[I 2025-06-07 13:01:16,204] Trial 122 finished with value: -0.00038126875282393246 and parameters: {'learning_rate': 0.00929093913376486, 'sigma_multiplier': 0.9731578706804389, 'num_layers': 2, 'initialization_multiplier': 0.3410585280981402}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 122 final loss: -0.00038127
Trial 123:
  Learning Rate: 0.0002989212533066769
  Sigma Multiplier: 0.9754429536253945
  Initialization Multiplier: 0.422964179711691
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.74it/s, loss=0.007749, elapsed time=0.06, total time=11.2]
[I 2025-06-07 13:01:27,467] Trial 123 finished with value: 0.0077489115894434375 and parameters: {'learning_rate': 0.0002989212533066769, 'sigma_multiplier': 0.9754429536253945, 'num_layers': 2, 'initialization_multiplier': 0.422964179711691}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 123 final loss: 0.00774891
Trial 124:
  Learning Rate: 0.00911920296357168
  Sigma Multiplier: 1.12348772642328
  Initialization Multiplier: 0.3421130786145985
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.13it/s, loss=-0.000289, elapsed time=0.15, total time=10.9]
[I 2025-06-07 13:01:38,460] Trial 124 finished with value: -0.00028879155132476665 and parameters: {'learning_rate': 0.00911920296357168, 'sigma_multiplier': 1.12348772642328, 'num_layers': 2, 'initialization_multiplier': 0.3421130786145985}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 124 final loss: -0.00028879
Trial 125:
  Learning Rate: 9.504741610541282e-05
  Sigma Multiplier: 1.0506594965268894
  Initialization Multiplier: 0.2522136992506916
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.30it/s, loss=0.041831, elapsed time=0.04, total time=10.8]
[I 2025-06-07 13:01:49,330] Trial 125 finished with value: 0.04183114542888386 and parameters: {'learning_rate': 9.504741610541282e-05, 'sigma_multiplier': 1.0506594965268894, 'num_layers': 2, 'initialization_multiplier': 0.2522136992506916}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 125 final loss: 0.04183115
Trial 126:
  Learning Rate: 0.009500692240386379
  Sigma Multiplier: 1.0254524866881396
  Initialization Multiplier: 0.45510527354901614
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.01it/s, loss=-0.000321, elapsed time=0.06, total time=11]  
[I 2025-06-07 13:02:00,349] Trial 126 finished with value: -0.00032148562646619385 and parameters: {'learning_rate': 0.009500692240386379, 'sigma_multiplier': 1.0254524866881396, 'num_layers': 2, 'initialization_multiplier': 0.45510527354901614}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 126 final loss: -0.00032149
Trial 127:
  Learning Rate: 0.005308149765854036
  Sigma Multiplier: 1.1465918882158896
  Initialization Multiplier: 0.5218087054412168
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:17<00:00,  8.78it/s, loss=-0.000249, elapsed time=0.11, total time=17.5]
[I 2025-06-07 13:02:17,877] Trial 127 finished with value: -0.0002494249474774158 and parameters: {'learning_rate': 0.005308149765854036, 'sigma_multiplier': 1.1465918882158896, 'num_layers': 5, 'initialization_multiplier': 0.5218087054412168}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 127 final loss: -0.00024942
Trial 128:
  Learning Rate: 0.023284335209668546
  Sigma Multiplier: 0.9002212367159351
  Initialization Multiplier: 0.37984845348771284
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.43it/s, loss=-0.000325, elapsed time=0.06, total time=11.5]
[I 2025-06-07 13:02:29,387] Trial 128 finished with value: -0.000325296098015644 and parameters: {'learning_rate': 0.023284335209668546, 'sigma_multiplier': 0.9002212367159351, 'num_layers': 2, 'initialization_multiplier': 0.37984845348771284}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 128 final loss: -0.00032530
Trial 129:
  Learning Rate: 0.013795919766993455
  Sigma Multiplier: 1.270716220661025
  Initialization Multiplier: 0.3167611509807127
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.45it/s, loss=-0.000280, elapsed time=0.07, total time=10.6]
[I 2025-06-07 13:02:40,064] Trial 129 finished with value: -0.0002802512413069352 and parameters: {'learning_rate': 0.013795919766993455, 'sigma_multiplier': 1.270716220661025, 'num_layers': 2, 'initialization_multiplier': 0.3167611509807127}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 129 final loss: -0.00028025
Trial 130:
  Learning Rate: 0.0031368007634018703
  Sigma Multiplier: 0.9929395834865977
  Initialization Multiplier: 0.1879758235005965
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.39it/s, loss=-0.000071, elapsed time=0.06, total time=11.5]
[I 2025-06-07 13:02:51,627] Trial 130 finished with value: -7.131073929159269e-05 and parameters: {'learning_rate': 0.0031368007634018703, 'sigma_multiplier': 0.9929395834865977, 'num_layers': 2, 'initialization_multiplier': 0.1879758235005965}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 130 final loss: -0.00007131
Trial 131:
  Learning Rate: 0.006758618510932284
  Sigma Multiplier: 0.9377895955473199
  Initialization Multiplier: 0.36994302361100984
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.48it/s, loss=-0.000286, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:03:03,234] Trial 131 finished with value: -0.00028554479423180515 and parameters: {'learning_rate': 0.006758618510932284, 'sigma_multiplier': 0.9377895955473199, 'num_layers': 2, 'initialization_multiplier': 0.36994302361100984}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 131 final loss: -0.00028554
Trial 132:
  Learning Rate: 0.007642445506576176
  Sigma Multiplier: 1.1953748849806167
  Initialization Multiplier: 0.4816022428221309
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.72it/s, loss=-0.000389, elapsed time=0.06, total time=10.5]
[I 2025-06-07 13:03:13,733] Trial 132 finished with value: -0.00038867940232596465 and parameters: {'learning_rate': 0.007642445506576176, 'sigma_multiplier': 1.1953748849806167, 'num_layers': 2, 'initialization_multiplier': 0.4816022428221309}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 132 final loss: -0.00038868
Trial 133:
  Learning Rate: 0.0019766359220990794
  Sigma Multiplier: 1.1958472331388885
  Initialization Multiplier: 0.48884260803462587
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.57it/s, loss=-0.000266, elapsed time=0.05, total time=10.6]
[I 2025-06-07 13:03:24,389] Trial 133 finished with value: -0.00026573229739722926 and parameters: {'learning_rate': 0.0019766359220990794, 'sigma_multiplier': 1.1958472331388885, 'num_layers': 2, 'initialization_multiplier': 0.48884260803462587}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 133 final loss: -0.00026573
Trial 134:
  Learning Rate: 0.00804017431125495
  Sigma Multiplier: 1.0823869074832622
  Initialization Multiplier: 0.5510870164412311
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.11it/s, loss=-0.000351, elapsed time=0.06, total time=10.9]
[I 2025-06-07 13:03:35,327] Trial 134 finished with value: -0.00035140123579418853 and parameters: {'learning_rate': 0.00804017431125495, 'sigma_multiplier': 1.0823869074832622, 'num_layers': 2, 'initialization_multiplier': 0.5510870164412311}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 134 final loss: -0.00035140
Trial 135:
  Learning Rate: 0.010648665161700255
  Sigma Multiplier: 1.318602190129601
  Initialization Multiplier: 1.348028511043892
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.44it/s, loss=-0.000307, elapsed time=0.05, total time=10.7]
[I 2025-06-07 13:03:46,056] Trial 135 finished with value: -0.0003072481560047811 and parameters: {'learning_rate': 0.010648665161700255, 'sigma_multiplier': 1.318602190129601, 'num_layers': 2, 'initialization_multiplier': 1.348028511043892}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 135 final loss: -0.00030725
Trial 136:
  Learning Rate: 0.012187694694605828
  Sigma Multiplier: 1.1133443112433101
  Initialization Multiplier: 0.6351205898983753
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.80it/s, loss=-0.000363, elapsed time=0.04, total time=11.2]
[I 2025-06-07 13:03:57,261] Trial 136 finished with value: -0.0003630058391024758 and parameters: {'learning_rate': 0.012187694694605828, 'sigma_multiplier': 1.1133443112433101, 'num_layers': 2, 'initialization_multiplier': 0.6351205898983753}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 136 final loss: -0.00036301
Trial 137:
  Learning Rate: 0.01614372665588024
  Sigma Multiplier: 1.1946622452288707
  Initialization Multiplier: 0.4372254581781787
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.50it/s, loss=-0.000324, elapsed time=0.05, total time=10.6]
[I 2025-06-07 13:04:07,908] Trial 137 finished with value: -0.0003240993626618702 and parameters: {'learning_rate': 0.01614372665588024, 'sigma_multiplier': 1.1946622452288707, 'num_layers': 2, 'initialization_multiplier': 0.4372254581781787}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 137 final loss: -0.00032410
Trial 138:
  Learning Rate: 0.003922644835044014
  Sigma Multiplier: 1.2534095302355384
  Initialization Multiplier: 0.46993082329946667
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.30it/s, loss=-0.000373, elapsed time=0.05, total time=10.8]
[I 2025-06-07 13:04:18,771] Trial 138 finished with value: -0.0003733984306457231 and parameters: {'learning_rate': 0.003922644835044014, 'sigma_multiplier': 1.2534095302355384, 'num_layers': 2, 'initialization_multiplier': 0.46993082329946667}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 138 final loss: -0.00037340
Trial 139:
  Learning Rate: 0.008143813897639306
  Sigma Multiplier: 1.1502057377660675
  Initialization Multiplier: 0.5560485902866609
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.10it/s, loss=-0.000351, elapsed time=0.08, total time=11.8]
[I 2025-06-07 13:04:30,624] Trial 139 finished with value: -0.00035112408706174694 and parameters: {'learning_rate': 0.008143813897639306, 'sigma_multiplier': 1.1502057377660675, 'num_layers': 2, 'initialization_multiplier': 0.5560485902866609}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 139 final loss: -0.00035112
Trial 140:
  Learning Rate: 0.0048314592190217285
  Sigma Multiplier: 1.062167209885277
  Initialization Multiplier: 0.2969879480962616
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.65it/s, loss=-0.000258, elapsed time=0.06, total time=11.4]
[I 2025-06-07 13:04:42,031] Trial 140 finished with value: -0.00025783014704629036 and parameters: {'learning_rate': 0.0048314592190217285, 'sigma_multiplier': 1.062167209885277, 'num_layers': 2, 'initialization_multiplier': 0.2969879480962616}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 140 final loss: -0.00025783
Trial 141:
  Learning Rate: 0.00709324990113282
  Sigma Multiplier: 0.9659343274505039
  Initialization Multiplier: 0.39576528670808203
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.49it/s, loss=-0.000379, elapsed time=0.06, total time=11.4]
[I 2025-06-07 13:04:53,503] Trial 141 finished with value: -0.0003787144331715102 and parameters: {'learning_rate': 0.00709324990113282, 'sigma_multiplier': 0.9659343274505039, 'num_layers': 2, 'initialization_multiplier': 0.39576528670808203}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 141 final loss: -0.00037871
Trial 142:
  Learning Rate: 0.010004847032735846
  Sigma Multiplier: 1.0094071411119374
  Initialization Multiplier: 0.3900891900210533
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.61it/s, loss=-0.000394, elapsed time=0.06, total time=11.3]
[I 2025-06-07 13:05:04,849] Trial 142 finished with value: -0.00039430105668191665 and parameters: {'learning_rate': 0.010004847032735846, 'sigma_multiplier': 1.0094071411119374, 'num_layers': 2, 'initialization_multiplier': 0.3900891900210533}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 142 final loss: -0.00039430
Trial 143:
  Learning Rate: 0.010076787982187361
  Sigma Multiplier: 0.9996205741249455
  Initialization Multiplier: 0.49710769995110776
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.53it/s, loss=-0.000384, elapsed time=0.07, total time=11.4]
[I 2025-06-07 13:05:16,292] Trial 143 finished with value: -0.0003843108299383832 and parameters: {'learning_rate': 0.010076787982187361, 'sigma_multiplier': 0.9996205741249455, 'num_layers': 2, 'initialization_multiplier': 0.49710769995110776}. Best is trial 84 with value: -0.00046138953332195543.


Training has not converged after 150 steps
Trial 143 final loss: -0.00038431
Trial 144:
  Learning Rate: 0.009833198081501033
  Sigma Multiplier: 1.001516945448479
  Initialization Multiplier: 0.5924641213537045
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.67it/s, loss=-0.000483, elapsed time=0.07, total time=11.3]
[I 2025-06-07 13:05:27,606] Trial 144 finished with value: -0.0004832216293692365 and parameters: {'learning_rate': 0.009833198081501033, 'sigma_multiplier': 1.001516945448479, 'num_layers': 2, 'initialization_multiplier': 0.5924641213537045}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 144 final loss: -0.00048322
Trial 145:
  Learning Rate: 0.011005639789831538
  Sigma Multiplier: 1.0019753122597954
  Initialization Multiplier: 0.5900955043643455
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.32it/s, loss=-0.000331, elapsed time=0.08, total time=11.6]
[I 2025-06-07 13:05:39,217] Trial 145 finished with value: -0.00033100286900275913 and parameters: {'learning_rate': 0.011005639789831538, 'sigma_multiplier': 1.0019753122597954, 'num_layers': 2, 'initialization_multiplier': 0.5900955043643455}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 145 final loss: -0.00033100
Trial 146:
  Learning Rate: 0.005845509192875578
  Sigma Multiplier: 1.0310274210373342
  Initialization Multiplier: 0.6496259705348878
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.73it/s, loss=-0.000317, elapsed time=0.08, total time=11.2]
[I 2025-06-07 13:05:50,476] Trial 146 finished with value: -0.00031705165062899436 and parameters: {'learning_rate': 0.005845509192875578, 'sigma_multiplier': 1.0310274210373342, 'num_layers': 2, 'initialization_multiplier': 0.6496259705348878}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 146 final loss: -0.00031705
Trial 147:
  Learning Rate: 0.010137562856494557
  Sigma Multiplier: 1.095337823267927
  Initialization Multiplier: 0.5305969710087918
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.31it/s, loss=-0.000373, elapsed time=0.06, total time=11.5]
[I 2025-06-07 13:06:02,021] Trial 147 finished with value: -0.00037293490198940185 and parameters: {'learning_rate': 0.010137562856494557, 'sigma_multiplier': 1.095337823267927, 'num_layers': 2, 'initialization_multiplier': 0.5305969710087918}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 147 final loss: -0.00037293
Trial 148:
  Learning Rate: 0.018139225666797414
  Sigma Multiplier: 0.9026618266270058
  Initialization Multiplier: 0.4906445409424721
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.38it/s, loss=-0.000280, elapsed time=0.07, total time=12.6]
[I 2025-06-07 13:06:14,620] Trial 148 finished with value: -0.00028033884069035987 and parameters: {'learning_rate': 0.018139225666797414, 'sigma_multiplier': 0.9026618266270058, 'num_layers': 2, 'initialization_multiplier': 0.4906445409424721}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 148 final loss: -0.00028034
Trial 149:
  Learning Rate: 0.008528578365318203
  Sigma Multiplier: 1.0542830997337058
  Initialization Multiplier: 0.6051658603559411
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.93it/s, loss=-0.000380, elapsed time=0.06, total time=11.9]
[I 2025-06-07 13:06:26,522] Trial 149 finished with value: -0.00037996354756766453 and parameters: {'learning_rate': 0.008528578365318203, 'sigma_multiplier': 1.0542830997337058, 'num_layers': 2, 'initialization_multiplier': 0.6051658603559411}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 149 final loss: -0.00037996
Trial 150:
  Learning Rate: 0.01376245185403636
  Sigma Multiplier: 1.011695284525096
  Initialization Multiplier: 0.689181317663432
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.82it/s, loss=-0.000261, elapsed time=0.09, total time=12]  
[I 2025-06-07 13:06:38,607] Trial 150 finished with value: -0.00026104363143363273 and parameters: {'learning_rate': 0.01376245185403636, 'sigma_multiplier': 1.011695284525096, 'num_layers': 2, 'initialization_multiplier': 0.689181317663432}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 150 final loss: -0.00026104
Trial 151:
  Learning Rate: 0.009675819915839204
  Sigma Multiplier: 0.9649436116934765
  Initialization Multiplier: 0.43486433096594357
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.46it/s, loss=-0.000372, elapsed time=0.05, total time=12.3]
[I 2025-06-07 13:06:50,997] Trial 151 finished with value: -0.00037182704568062923 and parameters: {'learning_rate': 0.009675819915839204, 'sigma_multiplier': 0.9649436116934765, 'num_layers': 2, 'initialization_multiplier': 0.43486433096594357}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 151 final loss: -0.00037183
Trial 152:
  Learning Rate: 0.006659174176263613
  Sigma Multiplier: 0.9473395769258184
  Initialization Multiplier: 0.47210743636236346
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.70it/s, loss=-0.000405, elapsed time=0.13, total time=12.2]
[I 2025-06-07 13:07:03,273] Trial 152 finished with value: -0.0004054935430587671 and parameters: {'learning_rate': 0.006659174176263613, 'sigma_multiplier': 0.9473395769258184, 'num_layers': 2, 'initialization_multiplier': 0.47210743636236346}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 152 final loss: -0.00040549
Trial 153:
  Learning Rate: 0.007036448410886705
  Sigma Multiplier: 0.8553436466045401
  Initialization Multiplier: 0.5056713489746221
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.10it/s, loss=-0.000254, elapsed time=0.07, total time=12.7]
[I 2025-06-07 13:07:16,031] Trial 153 finished with value: -0.00025430660048079317 and parameters: {'learning_rate': 0.007036448410886705, 'sigma_multiplier': 0.8553436466045401, 'num_layers': 2, 'initialization_multiplier': 0.5056713489746221}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 153 final loss: -0.00025431
Trial 154:
  Learning Rate: 0.006045220815360686
  Sigma Multiplier: 0.92547404100045
  Initialization Multiplier: 0.5509083732310489
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.48it/s, loss=-0.000386, elapsed time=0.08, total time=12.3]
[I 2025-06-07 13:07:28,382] Trial 154 finished with value: -0.0003855440571829513 and parameters: {'learning_rate': 0.006045220815360686, 'sigma_multiplier': 0.92547404100045, 'num_layers': 2, 'initialization_multiplier': 0.5509083732310489}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 154 final loss: -0.00038554
Trial 155:
  Learning Rate: 0.005681418817296294
  Sigma Multiplier: 0.7996481142792959
  Initialization Multiplier: 0.5732297206925372
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.36it/s, loss=-0.000295, elapsed time=0.08, total time=12.4]
[I 2025-06-07 13:07:40,887] Trial 155 finished with value: -0.000294763827278016 and parameters: {'learning_rate': 0.005681418817296294, 'sigma_multiplier': 0.7996481142792959, 'num_layers': 2, 'initialization_multiplier': 0.5732297206925372}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 155 final loss: -0.00029476
Trial 156:
  Learning Rate: 0.004454131129935002
  Sigma Multiplier: 0.9261161907434194
  Initialization Multiplier: 0.54826041737173
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.57it/s, loss=-0.000357, elapsed time=0.07, total time=12.3]
[I 2025-06-07 13:07:53,238] Trial 156 finished with value: -0.0003574976537619843 and parameters: {'learning_rate': 0.004454131129935002, 'sigma_multiplier': 0.9261161907434194, 'num_layers': 2, 'initialization_multiplier': 0.54826041737173}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 156 final loss: -0.00035750
Trial 157:
  Learning Rate: 0.006184366925812076
  Sigma Multiplier: 0.8716277934771446
  Initialization Multiplier: 0.6247393228789481
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.56it/s, loss=-0.000309, elapsed time=0.08, total time=12.3]
[I 2025-06-07 13:08:05,558] Trial 157 finished with value: -0.00030937097278256744 and parameters: {'learning_rate': 0.006184366925812076, 'sigma_multiplier': 0.8716277934771446, 'num_layers': 2, 'initialization_multiplier': 0.6247393228789481}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 157 final loss: -0.00030937
Trial 158:
  Learning Rate: 0.007770079544344371
  Sigma Multiplier: 0.9088092534059359
  Initialization Multiplier: 0.47213060885042496
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.74it/s, loss=-0.000363, elapsed time=0.05, total time=12.2]
[I 2025-06-07 13:08:17,796] Trial 158 finished with value: -0.000363423467525939 and parameters: {'learning_rate': 0.007770079544344371, 'sigma_multiplier': 0.9088092534059359, 'num_layers': 2, 'initialization_multiplier': 0.47213060885042496}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 158 final loss: -0.00036342
Trial 159:
  Learning Rate: 0.003777806217897407
  Sigma Multiplier: 1.0223215838208366
  Initialization Multiplier: 0.7504527983292429
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.44it/s, loss=-0.000248, elapsed time=0.06, total time=11.5]
[I 2025-06-07 13:08:29,370] Trial 159 finished with value: -0.00024771008171941997 and parameters: {'learning_rate': 0.003777806217897407, 'sigma_multiplier': 1.0223215838208366, 'num_layers': 2, 'initialization_multiplier': 0.7504527983292429}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 159 final loss: -0.00024771
Trial 160:
  Learning Rate: 0.006724355634343228
  Sigma Multiplier: 0.9433290369429085
  Initialization Multiplier: 0.5295975402077375
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.52it/s, loss=-0.000418, elapsed time=0.09, total time=11.4]
[I 2025-06-07 13:08:40,806] Trial 160 finished with value: -0.000418012206748761 and parameters: {'learning_rate': 0.006724355634343228, 'sigma_multiplier': 0.9433290369429085, 'num_layers': 2, 'initialization_multiplier': 0.5295975402077375}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 160 final loss: -0.00041801
Trial 161:
  Learning Rate: 0.005313605737589059
  Sigma Multiplier: 0.9507251569284879
  Initialization Multiplier: 0.5233163347190549
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.69it/s, loss=-0.000378, elapsed time=0.07, total time=12.1]
[I 2025-06-07 13:08:52,943] Trial 161 finished with value: -0.0003776891007675604 and parameters: {'learning_rate': 0.005313605737589059, 'sigma_multiplier': 0.9507251569284879, 'num_layers': 2, 'initialization_multiplier': 0.5233163347190549}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 161 final loss: -0.00037769
Trial 162:
  Learning Rate: 0.0064568959773591795
  Sigma Multiplier: 0.9793584609989379
  Initialization Multiplier: 0.5915446933941023
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.64it/s, loss=-0.000362, elapsed time=0.05, total time=12.2]
[I 2025-06-07 13:09:05,197] Trial 162 finished with value: -0.000362180499960893 and parameters: {'learning_rate': 0.0064568959773591795, 'sigma_multiplier': 0.9793584609989379, 'num_layers': 2, 'initialization_multiplier': 0.5915446933941023}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 162 final loss: -0.00036218
Trial 163:
  Learning Rate: 0.007788435559031669
  Sigma Multiplier: 0.82598378761833
  Initialization Multiplier: 0.652934932628503
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.44it/s, loss=-0.000321, elapsed time=0.06, total time=12.3]
[I 2025-06-07 13:09:17,553] Trial 163 finished with value: -0.000320959464549475 and parameters: {'learning_rate': 0.007788435559031669, 'sigma_multiplier': 0.82598378761833, 'num_layers': 2, 'initialization_multiplier': 0.652934932628503}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 163 final loss: -0.00032096
Trial 164:
  Learning Rate: 0.008967062637496465
  Sigma Multiplier: 1.1266195706421867
  Initialization Multiplier: 0.5135502019440258
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.11it/s, loss=-0.000375, elapsed time=0.05, total time=11]  
[I 2025-06-07 13:09:28,576] Trial 164 finished with value: -0.0003747708979702221 and parameters: {'learning_rate': 0.008967062637496465, 'sigma_multiplier': 1.1266195706421867, 'num_layers': 2, 'initialization_multiplier': 0.5135502019440258}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 164 final loss: -0.00037477
Trial 165:
  Learning Rate: 0.011232795209722134
  Sigma Multiplier: 1.0095162379320102
  Initialization Multiplier: 0.5571434494416845
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.83it/s, loss=-0.000284, elapsed time=0.06, total time=11.1]
[I 2025-06-07 13:09:39,698] Trial 165 finished with value: -0.0002838395199711149 and parameters: {'learning_rate': 0.011232795209722134, 'sigma_multiplier': 1.0095162379320102, 'num_layers': 2, 'initialization_multiplier': 0.5571434494416845}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 165 final loss: -0.00028384
Trial 166:
  Learning Rate: 0.0029142384734710857
  Sigma Multiplier: 0.9367586594355207
  Initialization Multiplier: 0.4663245421297305
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.66it/s, loss=-0.000332, elapsed time=0.07, total time=12.2]
[I 2025-06-07 13:09:51,912] Trial 166 finished with value: -0.00033207746709716315 and parameters: {'learning_rate': 0.0029142384734710857, 'sigma_multiplier': 0.9367586594355207, 'num_layers': 2, 'initialization_multiplier': 0.4663245421297305}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 166 final loss: -0.00033208
Trial 167:
  Learning Rate: 0.00487130007411567
  Sigma Multiplier: 1.0521113827178603
  Initialization Multiplier: 0.12115023017676757
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.46it/s, loss=-0.000240, elapsed time=0.07, total time=11.5]
[I 2025-06-07 13:10:03,418] Trial 167 finished with value: -0.00024013815573288265 and parameters: {'learning_rate': 0.00487130007411567, 'sigma_multiplier': 1.0521113827178603, 'num_layers': 2, 'initialization_multiplier': 0.12115023017676757}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 167 final loss: -0.00024014
Trial 168:
  Learning Rate: 0.007038967212299826
  Sigma Multiplier: 0.7047730320313139
  Initialization Multiplier: 0.4276460226333556
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.23it/s, loss=-0.000042, elapsed time=0.09, total time=12.7]
[I 2025-06-07 13:10:16,165] Trial 168 finished with value: -4.162060105024127e-05 and parameters: {'learning_rate': 0.007038967212299826, 'sigma_multiplier': 0.7047730320313139, 'num_layers': 2, 'initialization_multiplier': 0.4276460226333556}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 168 final loss: -0.00004162
Trial 169:
  Learning Rate: 0.010114217759937445
  Sigma Multiplier: 0.883206303475045
  Initialization Multiplier: 0.4985413710386699
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.24it/s, loss=-0.000273, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:10:27,832] Trial 169 finished with value: -0.00027305603638739796 and parameters: {'learning_rate': 0.010114217759937445, 'sigma_multiplier': 0.883206303475045, 'num_layers': 2, 'initialization_multiplier': 0.4985413710386699}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 169 final loss: -0.00027306
Trial 170:
  Learning Rate: 0.005858585718799565
  Sigma Multiplier: 1.093894712867753
  Initialization Multiplier: 0.6141411476399342
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.21it/s, loss=-0.000389, elapsed time=0.09, total time=10.8]
[I 2025-06-07 13:10:38,666] Trial 170 finished with value: -0.00038870631170790813 and parameters: {'learning_rate': 0.005858585718799565, 'sigma_multiplier': 1.093894712867753, 'num_layers': 2, 'initialization_multiplier': 0.6141411476399342}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 170 final loss: -0.00038871
Trial 171:
  Learning Rate: 0.006032096580244899
  Sigma Multiplier: 1.0898158334673638
  Initialization Multiplier: 0.6042375628813715
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.22it/s, loss=-0.000422, elapsed time=0.06, total time=10.8]
[I 2025-06-07 13:10:49,534] Trial 171 finished with value: -0.000422137714987613 and parameters: {'learning_rate': 0.006032096580244899, 'sigma_multiplier': 1.0898158334673638, 'num_layers': 2, 'initialization_multiplier': 0.6042375628813715}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 171 final loss: -0.00042214
Trial 172:
  Learning Rate: 0.004346687974036646
  Sigma Multiplier: 1.0969515718982301
  Initialization Multiplier: 0.6670847320711606
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.43it/s, loss=-0.000252, elapsed time=0.08, total time=11.6]
[I 2025-06-07 13:11:01,118] Trial 172 finished with value: -0.0002523896528033417 and parameters: {'learning_rate': 0.004346687974036646, 'sigma_multiplier': 1.0969515718982301, 'num_layers': 2, 'initialization_multiplier': 0.6670847320711606}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 172 final loss: -0.00025239
Trial 173:
  Learning Rate: 0.005767133736551449
  Sigma Multiplier: 1.172969578881301
  Initialization Multiplier: 0.5912985197798022
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.64it/s, loss=-0.000351, elapsed time=0.07, total time=11.3]
[I 2025-06-07 13:11:12,511] Trial 173 finished with value: -0.00035108155253792063 and parameters: {'learning_rate': 0.005767133736551449, 'sigma_multiplier': 1.172969578881301, 'num_layers': 2, 'initialization_multiplier': 0.5912985197798022}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 173 final loss: -0.00035108
Trial 174:
  Learning Rate: 0.0064412786884657175
  Sigma Multiplier: 1.1360404264143384
  Initialization Multiplier: 0.7260150080555867
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.21it/s, loss=-0.000395, elapsed time=0.06, total time=11.7]
[I 2025-06-07 13:11:24,263] Trial 174 finished with value: -0.00039528468674098665 and parameters: {'learning_rate': 0.0064412786884657175, 'sigma_multiplier': 1.1360404264143384, 'num_layers': 2, 'initialization_multiplier': 0.7260150080555867}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 174 final loss: -0.00039528
Trial 175:
  Learning Rate: 0.007950710196329399
  Sigma Multiplier: 1.1383348316939137
  Initialization Multiplier: 0.6896025902514669
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.13it/s, loss=-0.000373, elapsed time=0.07, total time=11.8]
[I 2025-06-07 13:11:36,139] Trial 175 finished with value: -0.0003729569456014823 and parameters: {'learning_rate': 0.007950710196329399, 'sigma_multiplier': 1.1383348316939137, 'num_layers': 2, 'initialization_multiplier': 0.6896025902514669}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 175 final loss: -0.00037296
Trial 176:
  Learning Rate: 0.0035659346082625235
  Sigma Multiplier: 1.0968667990493421
  Initialization Multiplier: 0.6238556898682205
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.89it/s, loss=-0.000445, elapsed time=0.05, total time=12]  
[I 2025-06-07 13:11:48,130] Trial 176 finished with value: -0.0004447638276188901 and parameters: {'learning_rate': 0.0035659346082625235, 'sigma_multiplier': 1.0968667990493421, 'num_layers': 2, 'initialization_multiplier': 0.6238556898682205}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 176 final loss: -0.00044476
Trial 177:
  Learning Rate: 0.003444222572909579
  Sigma Multiplier: 1.0703064585015738
  Initialization Multiplier: 0.8239678243577675
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.21it/s, loss=-0.000090, elapsed time=0.07, total time=11.7]
[I 2025-06-07 13:11:59,869] Trial 177 finished with value: -8.975950081712196e-05 and parameters: {'learning_rate': 0.003444222572909579, 'sigma_multiplier': 1.0703064585015738, 'num_layers': 2, 'initialization_multiplier': 0.8239678243577675}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 177 final loss: -0.00008976
Trial 178:
  Learning Rate: 0.005152691028036414
  Sigma Multiplier: 1.1796351518161243
  Initialization Multiplier: 0.7353066032032123
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.62it/s, loss=-0.000397, elapsed time=0.08, total time=11.4]
[I 2025-06-07 13:12:11,338] Trial 178 finished with value: -0.0003970673147528531 and parameters: {'learning_rate': 0.005152691028036414, 'sigma_multiplier': 1.1796351518161243, 'num_layers': 2, 'initialization_multiplier': 0.7353066032032123}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 178 final loss: -0.00039707
Trial 179:
  Learning Rate: 0.004968719884368472
  Sigma Multiplier: 1.1564079453686031
  Initialization Multiplier: 0.797381552231321
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.36it/s, loss=-0.000394, elapsed time=0.07, total time=11.5]
[I 2025-06-07 13:12:22,851] Trial 179 finished with value: -0.0003942300503526515 and parameters: {'learning_rate': 0.004968719884368472, 'sigma_multiplier': 1.1564079453686031, 'num_layers': 2, 'initialization_multiplier': 0.797381552231321}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 179 final loss: -0.00039423
Trial 180:
  Learning Rate: 0.004846213574925601
  Sigma Multiplier: 1.1431796266228105
  Initialization Multiplier: 0.876325711083547
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.08it/s, loss=-0.000214, elapsed time=0.07, total time=11.8]
[I 2025-06-07 13:12:34,720] Trial 180 finished with value: -0.0002137133703455438 and parameters: {'learning_rate': 0.004846213574925601, 'sigma_multiplier': 1.1431796266228105, 'num_layers': 2, 'initialization_multiplier': 0.876325711083547}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 180 final loss: -0.00021371
Trial 181:
  Learning Rate: 0.004928133782887947
  Sigma Multiplier: 1.0389241658842994
  Initialization Multiplier: 0.7287054933531762
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.93it/s, loss=-0.000291, elapsed time=0.06, total time=12]  
[I 2025-06-07 13:12:46,782] Trial 181 finished with value: -0.00029086897347214 and parameters: {'learning_rate': 0.004928133782887947, 'sigma_multiplier': 1.0389241658842994, 'num_layers': 2, 'initialization_multiplier': 0.7287054933531762}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 181 final loss: -0.00029087
Trial 182:
  Learning Rate: 0.003792654544992188
  Sigma Multiplier: 1.1714646876371466
  Initialization Multiplier: 0.7208218555197576
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.75it/s, loss=-0.000356, elapsed time=0.06, total time=11.3]
[I 2025-06-07 13:12:58,133] Trial 182 finished with value: -0.0003562244669251541 and parameters: {'learning_rate': 0.003792654544992188, 'sigma_multiplier': 1.1714646876371466, 'num_layers': 2, 'initialization_multiplier': 0.7208218555197576}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 182 final loss: -0.00035622
Trial 183:
  Learning Rate: 0.006567740685143082
  Sigma Multiplier: 1.090891989815432
  Initialization Multiplier: 0.8034051574195906
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.07it/s, loss=-0.000134, elapsed time=0.06, total time=11.7]
[I 2025-06-07 13:13:09,902] Trial 183 finished with value: -0.00013393803952561697 and parameters: {'learning_rate': 0.006567740685143082, 'sigma_multiplier': 1.090891989815432, 'num_layers': 2, 'initialization_multiplier': 0.8034051574195906}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 183 final loss: -0.00013394
Trial 184:
  Learning Rate: 0.005253868702157325
  Sigma Multiplier: 1.060570000572347
  Initialization Multiplier: 0.7822937276362212
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.17it/s, loss=-0.000253, elapsed time=0.06, total time=11.7]
[I 2025-06-07 13:13:21,630] Trial 184 finished with value: -0.00025313430532576453 and parameters: {'learning_rate': 0.005253868702157325, 'sigma_multiplier': 1.060570000572347, 'num_layers': 2, 'initialization_multiplier': 0.7822937276362212}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 184 final loss: -0.00025313
Trial 185:
  Learning Rate: 0.004237769812805874
  Sigma Multiplier: 1.1238302315313138
  Initialization Multiplier: 0.6959849265716278
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.63it/s, loss=-0.000243, elapsed time=0.07, total time=11.3]
[I 2025-06-07 13:13:33,011] Trial 185 finished with value: -0.00024303256779424463 and parameters: {'learning_rate': 0.004237769812805874, 'sigma_multiplier': 1.1238302315313138, 'num_layers': 2, 'initialization_multiplier': 0.6959849265716278}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 185 final loss: -0.00024303
Trial 186:
  Learning Rate: 0.006690443741494444
  Sigma Multiplier: 1.2231183099045122
  Initialization Multiplier: 0.6348055770083223
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.31it/s, loss=-0.000396, elapsed time=0.04, total time=10.8]
[I 2025-06-07 13:13:43,863] Trial 186 finished with value: -0.0003956699167798849 and parameters: {'learning_rate': 0.006690443741494444, 'sigma_multiplier': 1.2231183099045122, 'num_layers': 2, 'initialization_multiplier': 0.6348055770083223}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 186 final loss: -0.00039567
Trial 187:
  Learning Rate: 0.008889436328523518
  Sigma Multiplier: 1.214273343215771
  Initialization Multiplier: 0.8483935439230259
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.73it/s, loss=-0.000116, elapsed time=0.06, total time=11.2]
[I 2025-06-07 13:13:55,090] Trial 187 finished with value: -0.00011602447491095969 and parameters: {'learning_rate': 0.008889436328523518, 'sigma_multiplier': 1.214273343215771, 'num_layers': 2, 'initialization_multiplier': 0.8483935439230259}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 187 final loss: -0.00011602
Trial 188:
  Learning Rate: 0.006504982172105326
  Sigma Multiplier: 1.255323755207514
  Initialization Multiplier: 0.7674433762290169
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.87it/s, loss=-0.000363, elapsed time=0.05, total time=11.1]
[I 2025-06-07 13:14:06,261] Trial 188 finished with value: -0.00036341680938440025 and parameters: {'learning_rate': 0.006504982172105326, 'sigma_multiplier': 1.255323755207514, 'num_layers': 2, 'initialization_multiplier': 0.7674433762290169}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 188 final loss: -0.00036342
Trial 189:
  Learning Rate: 0.011679472332489638
  Sigma Multiplier: 1.1801946879190761
  Initialization Multiplier: 0.6696999069355639
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.02it/s, loss=-0.000292, elapsed time=0.06, total time=11]  
[I 2025-06-07 13:14:17,278] Trial 189 finished with value: -0.00029175765464874197 and parameters: {'learning_rate': 0.011679472332489638, 'sigma_multiplier': 1.1801946879190761, 'num_layers': 2, 'initialization_multiplier': 0.6696999069355639}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 189 final loss: -0.00029176
Trial 190:
  Learning Rate: 0.00733633562369188
  Sigma Multiplier: 1.2866560000891776
  Initialization Multiplier: 0.6446408083415407
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.25it/s, loss=-0.000351, elapsed time=0.05, total time=10.8]
[I 2025-06-07 13:14:28,139] Trial 190 finished with value: -0.0003506040057640069 and parameters: {'learning_rate': 0.00733633562369188, 'sigma_multiplier': 1.2866560000891776, 'num_layers': 2, 'initialization_multiplier': 0.6446408083415407}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 190 final loss: -0.00035060
Trial 191:
  Learning Rate: 0.005528175231564317
  Sigma Multiplier: 1.1016765508878625
  Initialization Multiplier: 0.612943573851359
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.02it/s, loss=-0.000280, elapsed time=0.05, total time=11]  
[I 2025-06-07 13:14:39,207] Trial 191 finished with value: -0.0002795022529801921 and parameters: {'learning_rate': 0.005528175231564317, 'sigma_multiplier': 1.1016765508878625, 'num_layers': 2, 'initialization_multiplier': 0.612943573851359}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 191 final loss: -0.00027950
Trial 192:
  Learning Rate: 0.00862264346308188
  Sigma Multiplier: 1.2215855215007134
  Initialization Multiplier: 0.7239625690958283
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:17<00:00,  8.57it/s, loss=-0.000280, elapsed time=0.11, total time=17.8]
[I 2025-06-07 13:14:57,138] Trial 192 finished with value: -0.00028041793839018097 and parameters: {'learning_rate': 0.00862264346308188, 'sigma_multiplier': 1.2215855215007134, 'num_layers': 5, 'initialization_multiplier': 0.7239625690958283}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 192 final loss: -0.00028042
Trial 193:
  Learning Rate: 0.0062811028216532006
  Sigma Multiplier: 1.0305980939903663
  Initialization Multiplier: 0.6134318841673971
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.34it/s, loss=-0.000289, elapsed time=0.08, total time=11.6]
[I 2025-06-07 13:15:08,732] Trial 193 finished with value: -0.00028877429145722847 and parameters: {'learning_rate': 0.0062811028216532006, 'sigma_multiplier': 1.0305980939903663, 'num_layers': 2, 'initialization_multiplier': 0.6134318841673971}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 193 final loss: -0.00028877
Trial 194:
  Learning Rate: 0.004236232336319594
  Sigma Multiplier: 1.1614749753972544
  Initialization Multiplier: 0.5774036691089092
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.07it/s, loss=-0.000334, elapsed time=0.09, total time=11.1]
[I 2025-06-07 13:15:19,837] Trial 194 finished with value: -0.000333692404134574 and parameters: {'learning_rate': 0.004236232336319594, 'sigma_multiplier': 1.1614749753972544, 'num_layers': 2, 'initialization_multiplier': 0.5774036691089092}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 194 final loss: -0.00033369
Trial 195:
  Learning Rate: 0.005423841821505817
  Sigma Multiplier: 1.9859869044658471
  Initialization Multiplier: 0.6460586257230498
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.65it/s, loss=-0.000233, elapsed time=0.06, total time=9.9] 
[I 2025-06-07 13:15:29,780] Trial 195 finished with value: -0.0002326101478147409 and parameters: {'learning_rate': 0.005423841821505817, 'sigma_multiplier': 1.9859869044658471, 'num_layers': 2, 'initialization_multiplier': 0.6460586257230498}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 195 final loss: -0.00023261
Trial 196:
  Learning Rate: 0.007145790759620278
  Sigma Multiplier: 0.9743911272710244
  Initialization Multiplier: 0.6912045199089528
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.37it/s, loss=-0.000290, elapsed time=0.07, total time=11.6]
[I 2025-06-07 13:15:41,390] Trial 196 finished with value: -0.00028955444902170884 and parameters: {'learning_rate': 0.007145790759620278, 'sigma_multiplier': 0.9743911272710244, 'num_layers': 2, 'initialization_multiplier': 0.6912045199089528}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 196 final loss: -0.00028955
Trial 197:
  Learning Rate: 0.008339389949058468
  Sigma Multiplier: 1.0848957238103771
  Initialization Multiplier: 0.5438688329931908
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.86it/s, loss=-0.000446, elapsed time=0.08, total time=11.2]
[I 2025-06-07 13:15:52,634] Trial 197 finished with value: -0.00044607856217723806 and parameters: {'learning_rate': 0.008339389949058468, 'sigma_multiplier': 1.0848957238103771, 'num_layers': 2, 'initialization_multiplier': 0.5438688329931908}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 197 final loss: -0.00044608
Trial 198:
  Learning Rate: 0.008491681717536968
  Sigma Multiplier: 0.9956651896514976
  Initialization Multiplier: 0.5509204785532639
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.31it/s, loss=-0.000437, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:16:04,250] Trial 198 finished with value: -0.00043701791175349334 and parameters: {'learning_rate': 0.008491681717536968, 'sigma_multiplier': 0.9956651896514976, 'num_layers': 2, 'initialization_multiplier': 0.5509204785532639}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 198 final loss: -0.00043702
Trial 199:
  Learning Rate: 0.008449413871679749
  Sigma Multiplier: 1.0592769008827303
  Initialization Multiplier: 1.0138645042067145
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.27it/s, loss=-0.000200, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:16:15,894] Trial 199 finished with value: -0.0001997922953760184 and parameters: {'learning_rate': 0.008449413871679749, 'sigma_multiplier': 1.0592769008827303, 'num_layers': 2, 'initialization_multiplier': 1.0138645042067145}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 199 final loss: -0.00019979
Trial 200:
  Learning Rate: 0.011878755659679895
  Sigma Multiplier: 1.0064182788391824
  Initialization Multiplier: 0.5517090241934941
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.95it/s, loss=-0.000426, elapsed time=0.07, total time=11.1]
[I 2025-06-07 13:16:26,988] Trial 200 finished with value: -0.0004262872623524529 and parameters: {'learning_rate': 0.011878755659679895, 'sigma_multiplier': 1.0064182788391824, 'num_layers': 2, 'initialization_multiplier': 0.5517090241934941}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 200 final loss: -0.00042629
Trial 201:
  Learning Rate: 0.012066548886487784
  Sigma Multiplier: 1.0072530173493368
  Initialization Multiplier: 0.5689258981476302
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.62it/s, loss=-0.000372, elapsed time=0.05, total time=11.3]
[I 2025-06-07 13:16:38,313] Trial 201 finished with value: -0.0003715714626479868 and parameters: {'learning_rate': 0.012066548886487784, 'sigma_multiplier': 1.0072530173493368, 'num_layers': 2, 'initialization_multiplier': 0.5689258981476302}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 201 final loss: -0.00037157
Trial 202:
  Learning Rate: 0.015431302659860594
  Sigma Multiplier: 1.1310479904626647
  Initialization Multiplier: 0.532723699051992
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.16it/s, loss=-0.000306, elapsed time=0.07, total time=10.9]
[I 2025-06-07 13:16:49,257] Trial 202 finished with value: -0.0003064069183508089 and parameters: {'learning_rate': 0.015431302659860594, 'sigma_multiplier': 1.1310479904626647, 'num_layers': 2, 'initialization_multiplier': 0.532723699051992}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 202 final loss: -0.00030641
Trial 203:
  Learning Rate: 0.008528603832340198
  Sigma Multiplier: 1.0339159112353624
  Initialization Multiplier: 0.5431990340830738
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.57it/s, loss=-0.000261, elapsed time=0.06, total time=11.4]
[I 2025-06-07 13:17:00,718] Trial 203 finished with value: -0.0002607175329729004 and parameters: {'learning_rate': 0.008528603832340198, 'sigma_multiplier': 1.0339159112353624, 'num_layers': 2, 'initialization_multiplier': 0.5431990340830738}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 203 final loss: -0.00026072
Trial 204:
  Learning Rate: 0.009930505750210162
  Sigma Multiplier: 0.9893690868984302
  Initialization Multiplier: 1.1283561512843818
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.43it/s, loss=0.000422, elapsed time=0.08, total time=11.5]
[I 2025-06-07 13:17:12,228] Trial 204 finished with value: 0.0004215560255652815 and parameters: {'learning_rate': 0.009930505750210162, 'sigma_multiplier': 0.9893690868984302, 'num_layers': 2, 'initialization_multiplier': 1.1283561512843818}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 204 final loss: 0.00042156
Trial 205:
  Learning Rate: 0.00695098436080208
  Sigma Multiplier: 1.0817934503054363
  Initialization Multiplier: 0.4583925326068157
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.36it/s, loss=-0.000328, elapsed time=0.07, total time=10.7]
[I 2025-06-07 13:17:22,976] Trial 205 finished with value: -0.00032817036698349174 and parameters: {'learning_rate': 0.00695098436080208, 'sigma_multiplier': 1.0817934503054363, 'num_layers': 2, 'initialization_multiplier': 0.4583925326068157}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 205 final loss: -0.00032817
Trial 206:
  Learning Rate: 0.012200603542799367
  Sigma Multiplier: 1.1466446219874227
  Initialization Multiplier: 1.6202193702529408
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.77it/s, loss=-0.000106, elapsed time=0.06, total time=11.2]
[I 2025-06-07 13:17:34,252] Trial 206 finished with value: -0.00010590239841997685 and parameters: {'learning_rate': 0.012200603542799367, 'sigma_multiplier': 1.1466446219874227, 'num_layers': 2, 'initialization_multiplier': 1.6202193702529408}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 206 final loss: -0.00010590
Trial 207:
  Learning Rate: 0.0076430758037775125
  Sigma Multiplier: 0.9525862391107832
  Initialization Multiplier: 0.526426405350286
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.76it/s, loss=-0.000377, elapsed time=0.14, total time=11.2]
[I 2025-06-07 13:17:45,516] Trial 207 finished with value: -0.00037682267643539144 and parameters: {'learning_rate': 0.0076430758037775125, 'sigma_multiplier': 0.9525862391107832, 'num_layers': 2, 'initialization_multiplier': 0.526426405350286}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 207 final loss: -0.00037682
Trial 208:
  Learning Rate: 0.008826102799157194
  Sigma Multiplier: 1.034536989634885
  Initialization Multiplier: 0.5931277143742403
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.96it/s, loss=-0.000335, elapsed time=0.05, total time=11.1]
[I 2025-06-07 13:17:56,600] Trial 208 finished with value: -0.0003354884960004625 and parameters: {'learning_rate': 0.008826102799157194, 'sigma_multiplier': 1.034536989634885, 'num_layers': 2, 'initialization_multiplier': 0.5931277143742403}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 208 final loss: -0.00033549
Trial 209:
  Learning Rate: 0.004742225910577647
  Sigma Multiplier: 1.1085811659968985
  Initialization Multiplier: 0.5693116153474013
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.11it/s, loss=-0.000268, elapsed time=0.07, total time=10.8]
[I 2025-06-07 13:18:07,501] Trial 209 finished with value: -0.0002680093399395446 and parameters: {'learning_rate': 0.004742225910577647, 'sigma_multiplier': 1.1085811659968985, 'num_layers': 2, 'initialization_multiplier': 0.5693116153474013}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 209 final loss: -0.00026801
Trial 210:
  Learning Rate: 0.006424437614391984
  Sigma Multiplier: 1.206553887604692
  Initialization Multiplier: 0.9477844373102983
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.82it/s, loss=-0.000234, elapsed time=0.04, total time=11.2]
[I 2025-06-07 13:18:18,717] Trial 210 finished with value: -0.0002337712508175193 and parameters: {'learning_rate': 0.006424437614391984, 'sigma_multiplier': 1.206553887604692, 'num_layers': 2, 'initialization_multiplier': 0.9477844373102983}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 210 final loss: -0.00023377
Trial 211:
  Learning Rate: 0.010751270385683779
  Sigma Multiplier: 0.9907053389230194
  Initialization Multiplier: 0.6442527985131975
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.20it/s, loss=-0.000213, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:18:30,405] Trial 211 finished with value: -0.00021288913249006502 and parameters: {'learning_rate': 0.010751270385683779, 'sigma_multiplier': 0.9907053389230194, 'num_layers': 2, 'initialization_multiplier': 0.6442527985131975}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 211 final loss: -0.00021289
Trial 212:
  Learning Rate: 0.01394617340085623
  Sigma Multiplier: 0.9503166793714445
  Initialization Multiplier: 0.5043476434804055
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.44it/s, loss=-0.000293, elapsed time=0.09, total time=11.5]
[I 2025-06-07 13:18:41,921] Trial 212 finished with value: -0.0002928384411379973 and parameters: {'learning_rate': 0.01394617340085623, 'sigma_multiplier': 0.9503166793714445, 'num_layers': 2, 'initialization_multiplier': 0.5043476434804055}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 212 final loss: -0.00029284
Trial 213:
  Learning Rate: 0.010104897416904105
  Sigma Multiplier: 1.0627232875453234
  Initialization Multiplier: 0.42710034128929286
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.23it/s, loss=-0.000287, elapsed time=0.08, total time=10.9]
[I 2025-06-07 13:18:52,868] Trial 213 finished with value: -0.0002870356027815798 and parameters: {'learning_rate': 0.010104897416904105, 'sigma_multiplier': 1.0627232875453234, 'num_layers': 2, 'initialization_multiplier': 0.42710034128929286}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 213 final loss: -0.00028704
Trial 214:
  Learning Rate: 0.007812963849720021
  Sigma Multiplier: 0.90654161871217
  Initialization Multiplier: 1.7985914541015084
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.82it/s, loss=-0.000027, elapsed time=0.08, total time=12]  
[I 2025-06-07 13:19:04,904] Trial 214 finished with value: -2.733418022073179e-05 and parameters: {'learning_rate': 0.007812963849720021, 'sigma_multiplier': 0.90654161871217, 'num_layers': 2, 'initialization_multiplier': 1.7985914541015084}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 214 final loss: -0.00002733
Trial 215:
  Learning Rate: 0.009772449247521083
  Sigma Multiplier: 0.9868798306329232
  Initialization Multiplier: 0.5976150064465336
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.08it/s, loss=-0.000376, elapsed time=0.07, total time=10.9]
[I 2025-06-07 13:19:15,893] Trial 215 finished with value: -0.00037553342305905713 and parameters: {'learning_rate': 0.009772449247521083, 'sigma_multiplier': 0.9868798306329232, 'num_layers': 2, 'initialization_multiplier': 0.5976150064465336}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 215 final loss: -0.00037553
Trial 216:
  Learning Rate: 0.01225409082770821
  Sigma Multiplier: 1.0156457215292087
  Initialization Multiplier: 0.5516675697286407
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.06it/s, loss=-0.000356, elapsed time=0.08, total time=10.9]
[I 2025-06-07 13:19:26,875] Trial 216 finished with value: -0.00035557505657559264 and parameters: {'learning_rate': 0.01225409082770821, 'sigma_multiplier': 1.0156457215292087, 'num_layers': 2, 'initialization_multiplier': 0.5516675697286407}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 216 final loss: -0.00035558
Trial 217:
  Learning Rate: 0.006716101331774957
  Sigma Multiplier: 1.0874095663643137
  Initialization Multiplier: 0.6735300170209555
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.50it/s, loss=-0.000374, elapsed time=0.07, total time=12.3]
[I 2025-06-07 13:19:39,218] Trial 217 finished with value: -0.00037353473112654024 and parameters: {'learning_rate': 0.006716101331774957, 'sigma_multiplier': 1.0874095663643137, 'num_layers': 2, 'initialization_multiplier': 0.6735300170209555}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 217 final loss: -0.00037353
Trial 218:
  Learning Rate: 0.005506893160409773
  Sigma Multiplier: 0.9466171790508833
  Initialization Multiplier: 0.2373984993394417
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.29it/s, loss=-0.000112, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:19:50,822] Trial 218 finished with value: -0.00011172629652882929 and parameters: {'learning_rate': 0.005506893160409773, 'sigma_multiplier': 0.9466171790508833, 'num_layers': 2, 'initialization_multiplier': 0.2373984993394417}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 218 final loss: -0.00011173
Trial 219:
  Learning Rate: 0.008913190847400318
  Sigma Multiplier: 1.1616881014734803
  Initialization Multiplier: 0.48052909636928604
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.16it/s, loss=-0.000378, elapsed time=0.08, total time=10.9]
[I 2025-06-07 13:20:01,769] Trial 219 finished with value: -0.00037838688960205145 and parameters: {'learning_rate': 0.008913190847400318, 'sigma_multiplier': 1.1616881014734803, 'num_layers': 2, 'initialization_multiplier': 0.48052909636928604}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 219 final loss: -0.00037839
Trial 220:
  Learning Rate: 0.007284557458866054
  Sigma Multiplier: 1.0354519354197165
  Initialization Multiplier: 0.7448095397872533
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.90it/s, loss=-0.000168, elapsed time=0.09, total time=11.1]
[I 2025-06-07 13:20:12,890] Trial 220 finished with value: -0.0001680204690573719 and parameters: {'learning_rate': 0.007284557458866054, 'sigma_multiplier': 1.0354519354197165, 'num_layers': 2, 'initialization_multiplier': 0.7448095397872533}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 220 final loss: -0.00016802
Trial 221:
  Learning Rate: 0.005759636654486016
  Sigma Multiplier: 1.1227801994879762
  Initialization Multiplier: 0.6101947776740495
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.48it/s, loss=-0.000366, elapsed time=0.06, total time=10.6]
[I 2025-06-07 13:20:23,602] Trial 221 finished with value: -0.00036601321459305957 and parameters: {'learning_rate': 0.005759636654486016, 'sigma_multiplier': 1.1227801994879762, 'num_layers': 2, 'initialization_multiplier': 0.6101947776740495}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 221 final loss: -0.00036601
Trial 222:
  Learning Rate: 0.005965173172629259
  Sigma Multiplier: 1.0767034453287452
  Initialization Multiplier: 0.6269730564129321
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.97it/s, loss=-0.000416, elapsed time=0.08, total time=11.1]
[I 2025-06-07 13:20:34,725] Trial 222 finished with value: -0.0004159762689535751 and parameters: {'learning_rate': 0.005965173172629259, 'sigma_multiplier': 1.0767034453287452, 'num_layers': 2, 'initialization_multiplier': 0.6269730564129321}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 222 final loss: -0.00041598
Trial 223:
  Learning Rate: 0.00488094097880744
  Sigma Multiplier: 1.087775712448107
  Initialization Multiplier: 0.6452937890096035
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.50it/s, loss=-0.000344, elapsed time=0.09, total time=10.6]
[I 2025-06-07 13:20:45,383] Trial 223 finished with value: -0.00034380482842155246 and parameters: {'learning_rate': 0.00488094097880744, 'sigma_multiplier': 1.087775712448107, 'num_layers': 2, 'initialization_multiplier': 0.6452937890096035}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 223 final loss: -0.00034380
Trial 224:
  Learning Rate: 0.0036618364181504946
  Sigma Multiplier: 1.0585278643898808
  Initialization Multiplier: 0.5186551970555112
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.27it/s, loss=-0.000341, elapsed time=0.06, total time=10.8]
[I 2025-06-07 13:20:56,260] Trial 224 finished with value: -0.0003408068374121803 and parameters: {'learning_rate': 0.0036618364181504946, 'sigma_multiplier': 1.0585278643898808, 'num_layers': 2, 'initialization_multiplier': 0.5186551970555112}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 224 final loss: -0.00034081
Trial 225:
  Learning Rate: 0.006390061164351219
  Sigma Multiplier: 1.0005409945296515
  Initialization Multiplier: 0.5662782953305112
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.61it/s, loss=-0.000388, elapsed time=0.07, total time=11.3]
[I 2025-06-07 13:21:07,582] Trial 225 finished with value: -0.0003883680789697001 and parameters: {'learning_rate': 0.006390061164351219, 'sigma_multiplier': 1.0005409945296515, 'num_layers': 2, 'initialization_multiplier': 0.5662782953305112}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 225 final loss: -0.00038837
Trial 226:
  Learning Rate: 3.878946483012222e-05
  Sigma Multiplier: 1.1905359088849243
  Initialization Multiplier: 0.6938601473402894
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.62it/s, loss=0.087551, elapsed time=0.06, total time=10.5]
[I 2025-06-07 13:21:18,134] Trial 226 finished with value: 0.08755121520992501 and parameters: {'learning_rate': 3.878946483012222e-05, 'sigma_multiplier': 1.1905359088849243, 'num_layers': 2, 'initialization_multiplier': 0.6938601473402894}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 226 final loss: 0.08755122
Trial 227:
  Learning Rate: 0.011129102535710735
  Sigma Multiplier: 1.130072395458976
  Initialization Multiplier: 0.6313089060545966
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.60it/s, loss=-0.000304, elapsed time=0.05, total time=10.7]
[I 2025-06-07 13:21:28,855] Trial 227 finished with value: -0.0003041932940233681 and parameters: {'learning_rate': 0.011129102535710735, 'sigma_multiplier': 1.130072395458976, 'num_layers': 2, 'initialization_multiplier': 0.6313089060545966}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 227 final loss: -0.00030419
Trial 228:
  Learning Rate: 0.007557069819615291
  Sigma Multiplier: 0.9735397012130388
  Initialization Multiplier: 0.5371262418926869
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.83it/s, loss=-0.000354, elapsed time=0.08, total time=11.1]
[I 2025-06-07 13:21:40,002] Trial 228 finished with value: -0.0003543653109957008 and parameters: {'learning_rate': 0.007557069819615291, 'sigma_multiplier': 0.9735397012130388, 'num_layers': 2, 'initialization_multiplier': 0.5371262418926869}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 228 final loss: -0.00035437
Trial 229:
  Learning Rate: 0.008744453557970957
  Sigma Multiplier: 1.0668906668068443
  Initialization Multiplier: 0.3873586169196638
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.89it/s, loss=-0.000295, elapsed time=0.07, total time=11.1]
[I 2025-06-07 13:21:51,117] Trial 229 finished with value: -0.00029514916121762516 and parameters: {'learning_rate': 0.008744453557970957, 'sigma_multiplier': 1.0668906668068443, 'num_layers': 2, 'initialization_multiplier': 0.3873586169196638}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 229 final loss: -0.00029515
Trial 230:
  Learning Rate: 0.0007870749996047186
  Sigma Multiplier: 1.2342098067840657
  Initialization Multiplier: 0.44779147988055623
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.67it/s, loss=0.001046, elapsed time=0.04, total time=10.6]
[I 2025-06-07 13:22:01,708] Trial 230 finished with value: 0.0010455628259859076 and parameters: {'learning_rate': 0.0007870749996047186, 'sigma_multiplier': 1.2342098067840657, 'num_layers': 2, 'initialization_multiplier': 0.44779147988055623}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 230 final loss: 0.00104556
Trial 231:
  Learning Rate: 0.005986049238021355
  Sigma Multiplier: 1.1053705641919385
  Initialization Multiplier: 0.6071498295522044
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.63it/s, loss=-0.000375, elapsed time=0.09, total time=10.5]
[I 2025-06-07 13:22:12,293] Trial 231 finished with value: -0.0003746464019260158 and parameters: {'learning_rate': 0.005986049238021355, 'sigma_multiplier': 1.1053705641919385, 'num_layers': 2, 'initialization_multiplier': 0.6071498295522044}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 231 final loss: -0.00037465
Trial 232:
  Learning Rate: 0.005348784232810031
  Sigma Multiplier: 1.024920544726456
  Initialization Multiplier: 0.5834954696566619
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.32it/s, loss=-0.000399, elapsed time=0.05, total time=10.8]
[I 2025-06-07 13:22:23,111] Trial 232 finished with value: -0.0003994558979765931 and parameters: {'learning_rate': 0.005348784232810031, 'sigma_multiplier': 1.024920544726456, 'num_layers': 2, 'initialization_multiplier': 0.5834954696566619}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 232 final loss: -0.00039946
Trial 233:
  Learning Rate: 0.004256107704759759
  Sigma Multiplier: 1.0259863643929723
  Initialization Multiplier: 0.5648051683758875
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.40it/s, loss=-0.000341, elapsed time=0.06, total time=10.7]
[I 2025-06-07 13:22:33,896] Trial 233 finished with value: -0.000340506072787678 and parameters: {'learning_rate': 0.004256107704759759, 'sigma_multiplier': 1.0259863643929723, 'num_layers': 2, 'initialization_multiplier': 0.5648051683758875}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 233 final loss: -0.00034051
Trial 234:
  Learning Rate: 0.005122987341041256
  Sigma Multiplier: 0.950593573354374
  Initialization Multiplier: 0.48990549137878603
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:15<00:00,  9.90it/s, loss=-0.000180, elapsed time=0.12, total time=15.5]
[I 2025-06-07 13:22:49,417] Trial 234 finished with value: -0.00018024195836010884 and parameters: {'learning_rate': 0.005122987341041256, 'sigma_multiplier': 0.950593573354374, 'num_layers': 4, 'initialization_multiplier': 0.48990549137878603}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 234 final loss: -0.00018024
Trial 235:
  Learning Rate: 0.006873157650005984
  Sigma Multiplier: 1.0129778634837194
  Initialization Multiplier: 0.5906780061308203
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.84it/s, loss=-0.000454, elapsed time=0.05, total time=11.2]
[I 2025-06-07 13:23:00,639] Trial 235 finished with value: -0.00045390477909667073 and parameters: {'learning_rate': 0.006873157650005984, 'sigma_multiplier': 1.0129778634837194, 'num_layers': 2, 'initialization_multiplier': 0.5906780061308203}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 235 final loss: -0.00045390
Trial 236:
  Learning Rate: 0.00651697994728767
  Sigma Multiplier: 1.0465996198972625
  Initialization Multiplier: 0.5257236770531876
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.27it/s, loss=-0.000299, elapsed time=0.07, total time=10.8]
[I 2025-06-07 13:23:11,469] Trial 236 finished with value: -0.000299129213226956 and parameters: {'learning_rate': 0.00651697994728767, 'sigma_multiplier': 1.0465996198972625, 'num_layers': 2, 'initialization_multiplier': 0.5257236770531876}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 236 final loss: -0.00029913
Trial 237:
  Learning Rate: 0.005095704297765655
  Sigma Multiplier: 1.019142041647709
  Initialization Multiplier: 0.5797367983798287
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.99it/s, loss=-0.000323, elapsed time=0.07, total time=11]  
[I 2025-06-07 13:23:22,530] Trial 237 finished with value: -0.00032301255057094405 and parameters: {'learning_rate': 0.005095704297765655, 'sigma_multiplier': 1.019142041647709, 'num_layers': 2, 'initialization_multiplier': 0.5797367983798287}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 237 final loss: -0.00032301
Trial 238:
  Learning Rate: 0.007529491572762352
  Sigma Multiplier: 1.1506871838378898
  Initialization Multiplier: 0.47932838261864874
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.65it/s, loss=-0.000349, elapsed time=0.05, total time=10.6]
[I 2025-06-07 13:23:33,123] Trial 238 finished with value: -0.00034865623782749644 and parameters: {'learning_rate': 0.007529491572762352, 'sigma_multiplier': 1.1506871838378898, 'num_layers': 2, 'initialization_multiplier': 0.47932838261864874}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 238 final loss: -0.00034866
Trial 239:
  Learning Rate: 0.003189724377238389
  Sigma Multiplier: 1.0753734044195282
  Initialization Multiplier: 0.5409869878097376
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.30it/s, loss=-0.000333, elapsed time=0.08, total time=10.7]
[I 2025-06-07 13:23:43,905] Trial 239 finished with value: -0.0003333460644663744 and parameters: {'learning_rate': 0.003189724377238389, 'sigma_multiplier': 1.0753734044195282, 'num_layers': 2, 'initialization_multiplier': 0.5409869878097376}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 239 final loss: -0.00033335
Trial 240:
  Learning Rate: 0.005777990608041618
  Sigma Multiplier: 0.9995331829771734
  Initialization Multiplier: 0.5852970388101252
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.93it/s, loss=-0.000397, elapsed time=0.07, total time=11.1]
[I 2025-06-07 13:23:55,030] Trial 240 finished with value: -0.00039724166568028124 and parameters: {'learning_rate': 0.005777990608041618, 'sigma_multiplier': 0.9995331829771734, 'num_layers': 2, 'initialization_multiplier': 0.5852970388101252}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 240 final loss: -0.00039724
Trial 241:
  Learning Rate: 0.0058217521968834665
  Sigma Multiplier: 1.0142481747022651
  Initialization Multiplier: 0.6137793893910904
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.63it/s, loss=-0.000383, elapsed time=0.06, total time=11.3]
[I 2025-06-07 13:24:06,366] Trial 241 finished with value: -0.00038265231202042706 and parameters: {'learning_rate': 0.0058217521968834665, 'sigma_multiplier': 1.0142481747022651, 'num_layers': 2, 'initialization_multiplier': 0.6137793893910904}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 241 final loss: -0.00038265
Trial 242:
  Learning Rate: 0.006737410101083146
  Sigma Multiplier: 0.9791871007641735
  Initialization Multiplier: 0.5878082358110831
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.54it/s, loss=-0.000418, elapsed time=0.06, total time=11.3]
[I 2025-06-07 13:24:17,749] Trial 242 finished with value: -0.000418100105615412 and parameters: {'learning_rate': 0.006737410101083146, 'sigma_multiplier': 0.9791871007641735, 'num_layers': 2, 'initialization_multiplier': 0.5878082358110831}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 242 final loss: -0.00041810
Trial 243:
  Learning Rate: 0.007035835655807569
  Sigma Multiplier: 0.9847343790281882
  Initialization Multiplier: 0.559262418849808
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.24it/s, loss=-0.000399, elapsed time=0.07, total time=10.9]
[I 2025-06-07 13:24:28,688] Trial 243 finished with value: -0.0003991283913807845 and parameters: {'learning_rate': 0.007035835655807569, 'sigma_multiplier': 0.9847343790281882, 'num_layers': 2, 'initialization_multiplier': 0.559262418849808}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 243 final loss: -0.00039913
Trial 244:
  Learning Rate: 0.0065794599669950484
  Sigma Multiplier: 0.910534413361954
  Initialization Multiplier: 0.5800003033641263
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.13it/s, loss=-0.000381, elapsed time=0.06, total time=10.9]
[I 2025-06-07 13:24:39,670] Trial 244 finished with value: -0.00038068454275010316 and parameters: {'learning_rate': 0.0065794599669950484, 'sigma_multiplier': 0.910534413361954, 'num_layers': 2, 'initialization_multiplier': 0.5800003033641263}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 244 final loss: -0.00038068
Trial 245:
  Learning Rate: 0.007641561190867256
  Sigma Multiplier: 0.9803756807474611
  Initialization Multiplier: 0.5760341801254858
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.99it/s, loss=-0.000372, elapsed time=0.05, total time=11.1]
[I 2025-06-07 13:24:50,851] Trial 245 finished with value: -0.0003715268544496445 and parameters: {'learning_rate': 0.007641561190867256, 'sigma_multiplier': 0.9803756807474611, 'num_layers': 2, 'initialization_multiplier': 0.5760341801254858}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 245 final loss: -0.00037153
Trial 246:
  Learning Rate: 0.006660243633033271
  Sigma Multiplier: 0.9396286809284574
  Initialization Multiplier: 1.2150659340792045
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.74it/s, loss=0.000444, elapsed time=0.05, total time=11.2] 
[I 2025-06-07 13:25:02,094] Trial 246 finished with value: 0.0004435035949501502 and parameters: {'learning_rate': 0.006660243633033271, 'sigma_multiplier': 0.9396286809284574, 'num_layers': 2, 'initialization_multiplier': 1.2150659340792045}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 246 final loss: 0.00044350
Trial 247:
  Learning Rate: 0.008423976674902287
  Sigma Multiplier: 0.9876375960234558
  Initialization Multiplier: 0.5413194661821406
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.92it/s, loss=-0.000280, elapsed time=0.06, total time=11.2]
[I 2025-06-07 13:25:13,296] Trial 247 finished with value: -0.0002797478572675277 and parameters: {'learning_rate': 0.008423976674902287, 'sigma_multiplier': 0.9876375960234558, 'num_layers': 2, 'initialization_multiplier': 0.5413194661821406}. Best is trial 144 with value: -0.0004832216293692365.


Training has not converged after 150 steps
Trial 247 final loss: -0.00027975
Trial 248:
  Learning Rate: 0.006115521401940773
  Sigma Multiplier: 1.0372657813112909
  Initialization Multiplier: 0.6303388864180629
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.43it/s, loss=-0.000488, elapsed time=0.05, total time=10.7]
[I 2025-06-07 13:25:24,059] Trial 248 finished with value: -0.00048807768206494604 and parameters: {'learning_rate': 0.006115521401940773, 'sigma_multiplier': 1.0372657813112909, 'num_layers': 2, 'initialization_multiplier': 0.6303388864180629}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 248 final loss: -0.00048808
Trial 249:
  Learning Rate: 0.005669119829837811
  Sigma Multiplier: 1.0399595149107632
  Initialization Multiplier: 0.6246829541028147
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.61it/s, loss=-0.000277, elapsed time=0.05, total time=10.6]
[I 2025-06-07 13:25:34,653] Trial 249 finished with value: -0.0002773894884644938 and parameters: {'learning_rate': 0.005669119829837811, 'sigma_multiplier': 1.0399595149107632, 'num_layers': 2, 'initialization_multiplier': 0.6246829541028147}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 249 final loss: -0.00027739
Trial 250:
  Learning Rate: 0.007767705644383626
  Sigma Multiplier: 0.9607634297131635
  Initialization Multiplier: 0.5058929897175716
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.92it/s, loss=-0.000329, elapsed time=0.05, total time=11]  
[I 2025-06-07 13:25:45,714] Trial 250 finished with value: -0.0003286362539927799 and parameters: {'learning_rate': 0.007767705644383626, 'sigma_multiplier': 0.9607634297131635, 'num_layers': 2, 'initialization_multiplier': 0.5058929897175716}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 250 final loss: -0.00032864
Trial 251:
  Learning Rate: 0.004564876069118525
  Sigma Multiplier: 0.8837442597286553
  Initialization Multiplier: 0.6124394191832504
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.75it/s, loss=-0.000290, elapsed time=0.07, total time=11.2]
[I 2025-06-07 13:25:57,003] Trial 251 finished with value: -0.00029001275263896014 and parameters: {'learning_rate': 0.004564876069118525, 'sigma_multiplier': 0.8837442597286553, 'num_layers': 2, 'initialization_multiplier': 0.6124394191832504}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 251 final loss: -0.00029001
Trial 252:
  Learning Rate: 0.005721130293227835
  Sigma Multiplier: 1.0028802960230374
  Initialization Multiplier: 0.5625438760186096
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.82it/s, loss=-0.000389, elapsed time=0.07, total time=11.1]
[I 2025-06-07 13:26:08,185] Trial 252 finished with value: -0.0003886095663439726 and parameters: {'learning_rate': 0.005721130293227835, 'sigma_multiplier': 1.0028802960230374, 'num_layers': 2, 'initialization_multiplier': 0.5625438760186096}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 252 final loss: -0.00038861
Trial 253:
  Learning Rate: 0.008462646349191985
  Sigma Multiplier: 1.0613766205175914
  Initialization Multiplier: 0.5132901066851221
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.34it/s, loss=-0.000351, elapsed time=0.05, total time=10.7]
[I 2025-06-07 13:26:18,942] Trial 253 finished with value: -0.0003510977216997988 and parameters: {'learning_rate': 0.008462646349191985, 'sigma_multiplier': 1.0613766205175914, 'num_layers': 2, 'initialization_multiplier': 0.5132901066851221}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 253 final loss: -0.00035110
Trial 254:
  Learning Rate: 0.0070183727478094985
  Sigma Multiplier: 0.9256855799836837
  Initialization Multiplier: 0.6618419649871392
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.54it/s, loss=-0.000253, elapsed time=0.06, total time=11.4]
[I 2025-06-07 13:26:30,366] Trial 254 finished with value: -0.0002532434599608375 and parameters: {'learning_rate': 0.0070183727478094985, 'sigma_multiplier': 0.9256855799836837, 'num_layers': 2, 'initialization_multiplier': 0.6618419649871392}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 254 final loss: -0.00025324
Trial 255:
  Learning Rate: 0.00427874702167369
  Sigma Multiplier: 0.9841030632353156
  Initialization Multiplier: 0.5906512941786298
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.02it/s, loss=-0.000435, elapsed time=0.08, total time=11]  
[I 2025-06-07 13:26:41,428] Trial 255 finished with value: -0.00043542575235684216 and parameters: {'learning_rate': 0.00427874702167369, 'sigma_multiplier': 0.9841030632353156, 'num_layers': 2, 'initialization_multiplier': 0.5906512941786298}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 255 final loss: -0.00043543
Trial 256:
  Learning Rate: 0.0037240818106825738
  Sigma Multiplier: 0.9773307943739642
  Initialization Multiplier: 0.5918904898331159
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.96it/s, loss=-0.000385, elapsed time=0.07, total time=11.1]
[I 2025-06-07 13:26:52,570] Trial 256 finished with value: -0.00038525618440658446 and parameters: {'learning_rate': 0.0037240818106825738, 'sigma_multiplier': 0.9773307943739642, 'num_layers': 2, 'initialization_multiplier': 0.5918904898331159}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 256 final loss: -0.00038526
Trial 257:
  Learning Rate: 0.004200251453838286
  Sigma Multiplier: 1.0321112265125987
  Initialization Multiplier: 0.5328963977556822
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.82it/s, loss=-0.000328, elapsed time=0.07, total time=11.1]
[I 2025-06-07 13:27:03,763] Trial 257 finished with value: -0.000327580343279115 and parameters: {'learning_rate': 0.004200251453838286, 'sigma_multiplier': 1.0321112265125987, 'num_layers': 2, 'initialization_multiplier': 0.5328963977556822}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 257 final loss: -0.00032758
Trial 258:
  Learning Rate: 0.0024509382390352414
  Sigma Multiplier: 0.9262303901576227
  Initialization Multiplier: 0.4753499438054207
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.88it/s, loss=-0.000311, elapsed time=0.08, total time=11.1]
[I 2025-06-07 13:27:14,929] Trial 258 finished with value: -0.0003107603051073278 and parameters: {'learning_rate': 0.0024509382390352414, 'sigma_multiplier': 0.9262303901576227, 'num_layers': 2, 'initialization_multiplier': 0.4753499438054207}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 258 final loss: -0.00031076
Trial 259:
  Learning Rate: 0.004342949145779358
  Sigma Multiplier: 0.9955944441843484
  Initialization Multiplier: 0.5781255736705601
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.65it/s, loss=-0.000278, elapsed time=0.05, total time=10.6]
[I 2025-06-07 13:27:25,535] Trial 259 finished with value: -0.00027834886209842345 and parameters: {'learning_rate': 0.004342949145779358, 'sigma_multiplier': 0.9955944441843484, 'num_layers': 2, 'initialization_multiplier': 0.5781255736705601}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 259 final loss: -0.00027835
Trial 260:
  Learning Rate: 0.005270852344448388
  Sigma Multiplier: 1.076965287454277
  Initialization Multiplier: 0.5410191937305875
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.26it/s, loss=-0.000406, elapsed time=0.06, total time=10.8]
[I 2025-06-07 13:27:36,378] Trial 260 finished with value: -0.0004056636938531413 and parameters: {'learning_rate': 0.005270852344448388, 'sigma_multiplier': 1.076965287454277, 'num_layers': 2, 'initialization_multiplier': 0.5410191937305875}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 260 final loss: -0.00040566
Trial 261:
  Learning Rate: 0.005479509468431397
  Sigma Multiplier: 1.0642528381651901
  Initialization Multiplier: 0.1706137341538726
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.67it/s, loss=-0.000197, elapsed time=0.07, total time=11.3]
[I 2025-06-07 13:27:47,773] Trial 261 finished with value: -0.00019676519405722838 and parameters: {'learning_rate': 0.005479509468431397, 'sigma_multiplier': 1.0642528381651901, 'num_layers': 2, 'initialization_multiplier': 0.1706137341538726}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 261 final loss: -0.00019677
Trial 262:
  Learning Rate: 0.009628995475985192
  Sigma Multiplier: 1.0237632078957208
  Initialization Multiplier: 0.541964047821514
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.76it/s, loss=-0.000378, elapsed time=0.05, total time=11.2]
[I 2025-06-07 13:27:59,037] Trial 262 finished with value: -0.00037825118159163785 and parameters: {'learning_rate': 0.009628995475985192, 'sigma_multiplier': 1.0237632078957208, 'num_layers': 2, 'initialization_multiplier': 0.541964047821514}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 262 final loss: -0.00037825
Trial 263:
  Learning Rate: 0.004835365054533338
  Sigma Multiplier: 0.9620657254526711
  Initialization Multiplier: 0.4513771854899191
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.12it/s, loss=-0.000185, elapsed time=0.06, total time=12.7]
[I 2025-06-07 13:28:11,739] Trial 263 finished with value: -0.0001854557760886044 and parameters: {'learning_rate': 0.004835365054533338, 'sigma_multiplier': 0.9620657254526711, 'num_layers': 2, 'initialization_multiplier': 0.4513771854899191}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 263 final loss: -0.00018546
Trial 264:
  Learning Rate: 0.015173325254846437
  Sigma Multiplier: 1.0808639348730085
  Initialization Multiplier: 0.508233492537115
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.17it/s, loss=-0.000341, elapsed time=0.05, total time=10.9]
[I 2025-06-07 13:28:22,695] Trial 264 finished with value: -0.0003409178490082463 and parameters: {'learning_rate': 0.015173325254846437, 'sigma_multiplier': 1.0808639348730085, 'num_layers': 2, 'initialization_multiplier': 0.508233492537115}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 264 final loss: -0.00034092
Trial 265:
  Learning Rate: 0.003466345733719817
  Sigma Multiplier: 1.040749941887684
  Initialization Multiplier: 0.5786262368511889
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.25it/s, loss=-0.000158, elapsed time=0.09, total time=10.1]
[I 2025-06-07 13:28:32,849] Trial 265 finished with value: -0.0001575962085643238 and parameters: {'learning_rate': 0.003466345733719817, 'sigma_multiplier': 1.040749941887684, 'num_layers': 1, 'initialization_multiplier': 0.5786262368511889}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 265 final loss: -0.00015760
Trial 266:
  Learning Rate: 0.006011573965309302
  Sigma Multiplier: 1.0005887603754553
  Initialization Multiplier: 0.49197443198397883
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.42it/s, loss=-0.000304, elapsed time=0.06, total time=12.5]
[I 2025-06-07 13:28:45,454] Trial 266 finished with value: -0.00030392933786873113 and parameters: {'learning_rate': 0.006011573965309302, 'sigma_multiplier': 1.0005887603754553, 'num_layers': 2, 'initialization_multiplier': 0.49197443198397883}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 266 final loss: -0.00030393
Trial 267:
  Learning Rate: 0.007899483867633844
  Sigma Multiplier: 0.9050558561246669
  Initialization Multiplier: 0.5471660322016412
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.95it/s, loss=-0.000347, elapsed time=0.08, total time=11.8]
[I 2025-06-07 13:28:57,336] Trial 267 finished with value: -0.0003474931795089552 and parameters: {'learning_rate': 0.007899483867633844, 'sigma_multiplier': 0.9050558561246669, 'num_layers': 2, 'initialization_multiplier': 0.5471660322016412}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 267 final loss: -0.00034749
Trial 268:
  Learning Rate: 0.009095058270432606
  Sigma Multiplier: 0.13416816713254454
  Initialization Multiplier: 0.2628174748990039
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.26it/s, loss=0.000116, elapsed time=0.08, total time=14.9] 
[I 2025-06-07 13:29:12,254] Trial 268 finished with value: 0.00011611943379210081 and parameters: {'learning_rate': 0.009095058270432606, 'sigma_multiplier': 0.13416816713254454, 'num_layers': 2, 'initialization_multiplier': 0.2628174748990039}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 268 final loss: 0.00011612
Trial 269:
  Learning Rate: 0.011729767303117316
  Sigma Multiplier: 1.0985005546471163
  Initialization Multiplier: 0.6064166029936988
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.23it/s, loss=-0.000342, elapsed time=0.06, total time=10.8]
[I 2025-06-07 13:29:23,126] Trial 269 finished with value: -0.00034199222201224014 and parameters: {'learning_rate': 0.011729767303117316, 'sigma_multiplier': 1.0985005546471163, 'num_layers': 2, 'initialization_multiplier': 0.6064166029936988}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 269 final loss: -0.00034199
Trial 270:
  Learning Rate: 0.004122197309750497
  Sigma Multiplier: 0.9848116657385433
  Initialization Multiplier: 0.4428914529228196
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.52it/s, loss=-0.000351, elapsed time=0.06, total time=11.5]
[I 2025-06-07 13:29:34,668] Trial 270 finished with value: -0.0003509210185259587 and parameters: {'learning_rate': 0.004122197309750497, 'sigma_multiplier': 0.9848116657385433, 'num_layers': 2, 'initialization_multiplier': 0.4428914529228196}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 270 final loss: -0.00035092
Trial 271:
  Learning Rate: 0.0064395671663778405
  Sigma Multiplier: 0.8577146256837176
  Initialization Multiplier: 0.5197209075693829
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.17it/s, loss=-0.000279, elapsed time=0.07, total time=11.7]
[I 2025-06-07 13:29:46,366] Trial 271 finished with value: -0.00027918755845849675 and parameters: {'learning_rate': 0.0064395671663778405, 'sigma_multiplier': 0.8577146256837176, 'num_layers': 2, 'initialization_multiplier': 0.5197209075693829}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 271 final loss: -0.00027919
Trial 272:
  Learning Rate: 0.007133460455485789
  Sigma Multiplier: 1.0497766107453925
  Initialization Multiplier: 0.645374551949813
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.70it/s, loss=-0.000409, elapsed time=0.09, total time=11.3]
[I 2025-06-07 13:29:57,676] Trial 272 finished with value: -0.00040926560622334184 and parameters: {'learning_rate': 0.007133460455485789, 'sigma_multiplier': 1.0497766107453925, 'num_layers': 2, 'initialization_multiplier': 0.645374551949813}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 272 final loss: -0.00040927
Trial 273:
  Learning Rate: 0.007535403603639828
  Sigma Multiplier: 0.32126387120095834
  Initialization Multiplier: 0.645572906932239
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.64it/s, loss=0.000481, elapsed time=0.1, total time=14.4] 
[I 2025-06-07 13:30:12,086] Trial 273 finished with value: 0.0004810752605033364 and parameters: {'learning_rate': 0.007535403603639828, 'sigma_multiplier': 0.32126387120095834, 'num_layers': 2, 'initialization_multiplier': 0.645572906932239}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 273 final loss: 0.00048108
Trial 274:
  Learning Rate: 0.010015271230193102
  Sigma Multiplier: 1.052381800715164
  Initialization Multiplier: 1.4548165206992127
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.47it/s, loss=0.000116, elapsed time=0.07, total time=11.4] 
[I 2025-06-07 13:30:23,528] Trial 274 finished with value: 0.00011597626416880924 and parameters: {'learning_rate': 0.010015271230193102, 'sigma_multiplier': 1.052381800715164, 'num_layers': 2, 'initialization_multiplier': 1.4548165206992127}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 274 final loss: 0.00011598
Trial 275:
  Learning Rate: 0.008697940038364037
  Sigma Multiplier: 1.1005291434295474
  Initialization Multiplier: 0.6269474913888212
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.87it/s, loss=-0.000401, elapsed time=0.09, total time=11.2]
[I 2025-06-07 13:30:34,797] Trial 275 finished with value: -0.00040070291860027106 and parameters: {'learning_rate': 0.008697940038364037, 'sigma_multiplier': 1.1005291434295474, 'num_layers': 2, 'initialization_multiplier': 0.6269474913888212}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 275 final loss: -0.00040070
Trial 276:
  Learning Rate: 0.008989755151763403
  Sigma Multiplier: 1.1065682190368606
  Initialization Multiplier: 0.6729325915480694
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.66it/s, loss=-0.000330, elapsed time=0.12, total time=12.1]
[I 2025-06-07 13:30:46,980] Trial 276 finished with value: -0.00033003602046145146 and parameters: {'learning_rate': 0.008989755151763403, 'sigma_multiplier': 1.1065682190368606, 'num_layers': 2, 'initialization_multiplier': 0.6729325915480694}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 276 final loss: -0.00033004
Trial 277:
  Learning Rate: 0.012744261381741203
  Sigma Multiplier: 1.0743874333617085
  Initialization Multiplier: 0.6076254689500247
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.43it/s, loss=-0.000318, elapsed time=0.06, total time=11.4]
[I 2025-06-07 13:30:58,444] Trial 277 finished with value: -0.00031776079540315636 and parameters: {'learning_rate': 0.012744261381741203, 'sigma_multiplier': 1.0743874333617085, 'num_layers': 2, 'initialization_multiplier': 0.6076254689500247}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 277 final loss: -0.00031776
Trial 278:
  Learning Rate: 0.010391303269592802
  Sigma Multiplier: 1.1068275583877996
  Initialization Multiplier: 0.19748122865038314
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.21it/s, loss=-0.000212, elapsed time=0.07, total time=12.7]
[I 2025-06-07 13:31:11,181] Trial 278 finished with value: -0.00021245488182165302 and parameters: {'learning_rate': 0.010391303269592802, 'sigma_multiplier': 1.1068275583877996, 'num_layers': 2, 'initialization_multiplier': 0.19748122865038314}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 278 final loss: -0.00021245
Trial 279:
  Learning Rate: 0.008535205679487864
  Sigma Multiplier: 1.0346410487267539
  Initialization Multiplier: 0.64269569803745
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.85it/s, loss=-0.000347, elapsed time=0.07, total time=12]  
[I 2025-06-07 13:31:23,260] Trial 279 finished with value: -0.0003472015630057818 and parameters: {'learning_rate': 0.008535205679487864, 'sigma_multiplier': 1.0346410487267539, 'num_layers': 2, 'initialization_multiplier': 0.64269569803745}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 279 final loss: -0.00034720
Trial 280:
  Learning Rate: 0.0028550745395191307
  Sigma Multiplier: 1.073104640062073
  Initialization Multiplier: 0.6763293826610031
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.10it/s, loss=-0.000180, elapsed time=0.07, total time=11.8]
[I 2025-06-07 13:31:35,093] Trial 280 finished with value: -0.00017969264853632709 and parameters: {'learning_rate': 0.0028550745395191307, 'sigma_multiplier': 1.073104640062073, 'num_layers': 2, 'initialization_multiplier': 0.6763293826610031}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 280 final loss: -0.00017969
Trial 281:
  Learning Rate: 0.01126288340287288
  Sigma Multiplier: 1.1140763021263977
  Initialization Multiplier: 0.47359308217528395
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.48it/s, loss=-0.000323, elapsed time=0.08, total time=11.5]
[I 2025-06-07 13:31:46,625] Trial 281 finished with value: -0.0003228213371616905 and parameters: {'learning_rate': 0.01126288340287288, 'sigma_multiplier': 1.1140763021263977, 'num_layers': 2, 'initialization_multiplier': 0.47359308217528395}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 281 final loss: -0.00032282
Trial 282:
  Learning Rate: 0.01823584578446739
  Sigma Multiplier: 1.0323568285854572
  Initialization Multiplier: 0.6205749572328957
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.60it/s, loss=-0.000265, elapsed time=0.05, total time=11.3]
[I 2025-06-07 13:31:57,997] Trial 282 finished with value: -0.0002648605274040484 and parameters: {'learning_rate': 0.01823584578446739, 'sigma_multiplier': 1.0323568285854572, 'num_layers': 2, 'initialization_multiplier': 0.6205749572328957}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 282 final loss: -0.00026486
Trial 283:
  Learning Rate: 0.004979461643409642
  Sigma Multiplier: 0.9357952042882323
  Initialization Multiplier: 0.5465263350788888
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.07it/s, loss=-0.000338, elapsed time=0.06, total time=11.8]
[I 2025-06-07 13:32:09,805] Trial 283 finished with value: -0.0003375839820147154 and parameters: {'learning_rate': 0.004979461643409642, 'sigma_multiplier': 0.9357952042882323, 'num_layers': 2, 'initialization_multiplier': 0.5465263350788888}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 283 final loss: -0.00033758
Trial 284:
  Learning Rate: 0.014401310116860602
  Sigma Multiplier: 1.0550354166106366
  Initialization Multiplier: 1.0679307482188223
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.56it/s, loss=-0.000066, elapsed time=0.07, total time=13.3]
[I 2025-06-07 13:32:23,146] Trial 284 finished with value: -6.637424406287772e-05 and parameters: {'learning_rate': 0.014401310116860602, 'sigma_multiplier': 1.0550354166106366, 'num_layers': 2, 'initialization_multiplier': 1.0679307482188223}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 284 final loss: -0.00006637
Trial 285:
  Learning Rate: 0.007058333910771692
  Sigma Multiplier: 0.9564904215393485
  Initialization Multiplier: 0.5847152641729475
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.53it/s, loss=-0.000427, elapsed time=0.07, total time=11.5]
[I 2025-06-07 13:32:34,766] Trial 285 finished with value: -0.00042732670330889355 and parameters: {'learning_rate': 0.007058333910771692, 'sigma_multiplier': 0.9564904215393485, 'num_layers': 2, 'initialization_multiplier': 0.5847152641729475}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 285 final loss: -0.00042733
Trial 286:
  Learning Rate: 0.007578558394816807
  Sigma Multiplier: 0.9632788591772856
  Initialization Multiplier: 0.42179627508868056
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.61it/s, loss=-0.000421, elapsed time=0.07, total time=11.4]
[I 2025-06-07 13:32:46,188] Trial 286 finished with value: -0.0004210350023320765 and parameters: {'learning_rate': 0.007578558394816807, 'sigma_multiplier': 0.9632788591772856, 'num_layers': 2, 'initialization_multiplier': 0.42179627508868056}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 286 final loss: -0.00042104
Trial 287:
  Learning Rate: 0.007027314427428624
  Sigma Multiplier: 0.9601282226743288
  Initialization Multiplier: 0.3999773409940845
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.47it/s, loss=-0.000350, elapsed time=0.08, total time=11.4]
[I 2025-06-07 13:32:57,603] Trial 287 finished with value: -0.0003500393163850719 and parameters: {'learning_rate': 0.007027314427428624, 'sigma_multiplier': 0.9601282226743288, 'num_layers': 2, 'initialization_multiplier': 0.3999773409940845}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 287 final loss: -0.00035004
Trial 288:
  Learning Rate: 0.0074258339040369145
  Sigma Multiplier: 0.8953002785185867
  Initialization Multiplier: 0.4213969497044418
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.48it/s, loss=-0.000258, elapsed time=0.06, total time=12.4]
[I 2025-06-07 13:33:10,009] Trial 288 finished with value: -0.0002582959612474964 and parameters: {'learning_rate': 0.0074258339040369145, 'sigma_multiplier': 0.8953002785185867, 'num_layers': 2, 'initialization_multiplier': 0.4213969497044418}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 288 final loss: -0.00025830
Trial 289:
  Learning Rate: 0.006143674355023329
  Sigma Multiplier: 0.9453893784493637
  Initialization Multiplier: 0.45875106538426436
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.04it/s, loss=-0.000313, elapsed time=0.07, total time=11.8]
[I 2025-06-07 13:33:21,907] Trial 289 finished with value: -0.00031275822780035643 and parameters: {'learning_rate': 0.006143674355023329, 'sigma_multiplier': 0.9453893784493637, 'num_layers': 2, 'initialization_multiplier': 0.45875106538426436}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 289 final loss: -0.00031276
Trial 290:
  Learning Rate: 0.007951386010357994
  Sigma Multiplier: 0.9598157407569533
  Initialization Multiplier: 0.3670418780008896
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.00it/s, loss=-0.000327, elapsed time=0.06, total time=11.9]
[I 2025-06-07 13:33:33,845] Trial 290 finished with value: -0.0003267919861592918 and parameters: {'learning_rate': 0.007951386010357994, 'sigma_multiplier': 0.9598157407569533, 'num_layers': 2, 'initialization_multiplier': 0.3670418780008896}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 290 final loss: -0.00032679
Trial 291:
  Learning Rate: 0.00919390249294439
  Sigma Multiplier: 0.875587035982166
  Initialization Multiplier: 0.500292688686815
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.08it/s, loss=-0.000277, elapsed time=0.06, total time=11.7]
[I 2025-06-07 13:33:45,632] Trial 291 finished with value: -0.0002773412015374831 and parameters: {'learning_rate': 0.00919390249294439, 'sigma_multiplier': 0.875587035982166, 'num_layers': 2, 'initialization_multiplier': 0.500292688686815}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 291 final loss: -0.00027734
Trial 292:
  Learning Rate: 0.0012735977364647984
  Sigma Multiplier: 0.927816368238661
  Initialization Multiplier: 0.3301525957834854
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:15<00:00,  9.53it/s, loss=0.000086, elapsed time=0.13, total time=16.1]
[I 2025-06-07 13:34:01,814] Trial 292 finished with value: 8.612956465319368e-05 and parameters: {'learning_rate': 0.0012735977364647984, 'sigma_multiplier': 0.927816368238661, 'num_layers': 4, 'initialization_multiplier': 0.3301525957834854}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 292 final loss: 0.00008613
Trial 293:
  Learning Rate: 0.0067025897588059765
  Sigma Multiplier: 0.9993070021844712
  Initialization Multiplier: 0.4349364187954886
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.40it/s, loss=-0.000359, elapsed time=0.07, total time=11.5]
[I 2025-06-07 13:34:13,353] Trial 293 finished with value: -0.0003591944693695058 and parameters: {'learning_rate': 0.0067025897588059765, 'sigma_multiplier': 0.9993070021844712, 'num_layers': 2, 'initialization_multiplier': 0.4349364187954886}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 293 final loss: -0.00035919
Trial 294:
  Learning Rate: 0.010955521458798304
  Sigma Multiplier: 1.0142549159294025
  Initialization Multiplier: 0.5154541381142672
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.57it/s, loss=-0.000371, elapsed time=0.05, total time=11.4]
[I 2025-06-07 13:34:24,760] Trial 294 finished with value: -0.0003708633981052179 and parameters: {'learning_rate': 0.010955521458798304, 'sigma_multiplier': 1.0142549159294025, 'num_layers': 2, 'initialization_multiplier': 0.5154541381142672}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 294 final loss: -0.00037086
Trial 295:
  Learning Rate: 0.007903365144512245
  Sigma Multiplier: 0.9112612033633528
  Initialization Multiplier: 0.5514366632989167
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.37it/s, loss=-0.000360, elapsed time=0.07, total time=11.5]
[I 2025-06-07 13:34:36,344] Trial 295 finished with value: -0.00036011052211775297 and parameters: {'learning_rate': 0.007903365144512245, 'sigma_multiplier': 0.9112612033633528, 'num_layers': 2, 'initialization_multiplier': 0.5514366632989167}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 295 final loss: -0.00036011
Trial 296:
  Learning Rate: 0.005810301695970961
  Sigma Multiplier: 0.9648126144015836
  Initialization Multiplier: 0.296337024129503
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.56it/s, loss=-0.000258, elapsed time=0.08, total time=11.3]
[I 2025-06-07 13:34:47,732] Trial 296 finished with value: -0.00025834131922261003 and parameters: {'learning_rate': 0.005810301695970961, 'sigma_multiplier': 0.9648126144015836, 'num_layers': 2, 'initialization_multiplier': 0.296337024129503}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 296 final loss: -0.00025834
Trial 297:
  Learning Rate: 0.00992384815027789
  Sigma Multiplier: 1.0593073642786088
  Initialization Multiplier: 0.5034441420638827
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.33it/s, loss=-0.000465, elapsed time=0.06, total time=10.8]
[I 2025-06-07 13:34:58,563] Trial 297 finished with value: -0.0004648991308180689 and parameters: {'learning_rate': 0.00992384815027789, 'sigma_multiplier': 1.0593073642786088, 'num_layers': 2, 'initialization_multiplier': 0.5034441420638827}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 297 final loss: -0.00046490
Trial 298:
  Learning Rate: 0.012264561640985794
  Sigma Multiplier: 1.0647527205143743
  Initialization Multiplier: 0.4911606932305769
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.82it/s, loss=-0.000372, elapsed time=0.05, total time=9.7] 
[I 2025-06-07 13:35:08,324] Trial 298 finished with value: -0.0003715966860574619 and parameters: {'learning_rate': 0.012264561640985794, 'sigma_multiplier': 1.0647527205143743, 'num_layers': 2, 'initialization_multiplier': 0.4911606932305769}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 298 final loss: -0.00037160
Trial 299:
  Learning Rate: 0.010209544633286742
  Sigma Multiplier: 1.0235854921836087
  Initialization Multiplier: 0.5609218878736002
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.43it/s, loss=-0.000362, elapsed time=0.1, total time=11.5] 
[I 2025-06-07 13:35:19,943] Trial 299 finished with value: -0.00036172909453005607 and parameters: {'learning_rate': 0.010209544633286742, 'sigma_multiplier': 1.0235854921836087, 'num_layers': 2, 'initialization_multiplier': 0.5609218878736002}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 299 final loss: -0.00036173
Trial 300:
  Learning Rate: 0.013397914611971784
  Sigma Multiplier: 1.0795824372108944
  Initialization Multiplier: 0.5981086213417758
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.34it/s, loss=-0.000374, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:35:31,631] Trial 300 finished with value: -0.00037364156509781123 and parameters: {'learning_rate': 0.013397914611971784, 'sigma_multiplier': 1.0795824372108944, 'num_layers': 2, 'initialization_multiplier': 0.5981086213417758}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 300 final loss: -0.00037364
Trial 301:
  Learning Rate: 0.00953416138878224
  Sigma Multiplier: 0.9869456268098655
  Initialization Multiplier: 0.5133909259375696
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.50it/s, loss=-0.000443, elapsed time=0.09, total time=11.4]
[I 2025-06-07 13:35:43,118] Trial 301 finished with value: -0.0004431342484658023 and parameters: {'learning_rate': 0.00953416138878224, 'sigma_multiplier': 0.9869456268098655, 'num_layers': 2, 'initialization_multiplier': 0.5133909259375696}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 301 final loss: -0.00044313
Trial 302:
  Learning Rate: 0.016339844485832006
  Sigma Multiplier: 1.0476200862105154
  Initialization Multiplier: 0.13199765127993196
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.55it/s, loss=-0.000238, elapsed time=0.06, total time=11.3]
[I 2025-06-07 13:35:54,544] Trial 302 finished with value: -0.0002378103180243174 and parameters: {'learning_rate': 0.016339844485832006, 'sigma_multiplier': 1.0476200862105154, 'num_layers': 2, 'initialization_multiplier': 0.13199765127993196}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 302 final loss: -0.00023781
Trial 303:
  Learning Rate: 0.00972950334847049
  Sigma Multiplier: 0.9938778409683705
  Initialization Multiplier: 0.0742267849362262
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.13it/s, loss=-0.000136, elapsed time=0.07, total time=15.2]
[I 2025-06-07 13:36:09,828] Trial 303 finished with value: -0.0001363500317835849 and parameters: {'learning_rate': 0.00972950334847049, 'sigma_multiplier': 0.9938778409683705, 'num_layers': 2, 'initialization_multiplier': 0.0742267849362262}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 303 final loss: -0.00013635
Trial 304:
  Learning Rate: 0.010569097537611812
  Sigma Multiplier: 1.1238705362863683
  Initialization Multiplier: 0.5282161917978242
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.57it/s, loss=-0.000336, elapsed time=0.05, total time=12.5]
[I 2025-06-07 13:36:22,416] Trial 304 finished with value: -0.0003358319733781941 and parameters: {'learning_rate': 0.010569097537611812, 'sigma_multiplier': 1.1238705362863683, 'num_layers': 2, 'initialization_multiplier': 0.5282161917978242}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 304 final loss: -0.00033583
Trial 305:
  Learning Rate: 0.009024345929083586
  Sigma Multiplier: 1.0213848122381646
  Initialization Multiplier: 0.5877182761644213
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.48it/s, loss=-0.000427, elapsed time=0.08, total time=11.4]
[I 2025-06-07 13:36:33,904] Trial 305 finished with value: -0.0004266511617395637 and parameters: {'learning_rate': 0.009024345929083586, 'sigma_multiplier': 1.0213848122381646, 'num_layers': 2, 'initialization_multiplier': 0.5877182761644213}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 305 final loss: -0.00042665
Trial 306:
  Learning Rate: 0.008532133086660917
  Sigma Multiplier: 0.9921960938278469
  Initialization Multiplier: 0.6403288707653452
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.56it/s, loss=-0.000386, elapsed time=0.06, total time=11.3]
[I 2025-06-07 13:36:45,297] Trial 306 finished with value: -0.00038630184126160305 and parameters: {'learning_rate': 0.008532133086660917, 'sigma_multiplier': 0.9921960938278469, 'num_layers': 2, 'initialization_multiplier': 0.6403288707653452}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 306 final loss: -0.00038630
Trial 307:
  Learning Rate: 0.012492892934484513
  Sigma Multiplier: 1.0242066632465907
  Initialization Multiplier: 0.5999113094114077
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.49it/s, loss=-0.000398, elapsed time=0.15, total time=11.4]
[I 2025-06-07 13:36:56,749] Trial 307 finished with value: -0.0003981927758016691 and parameters: {'learning_rate': 0.012492892934484513, 'sigma_multiplier': 1.0242066632465907, 'num_layers': 2, 'initialization_multiplier': 0.5999113094114077}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 307 final loss: -0.00039819
Trial 308:
  Learning Rate: 0.009353760196839112
  Sigma Multiplier: 0.9720598999751311
  Initialization Multiplier: 0.22335497187135855
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.52it/s, loss=-0.000325, elapsed time=0.08, total time=11.4]
[I 2025-06-07 13:37:08,203] Trial 308 finished with value: -0.0003245499917600974 and parameters: {'learning_rate': 0.009353760196839112, 'sigma_multiplier': 0.9720598999751311, 'num_layers': 2, 'initialization_multiplier': 0.22335497187135855}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 308 final loss: -0.00032455
Trial 309:
  Learning Rate: 0.014252198542446292
  Sigma Multiplier: 1.8841935026188361
  Initialization Multiplier: 0.5784177822041873
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.62it/s, loss=-0.000240, elapsed time=0.05, total time=9.31]
[I 2025-06-07 13:37:17,554] Trial 309 finished with value: -0.0002404160154920432 and parameters: {'learning_rate': 0.014252198542446292, 'sigma_multiplier': 1.8841935026188361, 'num_layers': 2, 'initialization_multiplier': 0.5784177822041873}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 309 final loss: -0.00024042
Trial 310:
  Learning Rate: 0.011528460315734896
  Sigma Multiplier: 1.0289800954502422
  Initialization Multiplier: 0.680622729544399
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.23it/s, loss=-0.000254, elapsed time=0.06, total time=11.7]
[I 2025-06-07 13:37:29,274] Trial 310 finished with value: -0.00025413497108284146 and parameters: {'learning_rate': 0.011528460315734896, 'sigma_multiplier': 1.0289800954502422, 'num_layers': 2, 'initialization_multiplier': 0.680622729544399}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 310 final loss: -0.00025413
Trial 311:
  Learning Rate: 0.008122525308807887
  Sigma Multiplier: 0.9368442269672159
  Initialization Multiplier: 0.6285936135952026
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.53it/s, loss=-0.000435, elapsed time=0.08, total time=11.4]
[I 2025-06-07 13:37:40,720] Trial 311 finished with value: -0.00043491730275589576 and parameters: {'learning_rate': 0.008122525308807887, 'sigma_multiplier': 0.9368442269672159, 'num_layers': 2, 'initialization_multiplier': 0.6285936135952026}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 311 final loss: -0.00043492
Trial 312:
  Learning Rate: 0.009473183931577039
  Sigma Multiplier: 0.9120786655032728
  Initialization Multiplier: 0.5129950412645083
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.95it/s, loss=-0.000380, elapsed time=0.08, total time=11]  
[I 2025-06-07 13:37:51,817] Trial 312 finished with value: -0.0003801141729423127 and parameters: {'learning_rate': 0.009473183931577039, 'sigma_multiplier': 0.9120786655032728, 'num_layers': 2, 'initialization_multiplier': 0.5129950412645083}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 312 final loss: -0.00038011
Trial 313:
  Learning Rate: 0.008293047416018853
  Sigma Multiplier: 0.8335499143039835
  Initialization Multiplier: 0.5761798579467778
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.73it/s, loss=-0.000179, elapsed time=0.07, total time=12.1]
[I 2025-06-07 13:38:03,974] Trial 313 finished with value: -0.0001787425577664057 and parameters: {'learning_rate': 0.008293047416018853, 'sigma_multiplier': 0.8335499143039835, 'num_layers': 2, 'initialization_multiplier': 0.5761798579467778}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 313 final loss: -0.00017874
Trial 314:
  Learning Rate: 0.011164399443958539
  Sigma Multiplier: 0.9489793445173133
  Initialization Multiplier: 0.6152646585295767
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.46it/s, loss=-0.000362, elapsed time=0.07, total time=11.5]
[I 2025-06-07 13:38:15,571] Trial 314 finished with value: -0.00036192561564884384 and parameters: {'learning_rate': 0.011164399443958539, 'sigma_multiplier': 0.9489793445173133, 'num_layers': 2, 'initialization_multiplier': 0.6152646585295767}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 314 final loss: -0.00036193
Trial 315:
  Learning Rate: 0.009473246235938043
  Sigma Multiplier: 0.8748802693196355
  Initialization Multiplier: 0.4786228453457735
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.19it/s, loss=-0.000346, elapsed time=0.07, total time=11.7]
[I 2025-06-07 13:38:27,262] Trial 315 finished with value: -0.00034616961565508075 and parameters: {'learning_rate': 0.009473246235938043, 'sigma_multiplier': 0.8748802693196355, 'num_layers': 2, 'initialization_multiplier': 0.4786228453457735}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 315 final loss: -0.00034617
Trial 316:
  Learning Rate: 0.007751785919894038
  Sigma Multiplier: 0.9672919538925907
  Initialization Multiplier: 0.5501847637785893
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.93it/s, loss=-0.000483, elapsed time=0.07, total time=11.1]
[I 2025-06-07 13:38:38,371] Trial 316 finished with value: -0.00048303365917926343 and parameters: {'learning_rate': 0.007751785919894038, 'sigma_multiplier': 0.9672919538925907, 'num_layers': 2, 'initialization_multiplier': 0.5501847637785893}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 316 final loss: -0.00048303
Trial 317:
  Learning Rate: 0.00014934273201576366
  Sigma Multiplier: 0.9237302507119836
  Initialization Multiplier: 0.5483070506845704
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.46it/s, loss=0.041183, elapsed time=0.05, total time=9.4] 
[I 2025-06-07 13:38:47,804] Trial 317 finished with value: 0.04118272002745469 and parameters: {'learning_rate': 0.00014934273201576366, 'sigma_multiplier': 0.9237302507119836, 'num_layers': 1, 'initialization_multiplier': 0.5483070506845704}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 317 final loss: 0.04118272
Trial 318:
  Learning Rate: 0.007731570080051329
  Sigma Multiplier: 0.9639042606626481
  Initialization Multiplier: 0.7009685807127635
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.97it/s, loss=-0.000361, elapsed time=0.06, total time=12.9]
[I 2025-06-07 13:39:00,744] Trial 318 finished with value: -0.000360729930282453 and parameters: {'learning_rate': 0.007731570080051329, 'sigma_multiplier': 0.9639042606626481, 'num_layers': 2, 'initialization_multiplier': 0.7009685807127635}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 318 final loss: -0.00036073
Trial 319:
  Learning Rate: 0.007887892697799945
  Sigma Multiplier: 0.9885044497003503
  Initialization Multiplier: 0.5882318024169759
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.84it/s, loss=-0.000344, elapsed time=0.08, total time=11.2]
[I 2025-06-07 13:39:11,939] Trial 319 finished with value: -0.00034365480925688756 and parameters: {'learning_rate': 0.007887892697799945, 'sigma_multiplier': 0.9885044497003503, 'num_layers': 2, 'initialization_multiplier': 0.5882318024169759}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 319 final loss: -0.00034365
Trial 320:
  Learning Rate: 0.007140735871785507
  Sigma Multiplier: 0.8850516377126307
  Initialization Multiplier: 0.5424590869562312
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.63it/s, loss=-0.000312, elapsed time=0.05, total time=11.4]
[I 2025-06-07 13:39:23,384] Trial 320 finished with value: -0.0003122606287651312 and parameters: {'learning_rate': 0.007140735871785507, 'sigma_multiplier': 0.8850516377126307, 'num_layers': 2, 'initialization_multiplier': 0.5424590869562312}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 320 final loss: -0.00031226
Trial 321:
  Learning Rate: 0.01996719213218704
  Sigma Multiplier: 0.9534646015376281
  Initialization Multiplier: 0.6559363980088305
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.20it/s, loss=-0.000322, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:39:35,035] Trial 321 finished with value: -0.00032227274057320386 and parameters: {'learning_rate': 0.01996719213218704, 'sigma_multiplier': 0.9534646015376281, 'num_layers': 2, 'initialization_multiplier': 0.6559363980088305}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 321 final loss: -0.00032227
Trial 322:
  Learning Rate: 0.0020104078854299434
  Sigma Multiplier: 0.9210669237482356
  Initialization Multiplier: 0.5080627282616949
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.77it/s, loss=-0.000333, elapsed time=0.07, total time=11.2]
[I 2025-06-07 13:39:46,298] Trial 322 finished with value: -0.00033281643183680275 and parameters: {'learning_rate': 0.0020104078854299434, 'sigma_multiplier': 0.9210669237482356, 'num_layers': 2, 'initialization_multiplier': 0.5080627282616949}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 322 final loss: -0.00033282
Trial 323:
  Learning Rate: 0.008378552394907422
  Sigma Multiplier: 1.0000441358124117
  Initialization Multiplier: 0.5823285346662951
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.79it/s, loss=-0.000353, elapsed time=0.07, total time=11.2]
[I 2025-06-07 13:39:57,596] Trial 323 finished with value: -0.00035324470665783626 and parameters: {'learning_rate': 0.008378552394907422, 'sigma_multiplier': 1.0000441358124117, 'num_layers': 2, 'initialization_multiplier': 0.5823285346662951}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 323 final loss: -0.00035324
Trial 324:
  Learning Rate: 0.010602983499846005
  Sigma Multiplier: 0.9840739421764394
  Initialization Multiplier: 0.6297102183829841
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.82it/s, loss=-0.000484, elapsed time=0.07, total time=11.1]
[I 2025-06-07 13:40:08,781] Trial 324 finished with value: -0.0004843265999481983 and parameters: {'learning_rate': 0.010602983499846005, 'sigma_multiplier': 0.9840739421764394, 'num_layers': 2, 'initialization_multiplier': 0.6297102183829841}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 324 final loss: -0.00048433
Trial 325:
  Learning Rate: 0.012142476011153876
  Sigma Multiplier: 0.9594079678820383
  Initialization Multiplier: 0.6225234737133619
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.56it/s, loss=-0.000277, elapsed time=0.09, total time=11.4]
[I 2025-06-07 13:40:20,241] Trial 325 finished with value: -0.0002771389131656485 and parameters: {'learning_rate': 0.012142476011153876, 'sigma_multiplier': 0.9594079678820383, 'num_layers': 2, 'initialization_multiplier': 0.6225234737133619}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 325 final loss: -0.00027714
Trial 326:
  Learning Rate: 0.01043311062697599
  Sigma Multiplier: 1.0051272325233918
  Initialization Multiplier: 0.4547823064576857
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.80it/s, loss=-0.000358, elapsed time=0.06, total time=11.1]
[I 2025-06-07 13:40:31,411] Trial 326 finished with value: -0.0003576798371177507 and parameters: {'learning_rate': 0.01043311062697599, 'sigma_multiplier': 1.0051272325233918, 'num_layers': 2, 'initialization_multiplier': 0.4547823064576857}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 326 final loss: -0.00035768
Trial 327:
  Learning Rate: 0.008940319346660524
  Sigma Multiplier: 0.930564910649159
  Initialization Multiplier: 0.5153756807922253
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.36it/s, loss=-0.000277, elapsed time=0.05, total time=11.5]
[I 2025-06-07 13:40:42,985] Trial 327 finished with value: -0.00027678186623731325 and parameters: {'learning_rate': 0.008940319346660524, 'sigma_multiplier': 0.930564910649159, 'num_layers': 2, 'initialization_multiplier': 0.5153756807922253}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 327 final loss: -0.00027678
Trial 328:
  Learning Rate: 0.013219888178787779
  Sigma Multiplier: 0.9786013091540366
  Initialization Multiplier: 0.6710938179111361
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.48it/s, loss=-0.000273, elapsed time=0.06, total time=11.4]
[I 2025-06-07 13:40:54,450] Trial 328 finished with value: -0.00027273909518813947 and parameters: {'learning_rate': 0.013219888178787779, 'sigma_multiplier': 0.9786013091540366, 'num_layers': 2, 'initialization_multiplier': 0.6710938179111361}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 328 final loss: -0.00027274
Trial 329:
  Learning Rate: 0.01593263281567266
  Sigma Multiplier: 1.0160720243709371
  Initialization Multiplier: 0.5712974680803125
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.58it/s, loss=-0.000436, elapsed time=0.05, total time=11.4]
[I 2025-06-07 13:41:05,869] Trial 329 finished with value: -0.00043632862523614095 and parameters: {'learning_rate': 0.01593263281567266, 'sigma_multiplier': 1.0160720243709371, 'num_layers': 2, 'initialization_multiplier': 0.5712974680803125}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 329 final loss: -0.00043633
Trial 330:
  Learning Rate: 0.016486634116140358
  Sigma Multiplier: 0.8961374380075133
  Initialization Multiplier: 0.5566600870610087
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.04it/s, loss=-0.000359, elapsed time=0.05, total time=11.8]
[I 2025-06-07 13:41:17,733] Trial 330 finished with value: -0.00035947871520618013 and parameters: {'learning_rate': 0.016486634116140358, 'sigma_multiplier': 0.8961374380075133, 'num_layers': 2, 'initialization_multiplier': 0.5566600870610087}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 330 final loss: -0.00035948
Trial 331:
  Learning Rate: 0.014862822627143783
  Sigma Multiplier: 0.9667433612305483
  Initialization Multiplier: 0.4152177970928265
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.23it/s, loss=-0.000310, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:41:29,362] Trial 331 finished with value: -0.0003100634406238494 and parameters: {'learning_rate': 0.014862822627143783, 'sigma_multiplier': 0.9667433612305483, 'num_layers': 2, 'initialization_multiplier': 0.4152177970928265}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 331 final loss: -0.00031006
Trial 332:
  Learning Rate: 0.023870706277395043
  Sigma Multiplier: 1.0121531616680128
  Initialization Multiplier: 0.4710051158959646
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.80it/s, loss=-0.000304, elapsed time=0.06, total time=11.1]
[I 2025-06-07 13:41:40,522] Trial 332 finished with value: -0.0003038573665414141 and parameters: {'learning_rate': 0.023870706277395043, 'sigma_multiplier': 1.0121531616680128, 'num_layers': 2, 'initialization_multiplier': 0.4710051158959646}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 332 final loss: -0.00030386
Trial 333:
  Learning Rate: 0.020387307481222656
  Sigma Multiplier: 0.9295965171703289
  Initialization Multiplier: 0.5178597649028104
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.18it/s, loss=-0.000341, elapsed time=0.05, total time=11.7]
[I 2025-06-07 13:41:52,280] Trial 333 finished with value: -0.00034090628755732477 and parameters: {'learning_rate': 0.020387307481222656, 'sigma_multiplier': 0.9295965171703289, 'num_layers': 2, 'initialization_multiplier': 0.5178597649028104}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 333 final loss: -0.00034091
Trial 334:
  Learning Rate: 0.010760065598935402
  Sigma Multiplier: 1.0182279808644574
  Initialization Multiplier: 0.5647405943363557
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.48it/s, loss=-0.000434, elapsed time=0.06, total time=11.4]
[I 2025-06-07 13:42:03,707] Trial 334 finished with value: -0.00043422319266050123 and parameters: {'learning_rate': 0.010760065598935402, 'sigma_multiplier': 1.0182279808644574, 'num_layers': 2, 'initialization_multiplier': 0.5647405943363557}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 334 final loss: -0.00043422
Trial 335:
  Learning Rate: 0.01318789745981863
  Sigma Multiplier: 0.9869210771567581
  Initialization Multiplier: 0.5730180491458366
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.48it/s, loss=-0.000452, elapsed time=0.05, total time=11.5]
[I 2025-06-07 13:42:15,251] Trial 335 finished with value: -0.00045239598447024146 and parameters: {'learning_rate': 0.01318789745981863, 'sigma_multiplier': 0.9869210771567581, 'num_layers': 2, 'initialization_multiplier': 0.5730180491458366}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 335 final loss: -0.00045240
Trial 336:
  Learning Rate: 0.01751934963930545
  Sigma Multiplier: 1.0163714497145562
  Initialization Multiplier: 0.5866222462764779
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.91it/s, loss=-0.000352, elapsed time=0.07, total time=11.1]
[I 2025-06-07 13:42:26,399] Trial 336 finished with value: -0.0003522665475028198 and parameters: {'learning_rate': 0.01751934963930545, 'sigma_multiplier': 1.0163714497145562, 'num_layers': 2, 'initialization_multiplier': 0.5866222462764779}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 336 final loss: -0.00035227
Trial 337:
  Learning Rate: 0.014761680588941662
  Sigma Multiplier: 0.9800514395235606
  Initialization Multiplier: 0.6270728516359627
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.20it/s, loss=-0.000325, elapsed time=0.1, total time=11.7] 
[I 2025-06-07 13:42:38,098] Trial 337 finished with value: -0.00032488656078437275 and parameters: {'learning_rate': 0.014761680588941662, 'sigma_multiplier': 0.9800514395235606, 'num_layers': 2, 'initialization_multiplier': 0.6270728516359627}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 337 final loss: -0.00032489
Trial 338:
  Learning Rate: 0.02924130946856554
  Sigma Multiplier: 1.0298193589359104
  Initialization Multiplier: 0.5733071805663303
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.41it/s, loss=-0.000318, elapsed time=0.15, total time=11.4]
[I 2025-06-07 13:42:49,598] Trial 338 finished with value: -0.00031783909665558576 and parameters: {'learning_rate': 0.02924130946856554, 'sigma_multiplier': 1.0298193589359104, 'num_layers': 2, 'initialization_multiplier': 0.5733071805663303}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 338 final loss: -0.00031784
Trial 339:
  Learning Rate: 0.012911416411929376
  Sigma Multiplier: 0.851314065187591
  Initialization Multiplier: 0.6050338872405147
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:18<00:00,  8.05it/s, loss=0.000000, elapsed time=0.14, total time=19]   
[I 2025-06-07 13:43:08,674] Trial 339 finished with value: 1.0157213419028852e-07 and parameters: {'learning_rate': 0.012911416411929376, 'sigma_multiplier': 0.851314065187591, 'num_layers': 5, 'initialization_multiplier': 0.6050338872405147}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 339 final loss: 0.00000010
Trial 340:
  Learning Rate: 0.01139641721494189
  Sigma Multiplier: 0.9804203235316894
  Initialization Multiplier: 0.6600873712287847
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.14it/s, loss=-0.000266, elapsed time=0.07, total time=11.7]
[I 2025-06-07 13:43:20,406] Trial 340 finished with value: -0.00026569474292353966 and parameters: {'learning_rate': 0.01139641721494189, 'sigma_multiplier': 0.9804203235316894, 'num_layers': 2, 'initialization_multiplier': 0.6600873712287847}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 340 final loss: -0.00026569
Trial 341:
  Learning Rate: 0.015911980728161062
  Sigma Multiplier: 1.0450574083000166
  Initialization Multiplier: 0.5588648364102206
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.84it/s, loss=-0.000364, elapsed time=0.07, total time=11.2]
[I 2025-06-07 13:43:31,654] Trial 341 finished with value: -0.00036427287559286727 and parameters: {'learning_rate': 0.015911980728161062, 'sigma_multiplier': 1.0450574083000166, 'num_layers': 2, 'initialization_multiplier': 0.5588648364102206}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 341 final loss: -0.00036427
Trial 342:
  Learning Rate: 0.011171462190843112
  Sigma Multiplier: 1.012244864282694
  Initialization Multiplier: 0.70442783177322
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.37it/s, loss=-0.000248, elapsed time=0.07, total time=11.5]
[I 2025-06-07 13:43:43,222] Trial 342 finished with value: -0.00024767933231453727 and parameters: {'learning_rate': 0.011171462190843112, 'sigma_multiplier': 1.012244864282694, 'num_layers': 2, 'initialization_multiplier': 0.70442783177322}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 342 final loss: -0.00024768
Trial 343:
  Learning Rate: 0.013999490729445776
  Sigma Multiplier: 0.9465576213480408
  Initialization Multiplier: 0.5489260703730489
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.21it/s, loss=-0.000323, elapsed time=0.06, total time=11.7]
[I 2025-06-07 13:43:54,970] Trial 343 finished with value: -0.0003229735909947625 and parameters: {'learning_rate': 0.013999490729445776, 'sigma_multiplier': 0.9465576213480408, 'num_layers': 2, 'initialization_multiplier': 0.5489260703730489}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 343 final loss: -0.00032297
Trial 344:
  Learning Rate: 0.010028902630666225
  Sigma Multiplier: 0.9793628493239185
  Initialization Multiplier: 0.6058639461356923
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.46it/s, loss=-0.000369, elapsed time=0.08, total time=11.4]
[I 2025-06-07 13:44:06,429] Trial 344 finished with value: -0.0003694137466261701 and parameters: {'learning_rate': 0.010028902630666225, 'sigma_multiplier': 0.9793628493239185, 'num_layers': 2, 'initialization_multiplier': 0.6058639461356923}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 344 final loss: -0.00036941
Trial 345:
  Learning Rate: 0.012342231660895467
  Sigma Multiplier: 0.8908148702293572
  Initialization Multiplier: 0.6544992342364971
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.77it/s, loss=-0.000304, elapsed time=0.06, total time=12.1]
[I 2025-06-07 13:44:18,548] Trial 345 finished with value: -0.0003038506040521297 and parameters: {'learning_rate': 0.012342231660895467, 'sigma_multiplier': 0.8908148702293572, 'num_layers': 2, 'initialization_multiplier': 0.6544992342364971}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 345 final loss: -0.00030385
Trial 346:
  Learning Rate: 0.010356760763413951
  Sigma Multiplier: 1.0430921216175106
  Initialization Multiplier: 0.49456909240599656
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.50it/s, loss=-0.000330, elapsed time=0.07, total time=11.4]
[I 2025-06-07 13:44:29,972] Trial 346 finished with value: -0.0003296534572332559 and parameters: {'learning_rate': 0.010356760763413951, 'sigma_multiplier': 1.0430921216175106, 'num_layers': 2, 'initialization_multiplier': 0.49456909240599656}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 346 final loss: -0.00032965
Trial 347:
  Learning Rate: 0.019130694447160174
  Sigma Multiplier: 1.0006231344391354
  Initialization Multiplier: 0.5682486672294912
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.73it/s, loss=-0.000399, elapsed time=0.08, total time=11.2]
[I 2025-06-07 13:44:41,256] Trial 347 finished with value: -0.00039888321752278537 and parameters: {'learning_rate': 0.019130694447160174, 'sigma_multiplier': 1.0006231344391354, 'num_layers': 2, 'initialization_multiplier': 0.5682486672294912}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 347 final loss: -0.00039888
Trial 348:
  Learning Rate: 0.013101245188614998
  Sigma Multiplier: 0.9322928444801382
  Initialization Multiplier: 0.6150948812126049
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.18it/s, loss=-0.000348, elapsed time=0.09, total time=11.7]
[I 2025-06-07 13:44:52,981] Trial 348 finished with value: -0.000347742638076908 and parameters: {'learning_rate': 0.013101245188614998, 'sigma_multiplier': 0.9322928444801382, 'num_layers': 2, 'initialization_multiplier': 0.6150948812126049}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 348 final loss: -0.00034774
Trial 349:
  Learning Rate: 0.009399421024465136
  Sigma Multiplier: 1.055072048871443
  Initialization Multiplier: 0.5337209091800076
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.70it/s, loss=-0.000267, elapsed time=0.05, total time=11.2]
[I 2025-06-07 13:45:04,263] Trial 349 finished with value: -0.00026668104536281366 and parameters: {'learning_rate': 0.009399421024465136, 'sigma_multiplier': 1.055072048871443, 'num_layers': 2, 'initialization_multiplier': 0.5337209091800076}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 349 final loss: -0.00026668
Trial 350:
  Learning Rate: 0.011545374471622608
  Sigma Multiplier: 0.9679410872232712
  Initialization Multiplier: 0.5793139756500018
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.23it/s, loss=-0.000312, elapsed time=0.08, total time=11.7]
[I 2025-06-07 13:45:15,972] Trial 350 finished with value: -0.00031224482372308246 and parameters: {'learning_rate': 0.011545374471622608, 'sigma_multiplier': 0.9679410872232712, 'num_layers': 2, 'initialization_multiplier': 0.5793139756500018}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 350 final loss: -0.00031224
Trial 351:
  Learning Rate: 0.009044093848257545
  Sigma Multiplier: 1.0080560052664902
  Initialization Multiplier: 0.026673229657901865
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.12it/s, loss=-0.000141, elapsed time=0.09, total time=11.8]
[I 2025-06-07 13:45:27,788] Trial 351 finished with value: -0.00014093869946300397 and parameters: {'learning_rate': 0.009044093848257545, 'sigma_multiplier': 1.0080560052664902, 'num_layers': 2, 'initialization_multiplier': 0.026673229657901865}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 351 final loss: -0.00014094
Trial 352:
  Learning Rate: 0.014791950614073488
  Sigma Multiplier: 0.9085068184227252
  Initialization Multiplier: 0.4906796894935286
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.17it/s, loss=-0.000374, elapsed time=0.07, total time=11.7]
[I 2025-06-07 13:45:39,522] Trial 352 finished with value: -0.0003736325410429506 and parameters: {'learning_rate': 0.014791950614073488, 'sigma_multiplier': 0.9085068184227252, 'num_layers': 2, 'initialization_multiplier': 0.4906796894935286}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 352 final loss: -0.00037363
Trial 353:
  Learning Rate: 0.010511900479324201
  Sigma Multiplier: 1.0471834593797984
  Initialization Multiplier: 0.6366884630709785
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.71it/s, loss=-0.000288, elapsed time=0.07, total time=11.2]
[I 2025-06-07 13:45:50,805] Trial 353 finished with value: -0.00028832737795004616 and parameters: {'learning_rate': 0.010511900479324201, 'sigma_multiplier': 1.0471834593797984, 'num_layers': 2, 'initialization_multiplier': 0.6366884630709785}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 353 final loss: -0.00028833
Trial 354:
  Learning Rate: 0.008319375969357065
  Sigma Multiplier: 0.9533396875938325
  Initialization Multiplier: 0.527167187255273
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.19it/s, loss=-0.000333, elapsed time=0.08, total time=11.7]
[I 2025-06-07 13:46:02,527] Trial 354 finished with value: -0.00033272245796299037 and parameters: {'learning_rate': 0.008319375969357065, 'sigma_multiplier': 0.9533396875938325, 'num_layers': 2, 'initialization_multiplier': 0.527167187255273}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 354 final loss: -0.00033272
Trial 355:
  Learning Rate: 0.01736595963095857
  Sigma Multiplier: 1.014178056570057
  Initialization Multiplier: 0.6965876639147877
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.19it/s, loss=-0.000218, elapsed time=0.07, total time=11.8]
[I 2025-06-07 13:46:14,333] Trial 355 finished with value: -0.00021756757458057913 and parameters: {'learning_rate': 0.01736595963095857, 'sigma_multiplier': 1.014178056570057, 'num_layers': 2, 'initialization_multiplier': 0.6965876639147877}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 355 final loss: -0.00021757
Trial 356:
  Learning Rate: 0.01275676664015567
  Sigma Multiplier: 1.0796920071210088
  Initialization Multiplier: 0.5932100499762977
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.77it/s, loss=-0.000353, elapsed time=0.07, total time=11.2]
[I 2025-06-07 13:46:25,541] Trial 356 finished with value: -0.0003529173170798632 and parameters: {'learning_rate': 0.01275676664015567, 'sigma_multiplier': 1.0796920071210088, 'num_layers': 2, 'initialization_multiplier': 0.5932100499762977}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 356 final loss: -0.00035292
Trial 357:
  Learning Rate: 0.009473630068323396
  Sigma Multiplier: 0.9883455703878964
  Initialization Multiplier: 0.4474477327653162
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.36it/s, loss=-0.000330, elapsed time=0.06, total time=11.6]
[I 2025-06-07 13:46:37,226] Trial 357 finished with value: -0.00032971016458203153 and parameters: {'learning_rate': 0.009473630068323396, 'sigma_multiplier': 0.9883455703878964, 'num_layers': 2, 'initialization_multiplier': 0.4474477327653162}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 357 final loss: -0.00032971
Trial 358:
  Learning Rate: 0.00786559674275969
  Sigma Multiplier: 0.9400068780553908
  Initialization Multiplier: 0.5479112834747755
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.03it/s, loss=-0.000335, elapsed time=0.06, total time=11.8]
[I 2025-06-07 13:46:49,102] Trial 358 finished with value: -0.0003348013642479969 and parameters: {'learning_rate': 0.00786559674275969, 'sigma_multiplier': 0.9400068780553908, 'num_layers': 2, 'initialization_multiplier': 0.5479112834747755}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 358 final loss: -0.00033480
Trial 359:
  Learning Rate: 0.010612732360648561
  Sigma Multiplier: 1.036443575499903
  Initialization Multiplier: 0.6324532146006682
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.63it/s, loss=-0.000349, elapsed time=0.05, total time=11.3]
[I 2025-06-07 13:47:00,415] Trial 359 finished with value: -0.00034912321333094965 and parameters: {'learning_rate': 0.010612732360648561, 'sigma_multiplier': 1.036443575499903, 'num_layers': 2, 'initialization_multiplier': 0.6324532146006682}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 359 final loss: -0.00034912
Trial 360:
  Learning Rate: 0.007146637219748936
  Sigma Multiplier: 0.9789024103632082
  Initialization Multiplier: 0.49670538556005384
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.01it/s, loss=-0.000293, elapsed time=0.05, total time=11.9]
[I 2025-06-07 13:47:12,397] Trial 360 finished with value: -0.000292926026851976 and parameters: {'learning_rate': 0.007146637219748936, 'sigma_multiplier': 0.9789024103632082, 'num_layers': 2, 'initialization_multiplier': 0.49670538556005384}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 360 final loss: -0.00029293
Trial 361:
  Learning Rate: 0.008882317496736096
  Sigma Multiplier: 0.9028056265228428
  Initialization Multiplier: 0.5721467243870901
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.76it/s, loss=-0.000387, elapsed time=0.09, total time=13.1]
[I 2025-06-07 13:47:25,540] Trial 361 finished with value: -0.000387333341801373 and parameters: {'learning_rate': 0.008882317496736096, 'sigma_multiplier': 0.9028056265228428, 'num_layers': 2, 'initialization_multiplier': 0.5721467243870901}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 361 final loss: -0.00038733
Trial 362:
  Learning Rate: 0.012229405525802968
  Sigma Multiplier: 0.8547924948985265
  Initialization Multiplier: 0.5318188233395732
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.67it/s, loss=-0.000354, elapsed time=0.09, total time=13.2]
[I 2025-06-07 13:47:38,766] Trial 362 finished with value: -0.0003537348416046371 and parameters: {'learning_rate': 0.012229405525802968, 'sigma_multiplier': 0.8547924948985265, 'num_layers': 2, 'initialization_multiplier': 0.5318188233395732}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 362 final loss: -0.00035373
Trial 363:
  Learning Rate: 0.016076104840365167
  Sigma Multiplier: 1.1023285355941013
  Initialization Multiplier: 0.6649527993861526
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.84it/s, loss=-0.000309, elapsed time=0.06, total time=12]  
[I 2025-06-07 13:47:50,811] Trial 363 finished with value: -0.00030936646929496925 and parameters: {'learning_rate': 0.016076104840365167, 'sigma_multiplier': 1.1023285355941013, 'num_layers': 2, 'initialization_multiplier': 0.6649527993861526}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 363 final loss: -0.00030937
Trial 364:
  Learning Rate: 0.000426637457723328
  Sigma Multiplier: 1.052798007803345
  Initialization Multiplier: 0.3930168157734303
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.90it/s, loss=0.002729, elapsed time=0.06, total time=13.3]
[I 2025-06-07 13:48:04,141] Trial 364 finished with value: 0.002729260333057728 and parameters: {'learning_rate': 0.000426637457723328, 'sigma_multiplier': 1.052798007803345, 'num_layers': 2, 'initialization_multiplier': 0.3930168157734303}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 364 final loss: 0.00272926
Trial 365:
  Learning Rate: 0.007624720283315056
  Sigma Multiplier: 1.0080114901940795
  Initialization Multiplier: 0.6085118004514811
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.12it/s, loss=-0.000349, elapsed time=0.06, total time=11.8]
[I 2025-06-07 13:48:16,028] Trial 365 finished with value: -0.00034884592736282496 and parameters: {'learning_rate': 0.007624720283315056, 'sigma_multiplier': 1.0080114901940795, 'num_layers': 2, 'initialization_multiplier': 0.6085118004514811}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 365 final loss: -0.00034885
Trial 366:
  Learning Rate: 0.010245831856861539
  Sigma Multiplier: 0.9456247202137482
  Initialization Multiplier: 0.4577563845214711
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.19it/s, loss=-0.000439, elapsed time=0.06, total time=12.6]
[I 2025-06-07 13:48:28,668] Trial 366 finished with value: -0.0004394372720584836 and parameters: {'learning_rate': 0.010245831856861539, 'sigma_multiplier': 0.9456247202137482, 'num_layers': 2, 'initialization_multiplier': 0.4577563845214711}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 366 final loss: -0.00043944
Trial 367:
  Learning Rate: 0.011396846003117776
  Sigma Multiplier: 0.9283400576663557
  Initialization Multiplier: 0.44301835523357397
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.37it/s, loss=-0.000379, elapsed time=0.09, total time=14.9]
[I 2025-06-07 13:48:43,598] Trial 367 finished with value: -0.00037860407769752 and parameters: {'learning_rate': 0.011396846003117776, 'sigma_multiplier': 0.9283400576663557, 'num_layers': 2, 'initialization_multiplier': 0.44301835523357397}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 367 final loss: -0.00037860
Trial 368:
  Learning Rate: 0.08189348048220246
  Sigma Multiplier: 0.5819342400910708
  Initialization Multiplier: 0.3703959687074022
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.81it/s, loss=0.000026, elapsed time=0.05, total time=12.1]
[I 2025-06-07 13:48:55,783] Trial 368 finished with value: 2.6279898592461928e-05 and parameters: {'learning_rate': 0.08189348048220246, 'sigma_multiplier': 0.5819342400910708, 'num_layers': 1, 'initialization_multiplier': 0.3703959687074022}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 368 final loss: 0.00002628
Trial 369:
  Learning Rate: 0.013595419338834543
  Sigma Multiplier: 0.8707688416380139
  Initialization Multiplier: 0.4347975220264313
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.84it/s, loss=-0.000453, elapsed time=0.06, total time=12]  
[I 2025-06-07 13:49:07,792] Trial 369 finished with value: -0.0004533337065936015 and parameters: {'learning_rate': 0.013595419338834543, 'sigma_multiplier': 0.8707688416380139, 'num_layers': 2, 'initialization_multiplier': 0.4347975220264313}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 369 final loss: -0.00045333
Trial 370:
  Learning Rate: 0.01021258984210763
  Sigma Multiplier: 0.7950040321144848
  Initialization Multiplier: 0.34620477113953807
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.64it/s, loss=-0.000200, elapsed time=0.05, total time=13.2]
[I 2025-06-07 13:49:21,076] Trial 370 finished with value: -0.00020049318279737489 and parameters: {'learning_rate': 0.01021258984210763, 'sigma_multiplier': 0.7950040321144848, 'num_layers': 2, 'initialization_multiplier': 0.34620477113953807}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 370 final loss: -0.00020049
Trial 371:
  Learning Rate: 0.013353118508103175
  Sigma Multiplier: 0.8751433794896558
  Initialization Multiplier: 0.41426514003550047
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.77it/s, loss=-0.000366, elapsed time=0.07, total time=12]  
[I 2025-06-07 13:49:33,161] Trial 371 finished with value: -0.00036603255454578513 and parameters: {'learning_rate': 0.013353118508103175, 'sigma_multiplier': 0.8751433794896558, 'num_layers': 2, 'initialization_multiplier': 0.41426514003550047}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 371 final loss: -0.00036603
Trial 372:
  Learning Rate: 0.008983112109570623
  Sigma Multiplier: 0.8037881497406307
  Initialization Multiplier: 0.45544697223150654
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.07it/s, loss=-0.000280, elapsed time=0.05, total time=13.8]
[I 2025-06-07 13:49:47,048] Trial 372 finished with value: -0.0002797455119928876 and parameters: {'learning_rate': 0.008983112109570623, 'sigma_multiplier': 0.8037881497406307, 'num_layers': 2, 'initialization_multiplier': 0.45544697223150654}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 372 final loss: -0.00027975
Trial 373:
  Learning Rate: 0.010983256565369068
  Sigma Multiplier: 0.8834277709440082
  Initialization Multiplier: 0.40445936836067276
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.48it/s, loss=-0.000323, elapsed time=0.06, total time=12.5]
[I 2025-06-07 13:49:59,636] Trial 373 finished with value: -0.00032284369670185876 and parameters: {'learning_rate': 0.010983256565369068, 'sigma_multiplier': 0.8834277709440082, 'num_layers': 2, 'initialization_multiplier': 0.40445936836067276}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 373 final loss: -0.00032284
Trial 374:
  Learning Rate: 0.009232179328353242
  Sigma Multiplier: 0.8360796261628496
  Initialization Multiplier: 0.47801259685889225
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.83it/s, loss=-0.000367, elapsed time=0.06, total time=12]  
[I 2025-06-07 13:50:11,697] Trial 374 finished with value: -0.0003672887073379421 and parameters: {'learning_rate': 0.009232179328353242, 'sigma_multiplier': 0.8360796261628496, 'num_layers': 2, 'initialization_multiplier': 0.47801259685889225}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 374 final loss: -0.00036729
Trial 375:
  Learning Rate: 0.013322588831890664
  Sigma Multiplier: 0.942130156814893
  Initialization Multiplier: 0.43238439194271644
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.21it/s, loss=-0.000346, elapsed time=0.05, total time=11.7]
[I 2025-06-07 13:50:23,393] Trial 375 finished with value: -0.0003460255734562812 and parameters: {'learning_rate': 0.013322588831890664, 'sigma_multiplier': 0.942130156814893, 'num_layers': 2, 'initialization_multiplier': 0.43238439194271644}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 375 final loss: -0.00034603
Trial 376:
  Learning Rate: 0.008349342319093381
  Sigma Multiplier: 0.9100882284383266
  Initialization Multiplier: 0.47976740872892265
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.37it/s, loss=-0.000288, elapsed time=0.08, total time=11.6]
[I 2025-06-07 13:50:35,012] Trial 376 finished with value: -0.0002879798267020585 and parameters: {'learning_rate': 0.008349342319093381, 'sigma_multiplier': 0.9100882284383266, 'num_layers': 2, 'initialization_multiplier': 0.47976740872892265}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 376 final loss: -0.00028798
Trial 377:
  Learning Rate: 0.010823788510080427
  Sigma Multiplier: 0.9436274795228711
  Initialization Multiplier: 0.37396460546304555
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.27it/s, loss=-0.000313, elapsed time=0.08, total time=11.6]
[I 2025-06-07 13:50:46,701] Trial 377 finished with value: -0.0003134748954959076 and parameters: {'learning_rate': 0.010823788510080427, 'sigma_multiplier': 0.9436274795228711, 'num_layers': 2, 'initialization_multiplier': 0.37396460546304555}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 377 final loss: -0.00031347
Trial 378:
  Learning Rate: 0.014634376025346188
  Sigma Multiplier: 0.9727122878686566
  Initialization Multiplier: 1.8974998073256084
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.60it/s, loss=-0.000146, elapsed time=0.08, total time=12.2]
[I 2025-06-07 13:50:58,927] Trial 378 finished with value: -0.00014638174235467232 and parameters: {'learning_rate': 0.014634376025346188, 'sigma_multiplier': 0.9727122878686566, 'num_layers': 2, 'initialization_multiplier': 1.8974998073256084}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 378 final loss: -0.00014638
Trial 379:
  Learning Rate: 0.007852601068627767
  Sigma Multiplier: 1.025332684108601
  Initialization Multiplier: 0.5160246532081127
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.69it/s, loss=-0.000351, elapsed time=0.06, total time=11.2]
[I 2025-06-07 13:51:10,208] Trial 379 finished with value: -0.00035088263454337434 and parameters: {'learning_rate': 0.007852601068627767, 'sigma_multiplier': 1.025332684108601, 'num_layers': 2, 'initialization_multiplier': 0.5160246532081127}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 379 final loss: -0.00035088
Trial 380:
  Learning Rate: 0.009686687975021696
  Sigma Multiplier: 0.7409507624574058
  Initialization Multiplier: 0.4224275505646244
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.43it/s, loss=-0.000104, elapsed time=0.08, total time=12.4]
[I 2025-06-07 13:51:22,654] Trial 380 finished with value: -0.00010363169122604206 and parameters: {'learning_rate': 0.009686687975021696, 'sigma_multiplier': 0.7409507624574058, 'num_layers': 2, 'initialization_multiplier': 0.4224275505646244}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 380 final loss: -0.00010363
Trial 381:
  Learning Rate: 0.006713441196332713
  Sigma Multiplier: 0.8700860274821941
  Initialization Multiplier: 0.4693291959839524
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.09it/s, loss=-0.000338, elapsed time=0.08, total time=13.8]
[I 2025-06-07 13:51:36,509] Trial 381 finished with value: -0.00033814118580620465 and parameters: {'learning_rate': 0.006713441196332713, 'sigma_multiplier': 0.8700860274821941, 'num_layers': 2, 'initialization_multiplier': 0.4693291959839524}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 381 final loss: -0.00033814
Trial 382:
  Learning Rate: 0.012124965332493295
  Sigma Multiplier: 0.989551430172681
  Initialization Multiplier: 0.5117301154908607
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.47it/s, loss=-0.000461, elapsed time=0.06, total time=11.5]
[I 2025-06-07 13:51:48,085] Trial 382 finished with value: -0.00046105848822476024 and parameters: {'learning_rate': 0.012124965332493295, 'sigma_multiplier': 0.989551430172681, 'num_layers': 2, 'initialization_multiplier': 0.5117301154908607}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 382 final loss: -0.00046106
Trial 383:
  Learning Rate: 0.012157391314678934
  Sigma Multiplier: 0.9953711895615042
  Initialization Multiplier: 0.5353810755209452
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.46it/s, loss=-0.000419, elapsed time=0.05, total time=11.5]
[I 2025-06-07 13:51:59,590] Trial 383 finished with value: -0.0004193054633400065 and parameters: {'learning_rate': 0.012157391314678934, 'sigma_multiplier': 0.9953711895615042, 'num_layers': 2, 'initialization_multiplier': 0.5353810755209452}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 383 final loss: -0.00041931
Trial 384:
  Learning Rate: 0.017344228131383074
  Sigma Multiplier: 1.0681815648470345
  Initialization Multiplier: 0.5167204976496144
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.63it/s, loss=-0.000333, elapsed time=0.06, total time=11.3]
[I 2025-06-07 13:52:10,898] Trial 384 finished with value: -0.0003332161570543687 and parameters: {'learning_rate': 0.017344228131383074, 'sigma_multiplier': 1.0681815648470345, 'num_layers': 2, 'initialization_multiplier': 0.5167204976496144}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 384 final loss: -0.00033322
Trial 385:
  Learning Rate: 0.013774267477655085
  Sigma Multiplier: 1.0236975630429475
  Initialization Multiplier: 0.56061062869944
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.05it/s, loss=-0.000425, elapsed time=0.06, total time=11.8]
[I 2025-06-07 13:52:22,807] Trial 385 finished with value: -0.00042461348594818285 and parameters: {'learning_rate': 0.013774267477655085, 'sigma_multiplier': 1.0236975630429475, 'num_layers': 2, 'initialization_multiplier': 0.56061062869944}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 385 final loss: -0.00042461
Trial 386:
  Learning Rate: 0.023048095807898822
  Sigma Multiplier: 1.032608795380142
  Initialization Multiplier: 0.5674822144462479
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.02it/s, loss=-0.000345, elapsed time=0.06, total time=11.8]
[I 2025-06-07 13:52:34,691] Trial 386 finished with value: -0.0003452695596912014 and parameters: {'learning_rate': 0.023048095807898822, 'sigma_multiplier': 1.032608795380142, 'num_layers': 2, 'initialization_multiplier': 0.5674822144462479}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 386 final loss: -0.00034527
Trial 387:
  Learning Rate: 0.013566117708313032
  Sigma Multiplier: 1.0684447225708165
  Initialization Multiplier: 0.5593456932040628
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.82it/s, loss=-0.000311, elapsed time=0.07, total time=11.2]
[I 2025-06-07 13:52:45,963] Trial 387 finished with value: -0.0003107456587468006 and parameters: {'learning_rate': 0.013566117708313032, 'sigma_multiplier': 1.0684447225708165, 'num_layers': 2, 'initialization_multiplier': 0.5593456932040628}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 387 final loss: -0.00031075
Trial 388:
  Learning Rate: 0.01606768493970034
  Sigma Multiplier: 1.0132290704179299
  Initialization Multiplier: 0.5010704745233482
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.25it/s, loss=-0.000295, elapsed time=0.05, total time=11.6]
[I 2025-06-07 13:52:57,647] Trial 388 finished with value: -0.0002945787980804746 and parameters: {'learning_rate': 0.01606768493970034, 'sigma_multiplier': 1.0132290704179299, 'num_layers': 2, 'initialization_multiplier': 0.5010704745233482}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 388 final loss: -0.00029458
Trial 389:
  Learning Rate: 0.014873853235434735
  Sigma Multiplier: 1.0407385727897676
  Initialization Multiplier: 0.6151803318173684
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.58it/s, loss=-0.000392, elapsed time=0.06, total time=11.4]
[I 2025-06-07 13:53:09,061] Trial 389 finished with value: -0.0003915657793271685 and parameters: {'learning_rate': 0.014873853235434735, 'sigma_multiplier': 1.0407385727897676, 'num_layers': 2, 'initialization_multiplier': 0.6151803318173684}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 389 final loss: -0.00039157
Trial 390:
  Learning Rate: 0.011816937512862008
  Sigma Multiplier: 1.0843649982234398
  Initialization Multiplier: 0.5491414340325708
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.91it/s, loss=-0.000406, elapsed time=0.06, total time=11]  
[I 2025-06-07 13:53:20,128] Trial 390 finished with value: -0.0004057972372883846 and parameters: {'learning_rate': 0.011816937512862008, 'sigma_multiplier': 1.0843649982234398, 'num_layers': 2, 'initialization_multiplier': 0.5491414340325708}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 390 final loss: -0.00040580
Trial 391:
  Learning Rate: 0.017969160820845632
  Sigma Multiplier: 0.9924705219675228
  Initialization Multiplier: 0.5947429966583879
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.76it/s, loss=-0.000419, elapsed time=0.06, total time=11.2]
[I 2025-06-07 13:53:31,347] Trial 391 finished with value: -0.0004186548185493868 and parameters: {'learning_rate': 0.017969160820845632, 'sigma_multiplier': 0.9924705219675228, 'num_layers': 2, 'initialization_multiplier': 0.5947429966583879}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 391 final loss: -0.00041865
Trial 392:
  Learning Rate: 0.013527226073781488
  Sigma Multiplier: 1.0157654806584886
  Initialization Multiplier: 0.643672587517269
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.55it/s, loss=-0.000433, elapsed time=0.07, total time=11.4]
[I 2025-06-07 13:53:42,816] Trial 392 finished with value: -0.0004328650560029893 and parameters: {'learning_rate': 0.013527226073781488, 'sigma_multiplier': 1.0157654806584886, 'num_layers': 2, 'initialization_multiplier': 0.643672587517269}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 392 final loss: -0.00043287
Trial 393:
  Learning Rate: 0.014126560850859211
  Sigma Multiplier: 0.972223814991584
  Initialization Multiplier: 0.6505373906710866
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.80it/s, loss=-0.000303, elapsed time=0.06, total time=11.2]
[I 2025-06-07 13:53:54,013] Trial 393 finished with value: -0.00030341872404349136 and parameters: {'learning_rate': 0.014126560850859211, 'sigma_multiplier': 0.972223814991584, 'num_layers': 2, 'initialization_multiplier': 0.6505373906710866}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 393 final loss: -0.00030342
Trial 394:
  Learning Rate: 0.011931059818243462
  Sigma Multiplier: 1.016493403525563
  Initialization Multiplier: 0.7220065151888662
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.27it/s, loss=-0.000178, elapsed time=0.07, total time=11.7]
[I 2025-06-07 13:54:05,798] Trial 394 finished with value: -0.00017775632429954598 and parameters: {'learning_rate': 0.011931059818243462, 'sigma_multiplier': 1.016493403525563, 'num_layers': 2, 'initialization_multiplier': 0.7220065151888662}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 394 final loss: -0.00017776
Trial 395:
  Learning Rate: 0.02010557023192252
  Sigma Multiplier: 0.9211867273593387
  Initialization Multiplier: 0.6814399583894639
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.79it/s, loss=-0.000299, elapsed time=0.06, total time=12.1]
[I 2025-06-07 13:54:17,934] Trial 395 finished with value: -0.00029914212468170085 and parameters: {'learning_rate': 0.02010557023192252, 'sigma_multiplier': 0.9211867273593387, 'num_layers': 2, 'initialization_multiplier': 0.6814399583894639}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 395 final loss: -0.00029914
Trial 396:
  Learning Rate: 0.014738908250768663
  Sigma Multiplier: 0.9604708845145886
  Initialization Multiplier: 0.5129435376286496
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.23it/s, loss=-0.000363, elapsed time=0.07, total time=11.6]
[I 2025-06-07 13:54:29,620] Trial 396 finished with value: -0.0003626191291258848 and parameters: {'learning_rate': 0.014738908250768663, 'sigma_multiplier': 0.9604708845145886, 'num_layers': 2, 'initialization_multiplier': 0.5129435376286496}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 396 final loss: -0.00036262
Trial 397:
  Learning Rate: 0.011532201219062094
  Sigma Multiplier: 1.009405282333681
  Initialization Multiplier: 0.5528777522520807
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.51it/s, loss=-0.000355, elapsed time=0.07, total time=11.4]
[I 2025-06-07 13:54:41,056] Trial 397 finished with value: -0.0003548732367303542 and parameters: {'learning_rate': 0.011532201219062094, 'sigma_multiplier': 1.009405282333681, 'num_layers': 2, 'initialization_multiplier': 0.5528777522520807}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 397 final loss: -0.00035487
Trial 398:
  Learning Rate: 0.010457766303668094
  Sigma Multiplier: 1.0470453748026685
  Initialization Multiplier: 0.48282560249284057
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.81it/s, loss=-0.000333, elapsed time=0.07, total time=11.1]
[I 2025-06-07 13:54:52,241] Trial 398 finished with value: -0.00033253636607557526 and parameters: {'learning_rate': 0.010457766303668094, 'sigma_multiplier': 1.0470453748026685, 'num_layers': 2, 'initialization_multiplier': 0.48282560249284057}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 398 final loss: -0.00033254
Trial 399:
  Learning Rate: 0.013626369285840899
  Sigma Multiplier: 0.9870167877935084
  Initialization Multiplier: 0.6263468620692086
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.33it/s, loss=-0.000450, elapsed time=0.05, total time=11.6]
[I 2025-06-07 13:55:03,878] Trial 399 finished with value: -0.00044953263487591957 and parameters: {'learning_rate': 0.013626369285840899, 'sigma_multiplier': 0.9870167877935084, 'num_layers': 2, 'initialization_multiplier': 0.6263468620692086}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 399 final loss: -0.00044953
Trial 400:
  Learning Rate: 0.016543438166205522
  Sigma Multiplier: 0.9083099106902198
  Initialization Multiplier: 0.6524989316144486
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.75it/s, loss=-0.000209, elapsed time=0.09, total time=12.1]
[I 2025-06-07 13:55:16,042] Trial 400 finished with value: -0.00020901838222892722 and parameters: {'learning_rate': 0.016543438166205522, 'sigma_multiplier': 0.9083099106902198, 'num_layers': 2, 'initialization_multiplier': 0.6524989316144486}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 400 final loss: -0.00020902
Trial 401:
  Learning Rate: 0.012677520716116705
  Sigma Multiplier: 0.9695850367109096
  Initialization Multiplier: 0.6273615515983602
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.83it/s, loss=-0.000406, elapsed time=0.05, total time=11.2]
[I 2025-06-07 13:55:27,294] Trial 401 finished with value: -0.0004064134438965087 and parameters: {'learning_rate': 0.012677520716116705, 'sigma_multiplier': 0.9695850367109096, 'num_layers': 2, 'initialization_multiplier': 0.6273615515983602}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 401 final loss: -0.00040641
Trial 402:
  Learning Rate: 0.010236962631844159
  Sigma Multiplier: 0.9479981242119498
  Initialization Multiplier: 0.7075851529789869
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.05it/s, loss=-0.000319, elapsed time=0.08, total time=11.8]
[I 2025-06-07 13:55:39,138] Trial 402 finished with value: -0.0003189816640962181 and parameters: {'learning_rate': 0.010236962631844159, 'sigma_multiplier': 0.9479981242119498, 'num_layers': 2, 'initialization_multiplier': 0.7075851529789869}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 402 final loss: -0.00031898
Trial 403:
  Learning Rate: 0.009899015745434822
  Sigma Multiplier: 0.9910193815331841
  Initialization Multiplier: 0.5904280974196418
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.21it/s, loss=-0.000378, elapsed time=0.06, total time=11.7]
[I 2025-06-07 13:55:50,884] Trial 403 finished with value: -0.00037808901085174 and parameters: {'learning_rate': 0.009899015745434822, 'sigma_multiplier': 0.9910193815331841, 'num_layers': 2, 'initialization_multiplier': 0.5904280974196418}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 403 final loss: -0.00037809
Trial 404:
  Learning Rate: 0.018749183562091997
  Sigma Multiplier: 0.9218068749557908
  Initialization Multiplier: 0.7649752911584908
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.66it/s, loss=-0.000093, elapsed time=0.08, total time=12.2]
[I 2025-06-07 13:56:03,090] Trial 404 finished with value: -9.330640590782535e-05 and parameters: {'learning_rate': 0.018749183562091997, 'sigma_multiplier': 0.9218068749557908, 'num_layers': 2, 'initialization_multiplier': 0.7649752911584908}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 404 final loss: -0.00009331
Trial 405:
  Learning Rate: 0.011829416356346925
  Sigma Multiplier: 0.9909592019861571
  Initialization Multiplier: 0.6762692089581779
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.45it/s, loss=-0.000339, elapsed time=0.07, total time=11.4]
[I 2025-06-07 13:56:14,542] Trial 405 finished with value: -0.000339007759370086 and parameters: {'learning_rate': 0.011829416356346925, 'sigma_multiplier': 0.9909592019861571, 'num_layers': 2, 'initialization_multiplier': 0.6762692089581779}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 405 final loss: -0.00033901
Trial 406:
  Learning Rate: 0.01321390548861559
  Sigma Multiplier: 1.0390835420108548
  Initialization Multiplier: 0.6214960941156659
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.06it/s, loss=-0.000205, elapsed time=0.08, total time=13.9]
[I 2025-06-07 13:56:28,474] Trial 406 finished with value: -0.00020486393777374086 and parameters: {'learning_rate': 0.01321390548861559, 'sigma_multiplier': 1.0390835420108548, 'num_layers': 3, 'initialization_multiplier': 0.6214960941156659}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 406 final loss: -0.00020486
Trial 407:
  Learning Rate: 0.009106009406448
  Sigma Multiplier: 0.9582595333507197
  Initialization Multiplier: 0.520674892391154
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.31it/s, loss=-0.000316, elapsed time=0.07, total time=11.5]
[I 2025-06-07 13:56:40,062] Trial 407 finished with value: -0.00031600362831886984 and parameters: {'learning_rate': 0.009106009406448, 'sigma_multiplier': 0.9582595333507197, 'num_layers': 2, 'initialization_multiplier': 0.520674892391154}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 407 final loss: -0.00031600
Trial 408:
  Learning Rate: 0.01024931339310504
  Sigma Multiplier: 0.864896840211394
  Initialization Multiplier: 0.5788756406650497
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.27it/s, loss=-0.000311, elapsed time=0.1, total time=11.7] 
[I 2025-06-07 13:56:51,773] Trial 408 finished with value: -0.00031138586795091596 and parameters: {'learning_rate': 0.01024931339310504, 'sigma_multiplier': 0.864896840211394, 'num_layers': 2, 'initialization_multiplier': 0.5788756406650497}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 408 final loss: -0.00031139
Trial 409:
  Learning Rate: 0.015577908784627665
  Sigma Multiplier: 1.0643687393165178
  Initialization Multiplier: 0.6306253337911055
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.40it/s, loss=-0.000409, elapsed time=0.06, total time=10.7]
[I 2025-06-07 13:57:02,528] Trial 409 finished with value: -0.000408935033625929 and parameters: {'learning_rate': 0.015577908784627665, 'sigma_multiplier': 1.0643687393165178, 'num_layers': 2, 'initialization_multiplier': 0.6306253337911055}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 409 final loss: -0.00040894
Trial 410:
  Learning Rate: 0.011339216863054112
  Sigma Multiplier: 0.9980463074452464
  Initialization Multiplier: 0.4777201202450139
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.67it/s, loss=-0.000274, elapsed time=0.07, total time=11.2]
[I 2025-06-07 13:57:13,795] Trial 410 finished with value: -0.0002739373772632759 and parameters: {'learning_rate': 0.011339216863054112, 'sigma_multiplier': 0.9980463074452464, 'num_layers': 2, 'initialization_multiplier': 0.4777201202450139}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 410 final loss: -0.00027394
Trial 411:
  Learning Rate: 0.008824643352397113
  Sigma Multiplier: 0.9375127205952815
  Initialization Multiplier: 0.5442740030815376
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.46it/s, loss=-0.000332, elapsed time=0.06, total time=11.5]
[I 2025-06-07 13:57:25,336] Trial 411 finished with value: -0.00033235180696624376 and parameters: {'learning_rate': 0.008824643352397113, 'sigma_multiplier': 0.9375127205952815, 'num_layers': 2, 'initialization_multiplier': 0.5442740030815376}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 411 final loss: -0.00033235
Trial 412:
  Learning Rate: 0.013356889908595217
  Sigma Multiplier: 1.1067399563853664
  Initialization Multiplier: 0.5970723885148368
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.69it/s, loss=-0.000331, elapsed time=0.06, total time=11.3]
[I 2025-06-07 13:57:36,732] Trial 412 finished with value: -0.00033142977193884715 and parameters: {'learning_rate': 0.013356889908595217, 'sigma_multiplier': 1.1067399563853664, 'num_layers': 2, 'initialization_multiplier': 0.5970723885148368}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 412 final loss: -0.00033143
Trial 413:
  Learning Rate: 0.011267189604325678
  Sigma Multiplier: 0.9080891008387567
  Initialization Multiplier: 0.6657770908305733
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.79it/s, loss=-0.000178, elapsed time=0.08, total time=12.1]
[I 2025-06-07 13:57:48,870] Trial 413 finished with value: -0.00017783940223409152 and parameters: {'learning_rate': 0.011267189604325678, 'sigma_multiplier': 0.9080891008387567, 'num_layers': 2, 'initialization_multiplier': 0.6657770908305733}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 413 final loss: -0.00017784
Trial 414:
  Learning Rate: 0.020417819842008748
  Sigma Multiplier: 0.46485966796259537
  Initialization Multiplier: 0.5141842244586061
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 10.83it/s, loss=0.000376, elapsed time=0.15, total time=14.1]
[I 2025-06-07 13:58:03,067] Trial 414 finished with value: 0.0003756003663420788 and parameters: {'learning_rate': 0.020417819842008748, 'sigma_multiplier': 0.46485966796259537, 'num_layers': 2, 'initialization_multiplier': 0.5141842244586061}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 414 final loss: 0.00037560
Trial 415:
  Learning Rate: 0.0016248170621675178
  Sigma Multiplier: 1.025391922666261
  Initialization Multiplier: 0.5628357117617759
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.58it/s, loss=-0.000032, elapsed time=0.06, total time=11.4]
[I 2025-06-07 13:58:14,491] Trial 415 finished with value: -3.152490229675406e-05 and parameters: {'learning_rate': 0.0016248170621675178, 'sigma_multiplier': 1.025391922666261, 'num_layers': 2, 'initialization_multiplier': 0.5628357117617759}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 415 final loss: -0.00003152
Trial 416:
  Learning Rate: 0.009038125975440679
  Sigma Multiplier: 1.7565101805303396
  Initialization Multiplier: 1.471358637895218
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.72it/s, loss=-0.000188, elapsed time=0.06, total time=10.5]
[I 2025-06-07 13:58:25,050] Trial 416 finished with value: -0.00018791507889117146 and parameters: {'learning_rate': 0.009038125975440679, 'sigma_multiplier': 1.7565101805303396, 'num_layers': 2, 'initialization_multiplier': 1.471358637895218}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 416 final loss: -0.00018792
Trial 417:
  Learning Rate: 0.01646749605605015
  Sigma Multiplier: 0.9723818743405845
  Initialization Multiplier: 0.45914235248486035
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.30it/s, loss=-0.000380, elapsed time=0.1, total time=11.6] 
[I 2025-06-07 13:58:36,670] Trial 417 finished with value: -0.0003799681562428705 and parameters: {'learning_rate': 0.01646749605605015, 'sigma_multiplier': 0.9723818743405845, 'num_layers': 2, 'initialization_multiplier': 0.45914235248486035}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 417 final loss: -0.00037997
Trial 418:
  Learning Rate: 0.010374882817128442
  Sigma Multiplier: 1.0657634096828847
  Initialization Multiplier: 0.5840636039931794
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.49it/s, loss=-0.000353, elapsed time=0.05, total time=11.5]
[I 2025-06-07 13:58:48,220] Trial 418 finished with value: -0.0003532305079582724 and parameters: {'learning_rate': 0.010374882817128442, 'sigma_multiplier': 1.0657634096828847, 'num_layers': 2, 'initialization_multiplier': 0.5840636039931794}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 418 final loss: -0.00035323
Trial 419:
  Learning Rate: 5.278892739073629e-05
  Sigma Multiplier: 0.8246761890516148
  Initialization Multiplier: 0.6339908103963784
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.31it/s, loss=0.095999, elapsed time=0.06, total time=12.6]
[I 2025-06-07 13:59:00,827] Trial 419 finished with value: 0.09599914298398492 and parameters: {'learning_rate': 5.278892739073629e-05, 'sigma_multiplier': 0.8246761890516148, 'num_layers': 2, 'initialization_multiplier': 0.6339908103963784}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 419 final loss: 0.09599914
Trial 420:
  Learning Rate: 0.01356857926363592
  Sigma Multiplier: 1.0034675763987706
  Initialization Multiplier: 1.9746507947490524
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.12it/s, loss=-0.000047, elapsed time=0.06, total time=11.7]
[I 2025-06-07 13:59:12,610] Trial 420 finished with value: -4.681282998531291e-05 and parameters: {'learning_rate': 0.01356857926363592, 'sigma_multiplier': 1.0034675763987706, 'num_layers': 2, 'initialization_multiplier': 1.9746507947490524}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 420 final loss: -0.00004681
Trial 421:
  Learning Rate: 0.008329399925550677
  Sigma Multiplier: 0.9478382087493603
  Initialization Multiplier: 0.7007911160389813
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.10it/s, loss=-0.000271, elapsed time=0.06, total time=11.9]
[I 2025-06-07 13:59:24,529] Trial 421 finished with value: -0.0002711028547024553 and parameters: {'learning_rate': 0.008329399925550677, 'sigma_multiplier': 0.9478382087493603, 'num_layers': 2, 'initialization_multiplier': 0.7007911160389813}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 421 final loss: -0.00027110
Trial 422:
  Learning Rate: 0.0097517038534069
  Sigma Multiplier: 1.041245046851373
  Initialization Multiplier: 0.529703379592389
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.91it/s, loss=-0.000391, elapsed time=0.06, total time=11.9]
[I 2025-06-07 13:59:36,533] Trial 422 finished with value: -0.0003908594651854549 and parameters: {'learning_rate': 0.0097517038534069, 'sigma_multiplier': 1.041245046851373, 'num_layers': 2, 'initialization_multiplier': 0.529703379592389}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 422 final loss: -0.00039086
Trial 423:
  Learning Rate: 0.01184056165882762
  Sigma Multiplier: 0.8924443396672987
  Initialization Multiplier: 0.5047295068734352
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.75it/s, loss=-0.000315, elapsed time=0.06, total time=12.1]
[I 2025-06-07 13:59:48,680] Trial 423 finished with value: -0.00031512161515623614 and parameters: {'learning_rate': 0.01184056165882762, 'sigma_multiplier': 0.8924443396672987, 'num_layers': 2, 'initialization_multiplier': 0.5047295068734352}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 423 final loss: -0.00031512
Trial 424:
  Learning Rate: 0.014672861505758767
  Sigma Multiplier: 0.9972690311787696
  Initialization Multiplier: 0.6112982363802385
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.11it/s, loss=-0.000312, elapsed time=0.09, total time=11.7]
[I 2025-06-07 14:00:00,457] Trial 424 finished with value: -0.00031163610792430964 and parameters: {'learning_rate': 0.014672861505758767, 'sigma_multiplier': 0.9972690311787696, 'num_layers': 2, 'initialization_multiplier': 0.6112982363802385}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 424 final loss: -0.00031164
Trial 425:
  Learning Rate: 0.00849409133538556
  Sigma Multiplier: 1.0835454268969946
  Initialization Multiplier: 0.5550717376621254
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.61it/s, loss=-0.000376, elapsed time=0.04, total time=9.34]
[I 2025-06-07 14:00:09,838] Trial 425 finished with value: -0.00037593280480041435 and parameters: {'learning_rate': 0.00849409133538556, 'sigma_multiplier': 1.0835454268969946, 'num_layers': 1, 'initialization_multiplier': 0.5550717376621254}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 425 final loss: -0.00037593
Trial 426:
  Learning Rate: 0.025781325688460883
  Sigma Multiplier: 0.964878912503448
  Initialization Multiplier: 0.466892534023612
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.72it/s, loss=-0.000237, elapsed time=0.08, total time=12.1]
[I 2025-06-07 14:00:22,034] Trial 426 finished with value: -0.00023658288306308996 and parameters: {'learning_rate': 0.025781325688460883, 'sigma_multiplier': 0.964878912503448, 'num_layers': 2, 'initialization_multiplier': 0.466892534023612}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 426 final loss: -0.00023658
Trial 427:
  Learning Rate: 0.012046086170968175
  Sigma Multiplier: 1.0312691921383956
  Initialization Multiplier: 0.6578352898360017
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.27it/s, loss=-0.000379, elapsed time=0.09, total time=11.7]
[I 2025-06-07 14:00:33,728] Trial 427 finished with value: -0.0003790839745284193 and parameters: {'learning_rate': 0.012046086170968175, 'sigma_multiplier': 1.0312691921383956, 'num_layers': 2, 'initialization_multiplier': 0.6578352898360017}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 427 final loss: -0.00037908
Trial 428:
  Learning Rate: 0.0076283705479538305
  Sigma Multiplier: 1.1339501701503976
  Initialization Multiplier: 0.6032913686677991
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.28it/s, loss=-0.000385, elapsed time=0.06, total time=10.8]
[I 2025-06-07 14:00:44,625] Trial 428 finished with value: -0.00038528260407467654 and parameters: {'learning_rate': 0.0076283705479538305, 'sigma_multiplier': 1.1339501701503976, 'num_layers': 2, 'initialization_multiplier': 0.6032913686677991}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 428 final loss: -0.00038528
Trial 429:
  Learning Rate: 0.009989152199868269
  Sigma Multiplier: 0.9264925324263806
  Initialization Multiplier: 0.535399477217125
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.16it/s, loss=-0.000388, elapsed time=0.07, total time=11.7]
[I 2025-06-07 14:00:56,385] Trial 429 finished with value: -0.00038799545869294173 and parameters: {'learning_rate': 0.009989152199868269, 'sigma_multiplier': 0.9264925324263806, 'num_layers': 2, 'initialization_multiplier': 0.535399477217125}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 429 final loss: -0.00038800
Trial 430:
  Learning Rate: 0.015546837484028964
  Sigma Multiplier: 0.9980602019143109
  Initialization Multiplier: 0.7372303820801657
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.74it/s, loss=-0.000087, elapsed time=0.06, total time=12.1]
[I 2025-06-07 14:01:08,489] Trial 430 finished with value: -8.66818366709186e-05 and parameters: {'learning_rate': 0.015546837484028964, 'sigma_multiplier': 0.9980602019143109, 'num_layers': 2, 'initialization_multiplier': 0.7372303820801657}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 430 final loss: -0.00008668
Trial 431:
  Learning Rate: 0.011185085133284778
  Sigma Multiplier: 1.0503128018130088
  Initialization Multiplier: 0.49463226437880314
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.00it/s, loss=-0.000365, elapsed time=0.05, total time=11]  
[I 2025-06-07 14:01:19,536] Trial 431 finished with value: -0.00036484623129045884 and parameters: {'learning_rate': 0.011185085133284778, 'sigma_multiplier': 1.0503128018130088, 'num_layers': 2, 'initialization_multiplier': 0.49463226437880314}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 431 final loss: -0.00036485
Trial 432:
  Learning Rate: 0.008964184082814502
  Sigma Multiplier: 0.9580192141564436
  Initialization Multiplier: 0.585671580127659
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.52it/s, loss=-0.000368, elapsed time=0.07, total time=11.4]
[I 2025-06-07 14:01:31,008] Trial 432 finished with value: -0.00036824393119154643 and parameters: {'learning_rate': 0.008964184082814502, 'sigma_multiplier': 0.9580192141564436, 'num_layers': 2, 'initialization_multiplier': 0.585671580127659}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 432 final loss: -0.00036824
Trial 433:
  Learning Rate: 0.018102268064266052
  Sigma Multiplier: 1.4761237582609554
  Initialization Multiplier: 0.6476236077688493
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.24it/s, loss=-0.000215, elapsed time=0.08, total time=12.6]
[I 2025-06-07 14:01:43,626] Trial 433 finished with value: -0.00021516912612768363 and parameters: {'learning_rate': 0.018102268064266052, 'sigma_multiplier': 1.4761237582609554, 'num_layers': 3, 'initialization_multiplier': 0.6476236077688493}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 433 final loss: -0.00021517
Trial 434:
  Learning Rate: 0.007501327158707412
  Sigma Multiplier: 1.1072640483609215
  Initialization Multiplier: 0.5645783429989081
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.75it/s, loss=-0.000325, elapsed time=0.07, total time=11.2]
[I 2025-06-07 14:01:54,874] Trial 434 finished with value: -0.00032548099030327293 and parameters: {'learning_rate': 0.007501327158707412, 'sigma_multiplier': 1.1072640483609215, 'num_layers': 2, 'initialization_multiplier': 0.5645783429989081}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 434 final loss: -0.00032548
Trial 435:
  Learning Rate: 0.012702943780243134
  Sigma Multiplier: 0.8511043840309128
  Initialization Multiplier: 0.5015447758898913
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.31it/s, loss=-0.000309, elapsed time=0.06, total time=12.5]
[I 2025-06-07 14:02:07,406] Trial 435 finished with value: -0.0003090226148845086 and parameters: {'learning_rate': 0.012702943780243134, 'sigma_multiplier': 0.8511043840309128, 'num_layers': 2, 'initialization_multiplier': 0.5015447758898913}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 435 final loss: -0.00030902
Trial 436:
  Learning Rate: 0.010021130637133264
  Sigma Multiplier: 1.0092309109181008
  Initialization Multiplier: 0.44691937911278345
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.45it/s, loss=-0.000365, elapsed time=0.09, total time=11.5]
[I 2025-06-07 14:02:18,908] Trial 436 finished with value: -0.00036456123774823327 and parameters: {'learning_rate': 0.010021130637133264, 'sigma_multiplier': 1.0092309109181008, 'num_layers': 2, 'initialization_multiplier': 0.44691937911278345}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 436 final loss: -0.00036456
Trial 437:
  Learning Rate: 0.007949954599579572
  Sigma Multiplier: 0.9094233216299208
  Initialization Multiplier: 0.6243308282605055
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.98it/s, loss=-0.000362, elapsed time=0.05, total time=11.8]
[I 2025-06-07 14:02:30,795] Trial 437 finished with value: -0.00036156656132460774 and parameters: {'learning_rate': 0.007949954599579572, 'sigma_multiplier': 0.9094233216299208, 'num_layers': 2, 'initialization_multiplier': 0.6243308282605055}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 437 final loss: -0.00036157
Trial 438:
  Learning Rate: 0.013579572838438381
  Sigma Multiplier: 0.9719562927532187
  Initialization Multiplier: 1.2335916332900836
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.88it/s, loss=-0.000192, elapsed time=0.07, total time=12]  
[I 2025-06-07 14:02:42,821] Trial 438 finished with value: -0.00019155539335002266 and parameters: {'learning_rate': 0.013579572838438381, 'sigma_multiplier': 0.9719562927532187, 'num_layers': 2, 'initialization_multiplier': 1.2335916332900836}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 438 final loss: -0.00019156
Trial 439:
  Learning Rate: 0.0066139460134675915
  Sigma Multiplier: 1.0697710939633793
  Initialization Multiplier: 0.5371189723103497
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.50it/s, loss=-0.000362, elapsed time=0.07, total time=11.4]
[I 2025-06-07 14:02:54,276] Trial 439 finished with value: -0.0003615502164425759 and parameters: {'learning_rate': 0.0066139460134675915, 'sigma_multiplier': 1.0697710939633793, 'num_layers': 2, 'initialization_multiplier': 0.5371189723103497}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 439 final loss: -0.00036155
Trial 440:
  Learning Rate: 0.010705512433578184
  Sigma Multiplier: 1.023693807247729
  Initialization Multiplier: 0.6866159127633044
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.60it/s, loss=-0.000402, elapsed time=0.06, total time=12.3]
[I 2025-06-07 14:03:06,611] Trial 440 finished with value: -0.00040178205349216555 and parameters: {'learning_rate': 0.010705512433578184, 'sigma_multiplier': 1.023693807247729, 'num_layers': 2, 'initialization_multiplier': 0.6866159127633044}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 440 final loss: -0.00040178
Trial 441:
  Learning Rate: 0.021389215410853788
  Sigma Multiplier: 0.9353218033473227
  Initialization Multiplier: 0.9130648305629422
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.44it/s, loss=-0.000185, elapsed time=0.06, total time=12.4]
[I 2025-06-07 14:03:19,046] Trial 441 finished with value: -0.00018500968395823678 and parameters: {'learning_rate': 0.021389215410853788, 'sigma_multiplier': 0.9353218033473227, 'num_layers': 2, 'initialization_multiplier': 0.9130648305629422}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 441 final loss: -0.00018501
Trial 442:
  Learning Rate: 0.009351698384392359
  Sigma Multiplier: 0.9849459068279064
  Initialization Multiplier: 0.5900902903771736
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.44it/s, loss=-0.000386, elapsed time=0.08, total time=11.5]
[I 2025-06-07 14:03:30,567] Trial 442 finished with value: -0.000386126534409909 and parameters: {'learning_rate': 0.009351698384392359, 'sigma_multiplier': 0.9849459068279064, 'num_layers': 2, 'initialization_multiplier': 0.5900902903771736}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 442 final loss: -0.00038613
Trial 443:
  Learning Rate: 0.012893860999810821
  Sigma Multiplier: 1.0602160914638379
  Initialization Multiplier: 0.5565165911310748
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.55it/s, loss=-0.000307, elapsed time=0.05, total time=11.4]
[I 2025-06-07 14:03:41,975] Trial 443 finished with value: -0.0003069411248841213 and parameters: {'learning_rate': 0.012893860999810821, 'sigma_multiplier': 1.0602160914638379, 'num_layers': 2, 'initialization_multiplier': 0.5565165911310748}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 443 final loss: -0.00030694
Trial 444:
  Learning Rate: 0.01613421338265001
  Sigma Multiplier: 0.8998220406454841
  Initialization Multiplier: 0.48548145161696926
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.99it/s, loss=-0.000276, elapsed time=0.07, total time=11.9]
[I 2025-06-07 14:03:53,899] Trial 444 finished with value: -0.00027600521213225963 and parameters: {'learning_rate': 0.01613421338265001, 'sigma_multiplier': 0.8998220406454841, 'num_layers': 2, 'initialization_multiplier': 0.48548145161696926}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 444 final loss: -0.00027601
Trial 445:
  Learning Rate: 0.0025691842673659817
  Sigma Multiplier: 0.27169319455842955
  Initialization Multiplier: 0.6241053031825108
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.04it/s, loss=0.000098, elapsed time=0.09, total time=15.2] 
[I 2025-06-07 14:04:09,197] Trial 445 finished with value: 9.808787133069787e-05 and parameters: {'learning_rate': 0.0025691842673659817, 'sigma_multiplier': 0.27169319455842955, 'num_layers': 2, 'initialization_multiplier': 0.6241053031825108}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 445 final loss: 0.00009809
Trial 446:
  Learning Rate: 0.008079094387805717
  Sigma Multiplier: 0.7678448493990808
  Initialization Multiplier: 0.5187114142630018
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.09it/s, loss=-0.000251, elapsed time=0.06, total time=12.7]
[I 2025-06-07 14:04:21,950] Trial 446 finished with value: -0.0002513741369903367 and parameters: {'learning_rate': 0.008079094387805717, 'sigma_multiplier': 0.7678448493990808, 'num_layers': 2, 'initialization_multiplier': 0.5187114142630018}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 446 final loss: -0.00025137
Trial 447:
  Learning Rate: 0.0008910757044518747
  Sigma Multiplier: 1.0229729374381866
  Initialization Multiplier: 0.5887623531930465
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.71it/s, loss=0.007102, elapsed time=0.07, total time=11.2]
[I 2025-06-07 14:04:33,235] Trial 447 finished with value: 0.007102440013395235 and parameters: {'learning_rate': 0.0008910757044518747, 'sigma_multiplier': 1.0229729374381866, 'num_layers': 2, 'initialization_multiplier': 0.5887623531930465}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 447 final loss: 0.00710244
Trial 448:
  Learning Rate: 0.011267245033664214
  Sigma Multiplier: 0.9569709821480505
  Initialization Multiplier: 0.45914860693046794
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.40it/s, loss=-0.000329, elapsed time=0.08, total time=11.5]
[I 2025-06-07 14:04:44,801] Trial 448 finished with value: -0.0003289364868404477 and parameters: {'learning_rate': 0.011267245033664214, 'sigma_multiplier': 0.9569709821480505, 'num_layers': 2, 'initialization_multiplier': 0.45914860693046794}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 448 final loss: -0.00032894
Trial 449:
  Learning Rate: 0.006578094883532394
  Sigma Multiplier: 1.1085153935881358
  Initialization Multiplier: 0.6727298450804675
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.87it/s, loss=-0.000413, elapsed time=0.06, total time=11.2]
[I 2025-06-07 14:04:56,030] Trial 449 finished with value: -0.00041251974614338343 and parameters: {'learning_rate': 0.006578094883532394, 'sigma_multiplier': 1.1085153935881358, 'num_layers': 2, 'initialization_multiplier': 0.6727298450804675}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 449 final loss: -0.00041252
Trial 450:
  Learning Rate: 0.009319625106856766
  Sigma Multiplier: 0.9866453567256805
  Initialization Multiplier: 0.5461355746377008
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.48it/s, loss=-0.000439, elapsed time=0.06, total time=11.4]
[I 2025-06-07 14:05:07,508] Trial 450 finished with value: -0.0004391191814192027 and parameters: {'learning_rate': 0.009319625106856766, 'sigma_multiplier': 0.9866453567256805, 'num_layers': 2, 'initialization_multiplier': 0.5461355746377008}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 450 final loss: -0.00043912
Trial 451:
  Learning Rate: 0.00860473469079008
  Sigma Multiplier: 0.8830744407247145
  Initialization Multiplier: 0.5204380802928427
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 10.94it/s, loss=-0.000323, elapsed time=0.14, total time=14.1]
[I 2025-06-07 14:05:21,637] Trial 451 finished with value: -0.0003225242708302908 and parameters: {'learning_rate': 0.00860473469079008, 'sigma_multiplier': 0.8830744407247145, 'num_layers': 2, 'initialization_multiplier': 0.5204380802928427}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 451 final loss: -0.00032252
Trial 452:
  Learning Rate: 0.007179988964863914
  Sigma Multiplier: 0.9854505205746404
  Initialization Multiplier: 0.43388569557597356
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.78it/s, loss=-0.000376, elapsed time=0.07, total time=12.1]
[I 2025-06-07 14:05:33,753] Trial 452 finished with value: -0.0003761339184613912 and parameters: {'learning_rate': 0.007179988964863914, 'sigma_multiplier': 0.9854505205746404, 'num_layers': 2, 'initialization_multiplier': 0.43388569557597356}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 452 final loss: -0.00037613
Trial 453:
  Learning Rate: 0.008925449661432237
  Sigma Multiplier: 0.937599172227213
  Initialization Multiplier: 0.6233611076301312
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.86it/s, loss=-0.000319, elapsed time=0.06, total time=12]  
[I 2025-06-07 14:05:45,761] Trial 453 finished with value: -0.0003191429402938989 and parameters: {'learning_rate': 0.008925449661432237, 'sigma_multiplier': 0.937599172227213, 'num_layers': 2, 'initialization_multiplier': 0.6233611076301312}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 453 final loss: -0.00031914
Trial 454:
  Learning Rate: 0.009805794026480854
  Sigma Multiplier: 1.0480689759917197
  Initialization Multiplier: 0.5749814551222553
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.40it/s, loss=-0.000424, elapsed time=0.09, total time=11.5]
[I 2025-06-07 14:05:57,350] Trial 454 finished with value: -0.0004237234345419606 and parameters: {'learning_rate': 0.009805794026480854, 'sigma_multiplier': 1.0480689759917197, 'num_layers': 2, 'initialization_multiplier': 0.5749814551222553}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 454 final loss: -0.00042372
Trial 455:
  Learning Rate: 0.00637890004248206
  Sigma Multiplier: 0.9811949415842308
  Initialization Multiplier: 0.7152368064859743
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.87it/s, loss=-0.000265, elapsed time=0.06, total time=12.1]
[I 2025-06-07 14:06:09,486] Trial 455 finished with value: -0.000264736558225541 and parameters: {'learning_rate': 0.00637890004248206, 'sigma_multiplier': 0.9811949415842308, 'num_layers': 2, 'initialization_multiplier': 0.7152368064859743}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 455 final loss: -0.00026474
Trial 456:
  Learning Rate: 0.0081934682988772
  Sigma Multiplier: 1.0814788183044752
  Initialization Multiplier: 0.4879490207454491
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.44it/s, loss=-0.000350, elapsed time=0.05, total time=11.4]
[I 2025-06-07 14:06:20,984] Trial 456 finished with value: -0.00035025580099906205 and parameters: {'learning_rate': 0.0081934682988772, 'sigma_multiplier': 1.0814788183044752, 'num_layers': 2, 'initialization_multiplier': 0.4879490207454491}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 456 final loss: -0.00035026
Trial 457:
  Learning Rate: 0.010300685660775195
  Sigma Multiplier: 1.0192964825902788
  Initialization Multiplier: 0.5451409405435107
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.60it/s, loss=-0.000394, elapsed time=0.07, total time=12.3]
[I 2025-06-07 14:06:33,324] Trial 457 finished with value: -0.0003936957760613875 and parameters: {'learning_rate': 0.010300685660775195, 'sigma_multiplier': 1.0192964825902788, 'num_layers': 2, 'initialization_multiplier': 0.5451409405435107}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 457 final loss: -0.00039370
Trial 458:
  Learning Rate: 0.0074660060032102245
  Sigma Multiplier: 0.932765489661106
  Initialization Multiplier: 0.6566899458086729
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.53it/s, loss=-0.000261, elapsed time=0.05, total time=12.2]
[I 2025-06-07 14:06:45,618] Trial 458 finished with value: -0.00026112003005358353 and parameters: {'learning_rate': 0.0074660060032102245, 'sigma_multiplier': 0.932765489661106, 'num_layers': 2, 'initialization_multiplier': 0.6566899458086729}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 458 final loss: -0.00026112
Trial 459:
  Learning Rate: 0.0005084193541020148
  Sigma Multiplier: 0.9732363592797088
  Initialization Multiplier: 0.5870585304791867
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.18it/s, loss=0.023696, elapsed time=0.06, total time=11.7]
[I 2025-06-07 14:06:57,370] Trial 459 finished with value: 0.023696212657234043 and parameters: {'learning_rate': 0.0005084193541020148, 'sigma_multiplier': 0.9732363592797088, 'num_layers': 2, 'initialization_multiplier': 0.5870585304791867}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 459 final loss: 0.02369621
Trial 460:
  Learning Rate: 0.0013876908406211992
  Sigma Multiplier: 1.5749442216001903
  Initialization Multiplier: 1.7424614539857
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.31it/s, loss=0.039156, elapsed time=0.05, total time=10.8]
[I 2025-06-07 14:07:08,254] Trial 460 finished with value: 0.03915582696489354 and parameters: {'learning_rate': 0.0013876908406211992, 'sigma_multiplier': 1.5749442216001903, 'num_layers': 2, 'initialization_multiplier': 1.7424614539857}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 460 final loss: 0.03915583
Trial 461:
  Learning Rate: 0.011875958330595437
  Sigma Multiplier: 0.8273788084148306
  Initialization Multiplier: 1.3313776273921802
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.09it/s, loss=0.000140, elapsed time=0.08, total time=12.7] 
[I 2025-06-07 14:07:20,979] Trial 461 finished with value: 0.00013997048519073568 and parameters: {'learning_rate': 0.011875958330595437, 'sigma_multiplier': 0.8273788084148306, 'num_layers': 2, 'initialization_multiplier': 1.3313776273921802}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 461 final loss: 0.00013997
Trial 462:
  Learning Rate: 0.005612047433197327
  Sigma Multiplier: 1.040245539068071
  Initialization Multiplier: 0.6171338123062193
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:16<00:00,  9.13it/s, loss=-0.000050, elapsed time=0.1, total time=16.8] 
[I 2025-06-07 14:07:37,819] Trial 462 finished with value: -5.045451800570083e-05 and parameters: {'learning_rate': 0.005612047433197327, 'sigma_multiplier': 1.040245539068071, 'num_layers': 4, 'initialization_multiplier': 0.6171338123062193}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 462 final loss: -0.00005045
Trial 463:
  Learning Rate: 0.009355603492889785
  Sigma Multiplier: 0.8857036952210808
  Initialization Multiplier: 0.5300040736706151
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.61it/s, loss=-0.000338, elapsed time=0.06, total time=12.4]
[I 2025-06-07 14:07:50,267] Trial 463 finished with value: -0.00033806090837325774 and parameters: {'learning_rate': 0.009355603492889785, 'sigma_multiplier': 0.8857036952210808, 'num_layers': 2, 'initialization_multiplier': 0.5300040736706151}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 463 final loss: -0.00033806
Trial 464:
  Learning Rate: 0.014323131624548626
  Sigma Multiplier: 1.1289224371423683
  Initialization Multiplier: 0.9851872757377852
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.81it/s, loss=-0.000313, elapsed time=0.08, total time=12.1]
[I 2025-06-07 14:08:02,446] Trial 464 finished with value: -0.0003131223603096801 and parameters: {'learning_rate': 0.014323131624548626, 'sigma_multiplier': 1.1289224371423683, 'num_layers': 2, 'initialization_multiplier': 0.9851872757377852}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 464 final loss: -0.00031312
Trial 465:
  Learning Rate: 0.0022256427612357896
  Sigma Multiplier: 1.0003143090849136
  Initialization Multiplier: 0.4850697041067369
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.69it/s, loss=-0.000399, elapsed time=0.06, total time=12.1]
[I 2025-06-07 14:08:14,620] Trial 465 finished with value: -0.0003993494842876193 and parameters: {'learning_rate': 0.0022256427612357896, 'sigma_multiplier': 1.0003143090849136, 'num_layers': 2, 'initialization_multiplier': 0.4850697041067369}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 465 final loss: -0.00039935
Trial 466:
  Learning Rate: 0.007119880657106015
  Sigma Multiplier: 0.9409829600208847
  Initialization Multiplier: 0.5618287845354375
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.76it/s, loss=-0.000316, elapsed time=0.07, total time=12.1]
[I 2025-06-07 14:08:26,782] Trial 466 finished with value: -0.00031583697994024153 and parameters: {'learning_rate': 0.007119880657106015, 'sigma_multiplier': 0.9409829600208847, 'num_layers': 2, 'initialization_multiplier': 0.5618287845354375}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 466 final loss: -0.00031584
Trial 467:
  Learning Rate: 0.01067696365989676
  Sigma Multiplier: 1.0849607442037452
  Initialization Multiplier: 0.6536421397824745
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.70it/s, loss=-0.000341, elapsed time=0.15, total time=11.4]
[I 2025-06-07 14:08:38,199] Trial 467 finished with value: -0.00034063343930049205 and parameters: {'learning_rate': 0.01067696365989676, 'sigma_multiplier': 1.0849607442037452, 'num_layers': 2, 'initialization_multiplier': 0.6536421397824745}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 467 final loss: -0.00034063
Trial 468:
  Learning Rate: 0.008439634398777133
  Sigma Multiplier: 1.016739142116299
  Initialization Multiplier: 0.5969642821104767
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.45it/s, loss=-0.000351, elapsed time=0.05, total time=11.4]
[I 2025-06-07 14:08:49,653] Trial 468 finished with value: -0.0003512849297464029 and parameters: {'learning_rate': 0.008439634398777133, 'sigma_multiplier': 1.016739142116299, 'num_layers': 2, 'initialization_multiplier': 0.5969642821104767}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 468 final loss: -0.00035128
Trial 469:
  Learning Rate: 0.018149055672037676
  Sigma Multiplier: 0.9618122102491936
  Initialization Multiplier: 0.4459655428230778
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.05it/s, loss=-0.000301, elapsed time=0.1, total time=11.9] 
[I 2025-06-07 14:09:01,588] Trial 469 finished with value: -0.00030117001452236 and parameters: {'learning_rate': 0.018149055672037676, 'sigma_multiplier': 0.9618122102491936, 'num_layers': 2, 'initialization_multiplier': 0.4459655428230778}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 469 final loss: -0.00030117
Trial 470:
  Learning Rate: 0.012828145158466092
  Sigma Multiplier: 1.0551876278650762
  Initialization Multiplier: 0.5206686632591719
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:18<00:00,  8.28it/s, loss=-0.000094, elapsed time=0.12, total time=18.5]
[I 2025-06-07 14:09:20,205] Trial 470 finished with value: -9.353888397926756e-05 and parameters: {'learning_rate': 0.012828145158466092, 'sigma_multiplier': 1.0551876278650762, 'num_layers': 5, 'initialization_multiplier': 0.5206686632591719}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 470 final loss: -0.00009354
Trial 471:
  Learning Rate: 0.003259733190918939
  Sigma Multiplier: 0.9265908957421362
  Initialization Multiplier: 0.5663693508860983
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.96it/s, loss=-0.000266, elapsed time=0.06, total time=11.9]
[I 2025-06-07 14:09:32,213] Trial 471 finished with value: -0.00026563182680414664 and parameters: {'learning_rate': 0.003259733190918939, 'sigma_multiplier': 0.9265908957421362, 'num_layers': 2, 'initialization_multiplier': 0.5663693508860983}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 471 final loss: -0.00026563
Trial 472:
  Learning Rate: 0.009843718450626909
  Sigma Multiplier: 0.9896550970484164
  Initialization Multiplier: 0.681752910904558
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.81it/s, loss=-0.000322, elapsed time=0.09, total time=12.1]
[I 2025-06-07 14:09:44,346] Trial 472 finished with value: -0.000321554995531442 and parameters: {'learning_rate': 0.009843718450626909, 'sigma_multiplier': 0.9896550970484164, 'num_layers': 2, 'initialization_multiplier': 0.681752910904558}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 472 final loss: -0.00032155
Trial 473:
  Learning Rate: 0.006361001062140426
  Sigma Multiplier: 0.8618053186718336
  Initialization Multiplier: 0.636254088435876
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.38it/s, loss=-0.000361, elapsed time=0.06, total time=12.5]
[I 2025-06-07 14:09:56,901] Trial 473 finished with value: -0.0003611798152422055 and parameters: {'learning_rate': 0.006361001062140426, 'sigma_multiplier': 0.8618053186718336, 'num_layers': 2, 'initialization_multiplier': 0.636254088435876}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 473 final loss: -0.00036118
Trial 474:
  Learning Rate: 0.015491279404655507
  Sigma Multiplier: 1.0312438939417106
  Initialization Multiplier: 0.4225416881087153
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.12it/s, loss=-0.000409, elapsed time=0.06, total time=11.8]
[I 2025-06-07 14:10:08,750] Trial 474 finished with value: -0.0004093321759022802 and parameters: {'learning_rate': 0.015491279404655507, 'sigma_multiplier': 1.0312438939417106, 'num_layers': 2, 'initialization_multiplier': 0.4225416881087153}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 474 final loss: -0.00040933
Trial 475:
  Learning Rate: 0.00862940792343032
  Sigma Multiplier: 0.8990808395790644
  Initialization Multiplier: 0.5047447345810995
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.85it/s, loss=-0.000261, elapsed time=0.08, total time=11.9]
[I 2025-06-07 14:10:20,751] Trial 475 finished with value: -0.0002611680387408477 and parameters: {'learning_rate': 0.00862940792343032, 'sigma_multiplier': 0.8990808395790644, 'num_layers': 2, 'initialization_multiplier': 0.5047447345810995}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 475 final loss: -0.00026117
Trial 476:
  Learning Rate: 0.011416818296322422
  Sigma Multiplier: 1.093612823181493
  Initialization Multiplier: 0.6110125756959717
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.45it/s, loss=-0.000360, elapsed time=0.06, total time=11.5]
[I 2025-06-07 14:10:32,272] Trial 476 finished with value: -0.0003604707603023599 and parameters: {'learning_rate': 0.011416818296322422, 'sigma_multiplier': 1.093612823181493, 'num_layers': 2, 'initialization_multiplier': 0.6110125756959717}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 476 final loss: -0.00036047
Trial 477:
  Learning Rate: 9.627939780973241e-05
  Sigma Multiplier: 0.9920976159326036
  Initialization Multiplier: 0.7479596703035272
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.46it/s, loss=0.113348, elapsed time=0.06, total time=11.4]
[I 2025-06-07 14:10:43,763] Trial 477 finished with value: 0.11334773040447617 and parameters: {'learning_rate': 9.627939780973241e-05, 'sigma_multiplier': 0.9920976159326036, 'num_layers': 2, 'initialization_multiplier': 0.7479596703035272}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 477 final loss: 0.11334773
Trial 478:
  Learning Rate: 0.004697136247185719
  Sigma Multiplier: 1.0536900115711736
  Initialization Multiplier: 0.5360600136634551
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.68it/s, loss=-0.000286, elapsed time=0.07, total time=9.87]
[I 2025-06-07 14:10:53,669] Trial 478 finished with value: -0.00028625177127448667 and parameters: {'learning_rate': 0.004697136247185719, 'sigma_multiplier': 1.0536900115711736, 'num_layers': 1, 'initialization_multiplier': 0.5360600136634551}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 478 final loss: -0.00028625
Trial 479:
  Learning Rate: 0.04853941514014136
  Sigma Multiplier: 0.9576483873328091
  Initialization Multiplier: 0.5775530965477912
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.17it/s, loss=-0.000288, elapsed time=0.08, total time=11.7]
[I 2025-06-07 14:11:05,438] Trial 479 finished with value: -0.00028756284439728326 and parameters: {'learning_rate': 0.04853941514014136, 'sigma_multiplier': 0.9576483873328091, 'num_layers': 2, 'initialization_multiplier': 0.5775530965477912}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 479 final loss: -0.00028756
Trial 480:
  Learning Rate: 0.007115039284996518
  Sigma Multiplier: 1.0273779821428661
  Initialization Multiplier: 0.46348225070511967
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.54it/s, loss=-0.000389, elapsed time=0.07, total time=11.4]
[I 2025-06-07 14:11:16,900] Trial 480 finished with value: -0.0003887184357222933 and parameters: {'learning_rate': 0.007115039284996518, 'sigma_multiplier': 1.0273779821428661, 'num_layers': 2, 'initialization_multiplier': 0.46348225070511967}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 480 final loss: -0.00038872
Trial 481:
  Learning Rate: 0.01337482595289105
  Sigma Multiplier: 0.9216039653816525
  Initialization Multiplier: 0.551267543109294
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.74it/s, loss=-0.000224, elapsed time=0.09, total time=12.1]
[I 2025-06-07 14:11:29,050] Trial 481 finished with value: -0.00022371449010661005 and parameters: {'learning_rate': 0.01337482595289105, 'sigma_multiplier': 0.9216039653816525, 'num_layers': 2, 'initialization_multiplier': 0.551267543109294}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 481 final loss: -0.00022371
Trial 482:
  Learning Rate: 0.0059083451850969245
  Sigma Multiplier: 1.1300056422161606
  Initialization Multiplier: 0.4906920643471064
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.42it/s, loss=-0.000345, elapsed time=0.07, total time=11.5]
[I 2025-06-07 14:11:40,638] Trial 482 finished with value: -0.00034475661756339207 and parameters: {'learning_rate': 0.0059083451850969245, 'sigma_multiplier': 1.1300056422161606, 'num_layers': 2, 'initialization_multiplier': 0.4906920643471064}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 482 final loss: -0.00034476
Trial 483:
  Learning Rate: 0.010384736598365554
  Sigma Multiplier: 0.9754278365243632
  Initialization Multiplier: 0.7018930270523576
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.33it/s, loss=-0.000365, elapsed time=0.06, total time=11.6]
[I 2025-06-07 14:11:52,287] Trial 483 finished with value: -0.0003648255340056389 and parameters: {'learning_rate': 0.010384736598365554, 'sigma_multiplier': 0.9754278365243632, 'num_layers': 2, 'initialization_multiplier': 0.7018930270523576}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 483 final loss: -0.00036483
Trial 484:
  Learning Rate: 0.008807529777852052
  Sigma Multiplier: 1.010833499770241
  Initialization Multiplier: 0.6356151004671999
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.08it/s, loss=-0.000345, elapsed time=0.06, total time=11.8]
[I 2025-06-07 14:12:04,147] Trial 484 finished with value: -0.0003447157212471679 and parameters: {'learning_rate': 0.008807529777852052, 'sigma_multiplier': 1.010833499770241, 'num_layers': 2, 'initialization_multiplier': 0.6356151004671999}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 484 final loss: -0.00034472
Trial 485:
  Learning Rate: 0.007984667541756222
  Sigma Multiplier: 1.0703366169214472
  Initialization Multiplier: 0.593700525359195
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.05it/s, loss=-0.000284, elapsed time=0.06, total time=11.8]
[I 2025-06-07 14:12:16,037] Trial 485 finished with value: -0.0002836667643379795 and parameters: {'learning_rate': 0.007984667541756222, 'sigma_multiplier': 1.0703366169214472, 'num_layers': 2, 'initialization_multiplier': 0.593700525359195}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 485 final loss: -0.00028367
Trial 486:
  Learning Rate: 0.0119218374682121
  Sigma Multiplier: 0.9509109704027444
  Initialization Multiplier: 0.41176584613531325
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.05it/s, loss=-0.000340, elapsed time=0.07, total time=11.8]
[I 2025-06-07 14:12:27,888] Trial 486 finished with value: -0.0003399199819462771 and parameters: {'learning_rate': 0.0119218374682121, 'sigma_multiplier': 0.9509109704027444, 'num_layers': 2, 'initialization_multiplier': 0.41176584613531325}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 486 final loss: -0.00033992
Trial 487:
  Learning Rate: 0.015483594301279578
  Sigma Multiplier: 0.9955617652551723
  Initialization Multiplier: 0.5263325694662895
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.77it/s, loss=-0.000376, elapsed time=0.1, total time=12.1] 
[I 2025-06-07 14:12:40,145] Trial 487 finished with value: -0.0003755754006264699 and parameters: {'learning_rate': 0.015483594301279578, 'sigma_multiplier': 0.9955617652551723, 'num_layers': 2, 'initialization_multiplier': 0.5263325694662895}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 487 final loss: -0.00037558
Trial 488:
  Learning Rate: 0.009874002691611403
  Sigma Multiplier: 0.9081284457158967
  Initialization Multiplier: 0.6017633649650375
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.22it/s, loss=-0.000350, elapsed time=0.1, total time=12.7] 
[I 2025-06-07 14:12:52,867] Trial 488 finished with value: -0.0003496816113316241 and parameters: {'learning_rate': 0.009874002691611403, 'sigma_multiplier': 0.9081284457158967, 'num_layers': 2, 'initialization_multiplier': 0.6017633649650375}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 488 final loss: -0.00034968
Trial 489:
  Learning Rate: 0.0073567990530811755
  Sigma Multiplier: 1.0518631032052372
  Initialization Multiplier: 0.5564701655792373
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.43it/s, loss=-0.000313, elapsed time=0.07, total time=12.4]
[I 2025-06-07 14:13:05,282] Trial 489 finished with value: -0.000312631661972128 and parameters: {'learning_rate': 0.0073567990530811755, 'sigma_multiplier': 1.0518631032052372, 'num_layers': 2, 'initialization_multiplier': 0.5564701655792373}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 489 final loss: -0.00031263
Trial 490:
  Learning Rate: 0.013645237885761248
  Sigma Multiplier: 0.9531704391585383
  Initialization Multiplier: 0.6576572072030507
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.44it/s, loss=-0.000312, elapsed time=0.07, total time=12.4]
[I 2025-06-07 14:13:17,749] Trial 490 finished with value: -0.0003123802579643581 and parameters: {'learning_rate': 0.013645237885761248, 'sigma_multiplier': 0.9531704391585383, 'num_layers': 2, 'initialization_multiplier': 0.6576572072030507}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 490 final loss: -0.00031238
Trial 491:
  Learning Rate: 0.021692813154046548
  Sigma Multiplier: 1.0908333718576435
  Initialization Multiplier: 0.4764915717000764
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.44it/s, loss=-0.000369, elapsed time=0.07, total time=11.6]
[I 2025-06-07 14:13:29,381] Trial 491 finished with value: -0.00036929238419622075 and parameters: {'learning_rate': 0.021692813154046548, 'sigma_multiplier': 1.0908333718576435, 'num_layers': 2, 'initialization_multiplier': 0.4764915717000764}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 491 final loss: -0.00036929
Trial 492:
  Learning Rate: 0.003888456786561695
  Sigma Multiplier: 1.1581571997111384
  Initialization Multiplier: 0.520480607541187
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.91it/s, loss=-0.000380, elapsed time=0.05, total time=11.1]
[I 2025-06-07 14:13:40,550] Trial 492 finished with value: -0.0003795261646277838 and parameters: {'learning_rate': 0.003888456786561695, 'sigma_multiplier': 1.1581571997111384, 'num_layers': 2, 'initialization_multiplier': 0.520480607541187}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 492 final loss: -0.00037953
Trial 493:
  Learning Rate: 0.01119653383857233
  Sigma Multiplier: 0.8613533029813968
  Initialization Multiplier: 0.6161979818917298
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.16it/s, loss=0.000021, elapsed time=0.07, total time=15.1] 
[I 2025-06-07 14:13:55,710] Trial 493 finished with value: 2.060700623483235e-05 and parameters: {'learning_rate': 0.01119653383857233, 'sigma_multiplier': 0.8613533029813968, 'num_layers': 3, 'initialization_multiplier': 0.6161979818917298}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 493 final loss: 0.00002061
Trial 494:
  Learning Rate: 0.006048389520420699
  Sigma Multiplier: 1.0178621820891802
  Initialization Multiplier: 0.5633553533802768
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.66it/s, loss=-0.000341, elapsed time=0.05, total time=12.3]
[I 2025-06-07 14:14:08,148] Trial 494 finished with value: -0.00034107404872689895 and parameters: {'learning_rate': 0.006048389520420699, 'sigma_multiplier': 1.0178621820891802, 'num_layers': 2, 'initialization_multiplier': 0.5633553533802768}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 494 final loss: -0.00034107
Trial 495:
  Learning Rate: 0.009207078440402337
  Sigma Multiplier: 0.9839479655601798
  Initialization Multiplier: 0.6845250344100686
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.55it/s, loss=-0.000354, elapsed time=0.07, total time=12.2]
[I 2025-06-07 14:14:20,447] Trial 495 finished with value: -0.00035445336250336534 and parameters: {'learning_rate': 0.009207078440402337, 'sigma_multiplier': 0.9839479655601798, 'num_layers': 2, 'initialization_multiplier': 0.6845250344100686}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 495 final loss: -0.00035445
Trial 496:
  Learning Rate: 0.007506746222903477
  Sigma Multiplier: 1.0425424744780398
  Initialization Multiplier: 0.45765511725631225
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.60it/s, loss=-0.000316, elapsed time=0.07, total time=11.3]
[I 2025-06-07 14:14:31,840] Trial 496 finished with value: -0.00031550318694471613 and parameters: {'learning_rate': 0.007506746222903477, 'sigma_multiplier': 1.0425424744780398, 'num_layers': 2, 'initialization_multiplier': 0.45765511725631225}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 496 final loss: -0.00031550
Trial 497:
  Learning Rate: 0.0053297802596021385
  Sigma Multiplier: 0.9245472961620811
  Initialization Multiplier: 0.5869823289270373
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.76it/s, loss=-0.000341, elapsed time=0.07, total time=12.1]
[I 2025-06-07 14:14:43,954] Trial 497 finished with value: -0.000340953840834266 and parameters: {'learning_rate': 0.0053297802596021385, 'sigma_multiplier': 0.9245472961620811, 'num_layers': 2, 'initialization_multiplier': 0.5869823289270373}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 497 final loss: -0.00034095
Trial 498:
  Learning Rate: 0.017468799931768877
  Sigma Multiplier: 1.0020987250809035
  Initialization Multiplier: 0.5027591715265085
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.72it/s, loss=-0.000297, elapsed time=0.08, total time=12.1]
[I 2025-06-07 14:14:56,067] Trial 498 finished with value: -0.00029694082738688035 and parameters: {'learning_rate': 0.017468799931768877, 'sigma_multiplier': 1.0020987250809035, 'num_layers': 2, 'initialization_multiplier': 0.5027591715265085}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 498 final loss: -0.00029694
Trial 499:
  Learning Rate: 0.01201356281114738
  Sigma Multiplier: 0.9616864294130012
  Initialization Multiplier: 0.6490861444166702
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.58it/s, loss=-0.000291, elapsed time=0.06, total time=12.2]
[I 2025-06-07 14:15:08,368] Trial 499 finished with value: -0.00029077650498218765 and parameters: {'learning_rate': 0.01201356281114738, 'sigma_multiplier': 0.9616864294130012, 'num_layers': 2, 'initialization_multiplier': 0.6490861444166702}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 499 final loss: -0.00029078
Trial 500:
  Learning Rate: 0.009941971327478859
  Sigma Multiplier: 1.1065444889463432
  Initialization Multiplier: 0.5470412906163082
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.29it/s, loss=-0.000337, elapsed time=0.06, total time=11.6]
[I 2025-06-07 14:15:20,058] Trial 500 finished with value: -0.0003370156065589178 and parameters: {'learning_rate': 0.009941971327478859, 'sigma_multiplier': 1.1065444889463432, 'num_layers': 2, 'initialization_multiplier': 0.5470412906163082}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 500 final loss: -0.00033702
Trial 501:
  Learning Rate: 0.008452743355418629
  Sigma Multiplier: 1.0606379212262558
  Initialization Multiplier: 0.39548474068131995
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.30it/s, loss=-0.000397, elapsed time=0.06, total time=11.7]
[I 2025-06-07 14:15:31,787] Trial 501 finished with value: -0.0003970409382123125 and parameters: {'learning_rate': 0.008452743355418629, 'sigma_multiplier': 1.0606379212262558, 'num_layers': 2, 'initialization_multiplier': 0.39548474068131995}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 501 final loss: -0.00039704
Trial 502:
  Learning Rate: 0.014163344899520234
  Sigma Multiplier: 0.8997427363055674
  Initialization Multiplier: 0.6142288000178151
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.01it/s, loss=-0.000217, elapsed time=0.07, total time=11.9]
[I 2025-06-07 14:15:43,691] Trial 502 finished with value: -0.00021727165088652694 and parameters: {'learning_rate': 0.014163344899520234, 'sigma_multiplier': 0.8997427363055674, 'num_layers': 2, 'initialization_multiplier': 0.6142288000178151}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 502 final loss: -0.00021727
Trial 503:
  Learning Rate: 0.006857957485453881
  Sigma Multiplier: 1.019889801880716
  Initialization Multiplier: 0.5135655787648512
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.62it/s, loss=-0.000333, elapsed time=0.08, total time=11.3]
[I 2025-06-07 14:15:55,080] Trial 503 finished with value: -0.00033336061587149366 and parameters: {'learning_rate': 0.006857957485453881, 'sigma_multiplier': 1.019889801880716, 'num_layers': 2, 'initialization_multiplier': 0.5135655787648512}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 503 final loss: -0.00033336
Trial 504:
  Learning Rate: 0.011245474150914647
  Sigma Multiplier: 0.9711819697677573
  Initialization Multiplier: 0.7250346403297798
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.14it/s, loss=-0.000317, elapsed time=0.07, total time=11.7]
[I 2025-06-07 14:16:06,859] Trial 504 finished with value: -0.00031696922658568056 and parameters: {'learning_rate': 0.011245474150914647, 'sigma_multiplier': 0.9711819697677573, 'num_layers': 2, 'initialization_multiplier': 0.7250346403297798}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 504 final loss: -0.00031697
Trial 505:
  Learning Rate: 0.008613140255386644
  Sigma Multiplier: 0.936933631078302
  Initialization Multiplier: 0.4488565967911635
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.75it/s, loss=-0.000414, elapsed time=0.08, total time=12]  
[I 2025-06-07 14:16:18,949] Trial 505 finished with value: -0.0004144143848429123 and parameters: {'learning_rate': 0.008613140255386644, 'sigma_multiplier': 0.936933631078302, 'num_layers': 2, 'initialization_multiplier': 0.4488565967911635}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 505 final loss: -0.00041441
Trial 506:
  Learning Rate: 0.010044963288789352
  Sigma Multiplier: 1.027880622308439
  Initialization Multiplier: 0.5430347379790681
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.81it/s, loss=-0.000384, elapsed time=0.05, total time=12.1]
[I 2025-06-07 14:16:31,066] Trial 506 finished with value: -0.00038432475712213865 and parameters: {'learning_rate': 0.010044963288789352, 'sigma_multiplier': 1.027880622308439, 'num_layers': 2, 'initialization_multiplier': 0.5430347379790681}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 506 final loss: -0.00038432
Trial 507:
  Learning Rate: 0.013289111204143696
  Sigma Multiplier: 1.084969208870268
  Initialization Multiplier: 0.5833019728760512
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.39it/s, loss=-0.000389, elapsed time=0.06, total time=11.5]
[I 2025-06-07 14:16:42,643] Trial 507 finished with value: -0.0003888086249064146 and parameters: {'learning_rate': 0.013289111204143696, 'sigma_multiplier': 1.084969208870268, 'num_layers': 2, 'initialization_multiplier': 0.5833019728760512}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 507 final loss: -0.00038881
Trial 508:
  Learning Rate: 0.006371659591646259
  Sigma Multiplier: 0.9875374241015789
  Initialization Multiplier: 0.6502295470528858
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.61it/s, loss=-0.000419, elapsed time=0.07, total time=12.2]
[I 2025-06-07 14:16:54,897] Trial 508 finished with value: -0.0004194867659945889 and parameters: {'learning_rate': 0.006371659591646259, 'sigma_multiplier': 0.9875374241015789, 'num_layers': 2, 'initialization_multiplier': 0.6502295470528858}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 508 final loss: -0.00041949
Trial 509:
  Learning Rate: 0.007816909993745144
  Sigma Multiplier: 0.829580628446358
  Initialization Multiplier: 0.49491146627843735
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.23it/s, loss=-0.000344, elapsed time=0.05, total time=12.6]
[I 2025-06-07 14:17:07,531] Trial 509 finished with value: -0.00034399597539717545 and parameters: {'learning_rate': 0.007816909993745144, 'sigma_multiplier': 0.829580628446358, 'num_layers': 2, 'initialization_multiplier': 0.49491146627843735}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 509 final loss: -0.00034400
Trial 510:
  Learning Rate: 0.01745021019551438
  Sigma Multiplier: 0.8843869600443814
  Initialization Multiplier: 0.5764548471947352
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.47it/s, loss=-0.000392, elapsed time=0.07, total time=12.4]
[I 2025-06-07 14:17:19,992] Trial 510 finished with value: -0.0003922034185626275 and parameters: {'learning_rate': 0.01745021019551438, 'sigma_multiplier': 0.8843869600443814, 'num_layers': 2, 'initialization_multiplier': 0.5764548471947352}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 510 final loss: -0.00039220
Trial 511:
  Learning Rate: 0.011183989005017242
  Sigma Multiplier: 1.0432138124789503
  Initialization Multiplier: 0.6217001528210259
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.91it/s, loss=-0.000373, elapsed time=0.07, total time=12]  
[I 2025-06-07 14:17:32,037] Trial 511 finished with value: -0.00037289903984905336 and parameters: {'learning_rate': 0.011183989005017242, 'sigma_multiplier': 1.0432138124789503, 'num_layers': 2, 'initialization_multiplier': 0.6217001528210259}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 511 final loss: -0.00037290
Trial 512:
  Learning Rate: 0.005296703391566309
  Sigma Multiplier: 0.9509851499185437
  Initialization Multiplier: 0.5268485790846353
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.52it/s, loss=-0.000387, elapsed time=0.05, total time=12.3]
[I 2025-06-07 14:17:44,409] Trial 512 finished with value: -0.0003873625636247893 and parameters: {'learning_rate': 0.005296703391566309, 'sigma_multiplier': 0.9509851499185437, 'num_layers': 2, 'initialization_multiplier': 0.5268485790846353}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 512 final loss: -0.00038736
Trial 513:
  Learning Rate: 0.0044024682646083505
  Sigma Multiplier: 1.1230777025675294
  Initialization Multiplier: 0.47407905620070345
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.30it/s, loss=-0.000350, elapsed time=0.08, total time=11.6]
[I 2025-06-07 14:17:56,010] Trial 513 finished with value: -0.0003502967505445124 and parameters: {'learning_rate': 0.0044024682646083505, 'sigma_multiplier': 1.1230777025675294, 'num_layers': 2, 'initialization_multiplier': 0.47407905620070345}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 513 final loss: -0.00035030
Trial 514:
  Learning Rate: 0.009219837364748688
  Sigma Multiplier: 1.0054277517859411
  Initialization Multiplier: 0.6743318991429935
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.81it/s, loss=-0.000342, elapsed time=0.06, total time=12.1]
[I 2025-06-07 14:18:08,169] Trial 514 finished with value: -0.00034223901640914866 and parameters: {'learning_rate': 0.009219837364748688, 'sigma_multiplier': 1.0054277517859411, 'num_layers': 2, 'initialization_multiplier': 0.6743318991429935}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 514 final loss: -0.00034224
Trial 515:
  Learning Rate: 0.015274832091886471
  Sigma Multiplier: 0.9784648790526589
  Initialization Multiplier: 0.5649267695340082
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:16<00:00,  9.15it/s, loss=-0.000212, elapsed time=0.1, total time=16.8] 
[I 2025-06-07 14:18:25,029] Trial 515 finished with value: -0.0002123334889830425 and parameters: {'learning_rate': 0.015274832091886471, 'sigma_multiplier': 0.9784648790526589, 'num_layers': 4, 'initialization_multiplier': 0.5649267695340082}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 515 final loss: -0.00021233
Trial 516:
  Learning Rate: 0.01196790185357083
  Sigma Multiplier: 1.0651736456243213
  Initialization Multiplier: 0.6036623944121896
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.52it/s, loss=-0.000365, elapsed time=0.08, total time=12.3]
[I 2025-06-07 14:18:37,396] Trial 516 finished with value: -0.00036488389908400607 and parameters: {'learning_rate': 0.01196790185357083, 'sigma_multiplier': 1.0651736456243213, 'num_layers': 2, 'initialization_multiplier': 0.6036623944121896}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 516 final loss: -0.00036488
Trial 517:
  Learning Rate: 0.007475106072709745
  Sigma Multiplier: 0.9458937500099676
  Initialization Multiplier: 0.4291406336399576
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.18it/s, loss=-0.000195, elapsed time=0.05, total time=10.2]
[I 2025-06-07 14:18:47,637] Trial 517 finished with value: -0.00019486075121033695 and parameters: {'learning_rate': 0.007475106072709745, 'sigma_multiplier': 0.9458937500099676, 'num_layers': 1, 'initialization_multiplier': 0.4291406336399576}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 517 final loss: -0.00019486
Trial 518:
  Learning Rate: 0.00979633092125363
  Sigma Multiplier: 1.0236063592515785
  Initialization Multiplier: 0.5444489757439587
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.60it/s, loss=-0.000313, elapsed time=0.07, total time=11.3]
[I 2025-06-07 14:18:59,013] Trial 518 finished with value: -0.0003134479445667379 and parameters: {'learning_rate': 0.00979633092125363, 'sigma_multiplier': 1.0236063592515785, 'num_layers': 2, 'initialization_multiplier': 0.5444489757439587}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 518 final loss: -0.00031345
Trial 519:
  Learning Rate: 0.00026864548076850105
  Sigma Multiplier: 0.9311202012244634
  Initialization Multiplier: 0.502247910126775
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.51it/s, loss=0.020919, elapsed time=0.07, total time=12.3]
[I 2025-06-07 14:19:11,390] Trial 519 finished with value: 0.020918750748957392 and parameters: {'learning_rate': 0.00026864548076850105, 'sigma_multiplier': 0.9311202012244634, 'num_layers': 2, 'initialization_multiplier': 0.502247910126775}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 519 final loss: 0.02091875
Trial 520:
  Learning Rate: 0.013198880075807306
  Sigma Multiplier: 0.9863541344175837
  Initialization Multiplier: 0.3637052313694774
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.21it/s, loss=-0.000336, elapsed time=0.1, total time=11.7] 
[I 2025-06-07 14:19:23,185] Trial 520 finished with value: -0.0003355764855871204 and parameters: {'learning_rate': 0.013198880075807306, 'sigma_multiplier': 0.9863541344175837, 'num_layers': 2, 'initialization_multiplier': 0.3637052313694774}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 520 final loss: -0.00033558
Trial 521:
  Learning Rate: 0.006226635375965414
  Sigma Multiplier: 0.8983045958966619
  Initialization Multiplier: 0.639912397015708
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.41it/s, loss=-0.000415, elapsed time=0.07, total time=12.3]
[I 2025-06-07 14:19:35,586] Trial 521 finished with value: -0.0004153750451419777 and parameters: {'learning_rate': 0.006226635375965414, 'sigma_multiplier': 0.8983045958966619, 'num_layers': 2, 'initialization_multiplier': 0.639912397015708}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 521 final loss: -0.00041538
Trial 522:
  Learning Rate: 0.008291850411793004
  Sigma Multiplier: 1.0697407925461475
  Initialization Multiplier: 0.589623222790543
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.15it/s, loss=-0.000342, elapsed time=0.07, total time=11.8]
[I 2025-06-07 14:19:47,400] Trial 522 finished with value: -0.0003419821224753935 and parameters: {'learning_rate': 0.008291850411793004, 'sigma_multiplier': 1.0697407925461475, 'num_layers': 2, 'initialization_multiplier': 0.589623222790543}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 522 final loss: -0.00034198
Trial 523:
  Learning Rate: 0.019381820090167725
  Sigma Multiplier: 1.01398731383645
  Initialization Multiplier: 0.7017245680358173
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.90it/s, loss=-0.000047, elapsed time=0.07, total time=12]  
[I 2025-06-07 14:19:59,414] Trial 523 finished with value: -4.737273212263328e-05 and parameters: {'learning_rate': 0.019381820090167725, 'sigma_multiplier': 1.01398731383645, 'num_layers': 2, 'initialization_multiplier': 0.7017245680358173}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 523 final loss: -0.00004737
Trial 524:
  Learning Rate: 0.002927170616632618
  Sigma Multiplier: 1.1571439863585269
  Initialization Multiplier: 0.46669076657075353
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.49it/s, loss=-0.000397, elapsed time=0.06, total time=11.5]
[I 2025-06-07 14:20:10,925] Trial 524 finished with value: -0.0003969598280201449 and parameters: {'learning_rate': 0.002927170616632618, 'sigma_multiplier': 1.1571439863585269, 'num_layers': 2, 'initialization_multiplier': 0.46669076657075353}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 524 final loss: -0.00039696
Trial 525:
  Learning Rate: 0.010424619389391869
  Sigma Multiplier: 0.9693423983890478
  Initialization Multiplier: 0.5592470467103369
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.67it/s, loss=-0.000082, elapsed time=0.09, total time=14.4]
[I 2025-06-07 14:20:25,420] Trial 525 finished with value: -8.159782236839585e-05 and parameters: {'learning_rate': 0.010424619389391869, 'sigma_multiplier': 0.9693423983890478, 'num_layers': 3, 'initialization_multiplier': 0.5592470467103369}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 525 final loss: -0.00008160
Trial 526:
  Learning Rate: 0.007021311687098382
  Sigma Multiplier: 1.1064183633691977
  Initialization Multiplier: 0.62250443986899
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.54it/s, loss=-0.000341, elapsed time=0.07, total time=11.3]
[I 2025-06-07 14:20:36,814] Trial 526 finished with value: -0.000341409226473938 and parameters: {'learning_rate': 0.007021311687098382, 'sigma_multiplier': 1.1064183633691977, 'num_layers': 2, 'initialization_multiplier': 0.62250443986899}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 526 final loss: -0.00034141
Trial 527:
  Learning Rate: 0.014932315244149189
  Sigma Multiplier: 1.0453332028967914
  Initialization Multiplier: 0.5022794391150878
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.08it/s, loss=-0.000400, elapsed time=0.05, total time=11.8]
[I 2025-06-07 14:20:48,653] Trial 527 finished with value: -0.00040031612726635594 and parameters: {'learning_rate': 0.014932315244149189, 'sigma_multiplier': 1.0453332028967914, 'num_layers': 2, 'initialization_multiplier': 0.5022794391150878}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 527 final loss: -0.00040032
Trial 528:
  Learning Rate: 0.008801205011974171
  Sigma Multiplier: 0.8599866491656303
  Initialization Multiplier: 0.5429086713326593
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.23it/s, loss=-0.000359, elapsed time=0.06, total time=12.6]
[I 2025-06-07 14:21:01,320] Trial 528 finished with value: -0.0003591570498848118 and parameters: {'learning_rate': 0.008801205011974171, 'sigma_multiplier': 0.8599866491656303, 'num_layers': 2, 'initialization_multiplier': 0.5429086713326593}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 528 final loss: -0.00035916
Trial 529:
  Learning Rate: 0.012131987056593442
  Sigma Multiplier: 0.9986493641776997
  Initialization Multiplier: 1.1532865943587927
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.50it/s, loss=-0.000156, elapsed time=0.08, total time=12.4]
[I 2025-06-07 14:21:13,757] Trial 529 finished with value: -0.00015596090243328575 and parameters: {'learning_rate': 0.012131987056593442, 'sigma_multiplier': 0.9986493641776997, 'num_layers': 2, 'initialization_multiplier': 1.1532865943587927}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 529 final loss: -0.00015596
Trial 530:
  Learning Rate: 0.010215617374269103
  Sigma Multiplier: 0.9344925797963093
  Initialization Multiplier: 0.588218632467665
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.92it/s, loss=-0.000328, elapsed time=0.09, total time=11.9]
[I 2025-06-07 14:21:25,726] Trial 530 finished with value: -0.0003280276932690296 and parameters: {'learning_rate': 0.010215617374269103, 'sigma_multiplier': 0.9344925797963093, 'num_layers': 2, 'initialization_multiplier': 0.588218632467665}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 530 final loss: -0.00032803
Trial 531:
  Learning Rate: 0.0010933915700973762
  Sigma Multiplier: 1.044884766028485
  Initialization Multiplier: 0.6621532725365346
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.91it/s, loss=0.019121, elapsed time=0.06, total time=11.9]
[I 2025-06-07 14:21:37,721] Trial 531 finished with value: 0.019120936509475116 and parameters: {'learning_rate': 0.0010933915700973762, 'sigma_multiplier': 1.044884766028485, 'num_layers': 2, 'initialization_multiplier': 0.6621532725365346}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 531 final loss: 0.01912094
Trial 532:
  Learning Rate: 0.001841899009741934
  Sigma Multiplier: 1.0863136678908352
  Initialization Multiplier: 0.4435854543131784
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 10.91it/s, loss=-0.000384, elapsed time=0.05, total time=14.1]
[I 2025-06-07 14:21:51,825] Trial 532 finished with value: -0.00038363840354590584 and parameters: {'learning_rate': 0.001841899009741934, 'sigma_multiplier': 1.0863136678908352, 'num_layers': 2, 'initialization_multiplier': 0.4435854543131784}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 532 final loss: -0.00038364
Trial 533:
  Learning Rate: 0.007862351698396613
  Sigma Multiplier: 0.9124720495655532
  Initialization Multiplier: 0.5262782980666532
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.96it/s, loss=-0.000374, elapsed time=0.07, total time=12.9]
[I 2025-06-07 14:22:04,792] Trial 533 finished with value: -0.0003735808593610962 and parameters: {'learning_rate': 0.007862351698396613, 'sigma_multiplier': 0.9124720495655532, 'num_layers': 2, 'initialization_multiplier': 0.5262782980666532}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 533 final loss: -0.00037358
Trial 534:
  Learning Rate: 0.005671068542497572
  Sigma Multiplier: 0.9714671349481387
  Initialization Multiplier: 0.7612281981513376
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.17it/s, loss=-0.000064, elapsed time=0.05, total time=12.8]
[I 2025-06-07 14:22:17,689] Trial 534 finished with value: -6.391584037220211e-05 and parameters: {'learning_rate': 0.005671068542497572, 'sigma_multiplier': 0.9714671349481387, 'num_layers': 2, 'initialization_multiplier': 0.7612281981513376}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 534 final loss: -0.00006392
Trial 535:
  Learning Rate: 0.013309892854161927
  Sigma Multiplier: 1.0169806740095677
  Initialization Multiplier: 0.6168717557428111
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.42it/s, loss=-0.000346, elapsed time=0.06, total time=12.4]
[I 2025-06-07 14:22:30,115] Trial 535 finished with value: -0.0003461357091604852 and parameters: {'learning_rate': 0.013309892854161927, 'sigma_multiplier': 1.0169806740095677, 'num_layers': 2, 'initialization_multiplier': 0.6168717557428111}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 535 final loss: -0.00034614
Trial 536:
  Learning Rate: 0.00922511457295934
  Sigma Multiplier: 0.9666911302999031
  Initialization Multiplier: 0.5710814474386768
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.16it/s, loss=-0.000280, elapsed time=0.1, total time=12.7] 
[I 2025-06-07 14:22:42,881] Trial 536 finished with value: -0.0002800244459803103 and parameters: {'learning_rate': 0.00922511457295934, 'sigma_multiplier': 0.9666911302999031, 'num_layers': 2, 'initialization_multiplier': 0.5710814474386768}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 536 final loss: -0.00028002
Trial 537:
  Learning Rate: 0.016354886945516664
  Sigma Multiplier: 1.0519384183650222
  Initialization Multiplier: 0.49315749540447473
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.67it/s, loss=-0.000326, elapsed time=0.05, total time=12.2]
[I 2025-06-07 14:22:55,090] Trial 537 finished with value: -0.00032600451846422063 and parameters: {'learning_rate': 0.016354886945516664, 'sigma_multiplier': 1.0519384183650222, 'num_layers': 2, 'initialization_multiplier': 0.49315749540447473}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 537 final loss: -0.00032600
Trial 538:
  Learning Rate: 0.011659563194301767
  Sigma Multiplier: 1.0030405005741334
  Initialization Multiplier: 0.39716406720599196
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.08it/s, loss=-0.000385, elapsed time=0.06, total time=11.8]
[I 2025-06-07 14:23:07,004] Trial 538 finished with value: -0.00038478970643519226 and parameters: {'learning_rate': 0.011659563194301767, 'sigma_multiplier': 1.0030405005741334, 'num_layers': 2, 'initialization_multiplier': 0.39716406720599196}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 538 final loss: -0.00038479
Trial 539:
  Learning Rate: 0.004852306877319026
  Sigma Multiplier: 0.7987242598466722
  Initialization Multiplier: 0.6442410979925488
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.93it/s, loss=-0.000232, elapsed time=0.06, total time=12.9]
[I 2025-06-07 14:23:19,928] Trial 539 finished with value: -0.00023155951416522756 and parameters: {'learning_rate': 0.004852306877319026, 'sigma_multiplier': 0.7987242598466722, 'num_layers': 2, 'initialization_multiplier': 0.6442410979925488}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 539 final loss: -0.00023156
Trial 540:
  Learning Rate: 0.0068889643175707015
  Sigma Multiplier: 0.8954221621057544
  Initialization Multiplier: 0.6980614399679058
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.52it/s, loss=-0.000296, elapsed time=0.07, total time=12.3]
[I 2025-06-07 14:23:32,242] Trial 540 finished with value: -0.00029596190265555015 and parameters: {'learning_rate': 0.0068889643175707015, 'sigma_multiplier': 0.8954221621057544, 'num_layers': 2, 'initialization_multiplier': 0.6980614399679058}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 540 final loss: -0.00029596
Trial 541:
  Learning Rate: 0.010879105370408159
  Sigma Multiplier: 1.930608539245565
  Initialization Multiplier: 0.5907389089563695
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.20it/s, loss=-0.000214, elapsed time=0.05, total time=10.2]
[I 2025-06-07 14:23:42,485] Trial 541 finished with value: -0.00021380631090761053 and parameters: {'learning_rate': 0.010879105370408159, 'sigma_multiplier': 1.930608539245565, 'num_layers': 2, 'initialization_multiplier': 0.5907389089563695}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 541 final loss: -0.00021381
Trial 542:
  Learning Rate: 0.008950143951180677
  Sigma Multiplier: 0.9408700511017887
  Initialization Multiplier: 0.8150185936366001
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.46it/s, loss=-0.000196, elapsed time=0.06, total time=12.4]
[I 2025-06-07 14:23:54,897] Trial 542 finished with value: -0.00019596140429883367 and parameters: {'learning_rate': 0.008950143951180677, 'sigma_multiplier': 0.9408700511017887, 'num_layers': 2, 'initialization_multiplier': 0.8150185936366001}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 542 final loss: -0.00019596
Trial 543:
  Learning Rate: 0.007895483142918863
  Sigma Multiplier: 1.1230655151318278
  Initialization Multiplier: 0.5323883262187499
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.35it/s, loss=-0.000323, elapsed time=0.06, total time=11.6]
[I 2025-06-07 14:24:06,509] Trial 543 finished with value: -0.00032266765395175387 and parameters: {'learning_rate': 0.007895483142918863, 'sigma_multiplier': 1.1230655151318278, 'num_layers': 2, 'initialization_multiplier': 0.5323883262187499}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 543 final loss: -0.00032267
Trial 544:
  Learning Rate: 0.014268527663130925
  Sigma Multiplier: 1.079938316786168
  Initialization Multiplier: 0.47295558235579677
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.46it/s, loss=-0.000440, elapsed time=0.06, total time=11.4]
[I 2025-06-07 14:24:18,004] Trial 544 finished with value: -0.000439963039844132 and parameters: {'learning_rate': 0.014268527663130925, 'sigma_multiplier': 1.079938316786168, 'num_layers': 2, 'initialization_multiplier': 0.47295558235579677}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 544 final loss: -0.00043996
Trial 545:
  Learning Rate: 0.02275045580341487
  Sigma Multiplier: 1.148087992367468
  Initialization Multiplier: 0.45330837291740345
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.15it/s, loss=-0.000387, elapsed time=0.09, total time=11.7]
[I 2025-06-07 14:24:29,754] Trial 545 finished with value: -0.0003870264795396437 and parameters: {'learning_rate': 0.02275045580341487, 'sigma_multiplier': 1.148087992367468, 'num_layers': 2, 'initialization_multiplier': 0.45330837291740345}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 545 final loss: -0.00038703
Trial 546:
  Learning Rate: 0.01433745818149204
  Sigma Multiplier: 1.075194976439697
  Initialization Multiplier: 0.39483010223594955
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.32it/s, loss=-0.000328, elapsed time=0.06, total time=11.7]
[I 2025-06-07 14:24:41,454] Trial 546 finished with value: -0.0003276749628029848 and parameters: {'learning_rate': 0.01433745818149204, 'sigma_multiplier': 1.075194976439697, 'num_layers': 2, 'initialization_multiplier': 0.39483010223594955}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 546 final loss: -0.00032767
Trial 547:
  Learning Rate: 0.01781598675057018
  Sigma Multiplier: 1.093627018438241
  Initialization Multiplier: 0.4908260184503289
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.42it/s, loss=-0.000325, elapsed time=0.06, total time=11.4]
[I 2025-06-07 14:24:52,904] Trial 547 finished with value: -0.00032489712220825034 and parameters: {'learning_rate': 0.01781598675057018, 'sigma_multiplier': 1.093627018438241, 'num_layers': 2, 'initialization_multiplier': 0.4908260184503289}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 547 final loss: -0.00032490
Trial 548:
  Learning Rate: 0.01527889184597933
  Sigma Multiplier: 1.6898127256508557
  Initialization Multiplier: 0.4240047476715527
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.69it/s, loss=-0.000213, elapsed time=0.09, total time=10.5]
[I 2025-06-07 14:25:03,475] Trial 548 finished with value: -0.0002130389331097889 and parameters: {'learning_rate': 0.01527889184597933, 'sigma_multiplier': 1.6898127256508557, 'num_layers': 2, 'initialization_multiplier': 0.4240047476715527}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 548 final loss: -0.00021304
Trial 549:
  Learning Rate: 0.01811843526388612
  Sigma Multiplier: 1.1271087605730132
  Initialization Multiplier: 1.0488767670568497
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.25it/s, loss=-0.000195, elapsed time=0.07, total time=11.6]
[I 2025-06-07 14:25:15,173] Trial 549 finished with value: -0.00019529034866671836 and parameters: {'learning_rate': 0.01811843526388612, 'sigma_multiplier': 1.1271087605730132, 'num_layers': 2, 'initialization_multiplier': 1.0488767670568497}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 549 final loss: -0.00019529
Trial 550:
  Learning Rate: 0.013280251823198915
  Sigma Multiplier: 1.032516057211448
  Initialization Multiplier: 0.46750394971544357
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.22it/s, loss=-0.000283, elapsed time=0.07, total time=11.7]
[I 2025-06-07 14:25:26,889] Trial 550 finished with value: -0.0002825360628522642 and parameters: {'learning_rate': 0.013280251823198915, 'sigma_multiplier': 1.032516057211448, 'num_layers': 2, 'initialization_multiplier': 0.46750394971544357}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 550 final loss: -0.00028254
Trial 551:
  Learning Rate: 0.026935995257920883
  Sigma Multiplier: 0.8567900961951423
  Initialization Multiplier: 0.5125206230743078
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.39it/s, loss=-0.000250, elapsed time=0.06, total time=12.5]
[I 2025-06-07 14:25:39,399] Trial 551 finished with value: -0.0002501364763686498 and parameters: {'learning_rate': 0.026935995257920883, 'sigma_multiplier': 0.8567900961951423, 'num_layers': 2, 'initialization_multiplier': 0.5125206230743078}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 551 final loss: -0.00025014
Trial 552:
  Learning Rate: 0.021156936203944456
  Sigma Multiplier: 1.075002275853098
  Initialization Multiplier: 0.35216919771741273
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.61it/s, loss=-0.000403, elapsed time=0.05, total time=11.3]
[I 2025-06-07 14:25:50,719] Trial 552 finished with value: -0.00040286363310300475 and parameters: {'learning_rate': 0.021156936203944456, 'sigma_multiplier': 1.075002275853098, 'num_layers': 2, 'initialization_multiplier': 0.35216919771741273}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 552 final loss: -0.00040286
Trial 553:
  Learning Rate: 0.01253597785137797
  Sigma Multiplier: 0.9944422517456006
  Initialization Multiplier: 0.4369913398688736
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.21it/s, loss=-0.000336, elapsed time=0.06, total time=11.6]
[I 2025-06-07 14:26:02,413] Trial 553 finished with value: -0.00033616378750120536 and parameters: {'learning_rate': 0.01253597785137797, 'sigma_multiplier': 0.9944422517456006, 'num_layers': 2, 'initialization_multiplier': 0.4369913398688736}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 553 final loss: -0.00033616
Trial 554:
  Learning Rate: 0.003701714673131165
  Sigma Multiplier: 0.9567258708865047
  Initialization Multiplier: 0.4764848849595965
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.05it/s, loss=-0.000351, elapsed time=0.05, total time=11.8]
[I 2025-06-07 14:26:14,253] Trial 554 finished with value: -0.00035057708603674893 and parameters: {'learning_rate': 0.003701714673131165, 'sigma_multiplier': 0.9567258708865047, 'num_layers': 2, 'initialization_multiplier': 0.4764848849595965}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 554 final loss: -0.00035058
Trial 555:
  Learning Rate: 0.011193398089675599
  Sigma Multiplier: 1.0485761834477105
  Initialization Multiplier: 0.5408305835851653
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.46it/s, loss=-0.000296, elapsed time=0.08, total time=11.5]
[I 2025-06-07 14:26:25,792] Trial 555 finished with value: -0.0002961530144086995 and parameters: {'learning_rate': 0.011193398089675599, 'sigma_multiplier': 1.0485761834477105, 'num_layers': 2, 'initialization_multiplier': 0.5408305835851653}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 555 final loss: -0.00029615
Trial 556:
  Learning Rate: 0.016427536025149167
  Sigma Multiplier: 0.6713087239094702
  Initialization Multiplier: 0.5582769929217895
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.39it/s, loss=-0.000250, elapsed time=0.07, total time=13.6]
[I 2025-06-07 14:26:39,387] Trial 556 finished with value: -0.00024972857503566904 and parameters: {'learning_rate': 0.016427536025149167, 'sigma_multiplier': 0.6713087239094702, 'num_layers': 2, 'initialization_multiplier': 0.5582769929217895}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 556 final loss: -0.00024973
Trial 557:
  Learning Rate: 0.014667553542717898
  Sigma Multiplier: 0.9203960830648054
  Initialization Multiplier: 0.49746893987568436
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.24it/s, loss=-0.000389, elapsed time=0.05, total time=11.6]
[I 2025-06-07 14:26:51,073] Trial 557 finished with value: -0.0003893209972493026 and parameters: {'learning_rate': 0.014667553542717898, 'sigma_multiplier': 0.9203960830648054, 'num_layers': 2, 'initialization_multiplier': 0.49746893987568436}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 557 final loss: -0.00038932
Trial 558:
  Learning Rate: 0.010545306771966636
  Sigma Multiplier: 1.177997611451949
  Initialization Multiplier: 0.42222706207861893
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.84it/s, loss=-0.000395, elapsed time=0.05, total time=11.2]
[I 2025-06-07 14:27:02,292] Trial 558 finished with value: -0.00039498233024595563 and parameters: {'learning_rate': 0.010545306771966636, 'sigma_multiplier': 1.177997611451949, 'num_layers': 2, 'initialization_multiplier': 0.42222706207861893}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 558 final loss: -0.00039498
Trial 559:
  Learning Rate: 0.012589834491758082
  Sigma Multiplier: 1.0023343193841676
  Initialization Multiplier: 0.6210049019188748
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.33it/s, loss=-0.000353, elapsed time=0.06, total time=11.6]
[I 2025-06-07 14:27:13,890] Trial 559 finished with value: -0.00035302923842647516 and parameters: {'learning_rate': 0.012589834491758082, 'sigma_multiplier': 1.0023343193841676, 'num_layers': 2, 'initialization_multiplier': 0.6210049019188748}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 559 final loss: -0.00035303
Trial 560:
  Learning Rate: 0.006710571157210018
  Sigma Multiplier: 0.9729366148956016
  Initialization Multiplier: 0.5260562843121317
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.66it/s, loss=-0.000356, elapsed time=0.06, total time=11.3]
[I 2025-06-07 14:27:25,255] Trial 560 finished with value: -0.00035627310906501125 and parameters: {'learning_rate': 0.006710571157210018, 'sigma_multiplier': 0.9729366148956016, 'num_layers': 2, 'initialization_multiplier': 0.5260562843121317}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 560 final loss: -0.00035627
Trial 561:
  Learning Rate: 0.010574839950748736
  Sigma Multiplier: 1.0952063755559232
  Initialization Multiplier: 0.5669101474481864
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.67it/s, loss=-0.000409, elapsed time=0.05, total time=11.2]
[I 2025-06-07 14:27:36,523] Trial 561 finished with value: -0.00040941861782524305 and parameters: {'learning_rate': 0.010574839950748736, 'sigma_multiplier': 1.0952063755559232, 'num_layers': 2, 'initialization_multiplier': 0.5669101474481864}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 561 final loss: -0.00040942
Trial 562:
  Learning Rate: 0.01977299921952762
  Sigma Multiplier: 0.4562053757608675
  Initialization Multiplier: 0.46379471834825975
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.33it/s, loss=0.000334, elapsed time=0.1, total time=14.8] 
[I 2025-06-07 14:27:51,407] Trial 562 finished with value: 0.00033440279887272034 and parameters: {'learning_rate': 0.01977299921952762, 'sigma_multiplier': 0.4562053757608675, 'num_layers': 2, 'initialization_multiplier': 0.46379471834825975}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 562 final loss: 0.00033440
Trial 563:
  Learning Rate: 0.013686082772880916
  Sigma Multiplier: 1.0200070110527113
  Initialization Multiplier: 0.6731013023247105
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.93it/s, loss=-0.000342, elapsed time=0.07, total time=11.9]
[I 2025-06-07 14:28:03,362] Trial 563 finished with value: -0.00034233594479320253 and parameters: {'learning_rate': 0.013686082772880916, 'sigma_multiplier': 1.0200070110527113, 'num_layers': 2, 'initialization_multiplier': 0.6731013023247105}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 563 final loss: -0.00034234
Trial 564:
  Learning Rate: 0.009267819209260978
  Sigma Multiplier: 0.9245152630478557
  Initialization Multiplier: 0.5096080334519296
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.07it/s, loss=-0.000380, elapsed time=0.06, total time=12.8]
[I 2025-06-07 14:28:16,210] Trial 564 finished with value: -0.0003803517620219168 and parameters: {'learning_rate': 0.009267819209260978, 'sigma_multiplier': 0.9245152630478557, 'num_layers': 2, 'initialization_multiplier': 0.5096080334519296}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 564 final loss: -0.00038035
Trial 565:
  Learning Rate: 0.005989299675026271
  Sigma Multiplier: 1.0362047515445456
  Initialization Multiplier: 0.603568007169099
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.47it/s, loss=-0.000488, elapsed time=0.06, total time=11.6]
[I 2025-06-07 14:28:27,822] Trial 565 finished with value: -0.0004876056830113411 and parameters: {'learning_rate': 0.005989299675026271, 'sigma_multiplier': 1.0362047515445456, 'num_layers': 2, 'initialization_multiplier': 0.603568007169099}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 565 final loss: -0.00048761
Trial 566:
  Learning Rate: 0.004226389590579168
  Sigma Multiplier: 1.1075819288777582
  Initialization Multiplier: 0.638067520600896
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.46it/s, loss=-0.000314, elapsed time=0.07, total time=12.3]
[I 2025-06-07 14:28:40,227] Trial 566 finished with value: -0.0003139658674043982 and parameters: {'learning_rate': 0.004226389590579168, 'sigma_multiplier': 1.1075819288777582, 'num_layers': 2, 'initialization_multiplier': 0.638067520600896}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 566 final loss: -0.00031397
Trial 567:
  Learning Rate: 0.005081364423397803
  Sigma Multiplier: 1.0649461069154462
  Initialization Multiplier: 0.7368324489986204
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.90it/s, loss=-0.000438, elapsed time=0.06, total time=9.8] 
[I 2025-06-07 14:28:50,068] Trial 567 finished with value: -0.00043849156553393153 and parameters: {'learning_rate': 0.005081364423397803, 'sigma_multiplier': 1.0649461069154462, 'num_layers': 1, 'initialization_multiplier': 0.7368324489986204}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 567 final loss: -0.00043849
Trial 568:
  Learning Rate: 0.005169005310204555
  Sigma Multiplier: 1.1454321147402684
  Initialization Multiplier: 0.8625158053651996
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.01it/s, loss=-0.000411, elapsed time=0.07, total time=9.7] 
[I 2025-06-07 14:28:59,824] Trial 568 finished with value: -0.0004106166862311109 and parameters: {'learning_rate': 0.005169005310204555, 'sigma_multiplier': 1.1454321147402684, 'num_layers': 1, 'initialization_multiplier': 0.8625158053651996}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 568 final loss: -0.00041062
Trial 569:
  Learning Rate: 0.004387928940891391
  Sigma Multiplier: 1.0730300327701883
  Initialization Multiplier: 0.7496669591984704
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.28it/s, loss=-0.000336, elapsed time=0.05, total time=9.57]
[I 2025-06-07 14:29:09,437] Trial 569 finished with value: -0.0003363959909492522 and parameters: {'learning_rate': 0.004387928940891391, 'sigma_multiplier': 1.0730300327701883, 'num_layers': 1, 'initialization_multiplier': 0.7496669591984704}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 569 final loss: -0.00033640
Trial 570:
  Learning Rate: 0.005020496088935337
  Sigma Multiplier: 1.1239864651378166
  Initialization Multiplier: 0.5537594595199471
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.38it/s, loss=-0.000327, elapsed time=0.05, total time=9.46]
[I 2025-06-07 14:29:18,933] Trial 570 finished with value: -0.00032739139231832714 and parameters: {'learning_rate': 0.005020496088935337, 'sigma_multiplier': 1.1239864651378166, 'num_layers': 1, 'initialization_multiplier': 0.5537594595199471}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 570 final loss: -0.00032739
Trial 571:
  Learning Rate: 0.003972572642090207
  Sigma Multiplier: 1.0675442551820375
  Initialization Multiplier: 0.46400574909431996
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.58it/s, loss=-0.000389, elapsed time=0.09, total time=13.3]
[I 2025-06-07 14:29:32,288] Trial 571 finished with value: -0.0003887639672131855 and parameters: {'learning_rate': 0.003972572642090207, 'sigma_multiplier': 1.0675442551820375, 'num_layers': 3, 'initialization_multiplier': 0.46400574909431996}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 571 final loss: -0.00038876
Trial 572:
  Learning Rate: 0.003513901695652391
  Sigma Multiplier: 1.0513472638675372
  Initialization Multiplier: 0.7121914108216727
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.45it/s, loss=-0.000245, elapsed time=0.06, total time=9.49]
[I 2025-06-07 14:29:41,844] Trial 572 finished with value: -0.00024546129621014456 and parameters: {'learning_rate': 0.003513901695652391, 'sigma_multiplier': 1.0513472638675372, 'num_layers': 1, 'initialization_multiplier': 0.7121914108216727}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 572 final loss: -0.00024546
Trial 573:
  Learning Rate: 3.027675952889044e-05
  Sigma Multiplier: 1.096685773910027
  Initialization Multiplier: 0.7817741907428135
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.12it/s, loss=0.016373, elapsed time=0.05, total time=9.62]
[I 2025-06-07 14:29:51,496] Trial 573 finished with value: 0.01637320470020641 and parameters: {'learning_rate': 3.027675952889044e-05, 'sigma_multiplier': 1.096685773910027, 'num_layers': 1, 'initialization_multiplier': 0.7817741907428135}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 573 final loss: 0.01637320
Trial 574:
  Learning Rate: 0.005709141497031248
  Sigma Multiplier: 1.0393011660820626
  Initialization Multiplier: 0.5969151506972343
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.19it/s, loss=-0.000195, elapsed time=0.06, total time=9.62]
[I 2025-06-07 14:30:01,163] Trial 574 finished with value: -0.00019537222439984118 and parameters: {'learning_rate': 0.005709141497031248, 'sigma_multiplier': 1.0393011660820626, 'num_layers': 1, 'initialization_multiplier': 0.5969151506972343}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 574 final loss: -0.00019537
Trial 575:
  Learning Rate: 0.005761524331366801
  Sigma Multiplier: 1.0654042503021326
  Initialization Multiplier: 0.38324720260915335
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.24it/s, loss=-0.000405, elapsed time=0.09, total time=13.7]
[I 2025-06-07 14:30:14,972] Trial 575 finished with value: -0.00040470570796977134 and parameters: {'learning_rate': 0.005761524331366801, 'sigma_multiplier': 1.0654042503021326, 'num_layers': 3, 'initialization_multiplier': 0.38324720260915335}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 575 final loss: -0.00040471
Trial 576:
  Learning Rate: 0.037036281805768366
  Sigma Multiplier: 1.028044793536628
  Initialization Multiplier: 0.5280930801024858
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.14it/s, loss=-0.000274, elapsed time=0.06, total time=9.6]  
[I 2025-06-07 14:30:24,611] Trial 576 finished with value: -0.000274024026828871 and parameters: {'learning_rate': 0.037036281805768366, 'sigma_multiplier': 1.028044793536628, 'num_layers': 1, 'initialization_multiplier': 0.5280930801024858}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 576 final loss: -0.00027402
Trial 577:
  Learning Rate: 0.004674320608631284
  Sigma Multiplier: 1.123295909028293
  Initialization Multiplier: 0.48769503150355725
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.27it/s, loss=-0.000232, elapsed time=0.06, total time=9.5] 
[I 2025-06-07 14:30:34,152] Trial 577 finished with value: -0.00023236475575689415 and parameters: {'learning_rate': 0.004674320608631284, 'sigma_multiplier': 1.123295909028293, 'num_layers': 1, 'initialization_multiplier': 0.48769503150355725}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 577 final loss: -0.00023236
Trial 578:
  Learning Rate: 0.005794645480029808
  Sigma Multiplier: 1.1898627054033162
  Initialization Multiplier: 0.4286372423653082
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.68it/s, loss=-0.000312, elapsed time=0.06, total time=13.2]
[I 2025-06-07 14:30:47,420] Trial 578 finished with value: -0.0003118963715987502 and parameters: {'learning_rate': 0.005794645480029808, 'sigma_multiplier': 1.1898627054033162, 'num_layers': 3, 'initialization_multiplier': 0.4286372423653082}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 578 final loss: -0.00031190
Trial 579:
  Learning Rate: 0.005106663070031187
  Sigma Multiplier: 0.9890281611944065
  Initialization Multiplier: 0.6060521539235909
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.35it/s, loss=-0.000050, elapsed time=0.05, total time=9.39]
[I 2025-06-07 14:30:56,863] Trial 579 finished with value: -4.998890558341012e-05 and parameters: {'learning_rate': 0.005106663070031187, 'sigma_multiplier': 0.9890281611944065, 'num_layers': 1, 'initialization_multiplier': 0.6060521539235909}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 579 final loss: -0.00004999
Trial 580:
  Learning Rate: 0.006529838785567829
  Sigma Multiplier: 1.1570826782899115
  Initialization Multiplier: 0.551364606673881
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.36it/s, loss=-0.000165, elapsed time=0.05, total time=13.5]
[I 2025-06-07 14:31:10,429] Trial 580 finished with value: -0.00016513441673014645 and parameters: {'learning_rate': 0.006529838785567829, 'sigma_multiplier': 1.1570826782899115, 'num_layers': 3, 'initialization_multiplier': 0.551364606673881}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 580 final loss: -0.00016513
Trial 581:
  Learning Rate: 0.004351320744314017
  Sigma Multiplier: 1.0893390362599067
  Initialization Multiplier: 0.6871279340893868
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.22it/s, loss=-0.000332, elapsed time=0.06, total time=8.98]
[I 2025-06-07 14:31:19,448] Trial 581 finished with value: -0.0003323838724699379 and parameters: {'learning_rate': 0.004351320744314017, 'sigma_multiplier': 1.0893390362599067, 'num_layers': 1, 'initialization_multiplier': 0.6871279340893868}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 581 final loss: -0.00033238
Trial 582:
  Learning Rate: 0.00621828439507321
  Sigma Multiplier: 1.0370340768277175
  Initialization Multiplier: 0.573122246566628
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.41it/s, loss=-0.000174, elapsed time=0.09, total time=13.4]
[I 2025-06-07 14:31:32,921] Trial 582 finished with value: -0.00017409940707691467 and parameters: {'learning_rate': 0.00621828439507321, 'sigma_multiplier': 1.0370340768277175, 'num_layers': 3, 'initialization_multiplier': 0.573122246566628}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 582 final loss: -0.00017410
Trial 583:
  Learning Rate: 0.007863718953967215
  Sigma Multiplier: 0.9966796621249847
  Initialization Multiplier: 0.5116045000567276
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:15<00:00,  9.97it/s, loss=-0.000131, elapsed time=0.1, total time=15.3] 
[I 2025-06-07 14:31:48,344] Trial 583 finished with value: -0.00013095315355297912 and parameters: {'learning_rate': 0.007863718953967215, 'sigma_multiplier': 0.9966796621249847, 'num_layers': 4, 'initialization_multiplier': 0.5116045000567276}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 583 final loss: -0.00013095
Trial 584:
  Learning Rate: 0.003285538442107553
  Sigma Multiplier: 0.9766212859266665
  Initialization Multiplier: 0.6294119393360736
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.12it/s, loss=-0.000279, elapsed time=0.06, total time=10.9]
[I 2025-06-07 14:31:59,362] Trial 584 finished with value: -0.0002790009601847465 and parameters: {'learning_rate': 0.003285538442107553, 'sigma_multiplier': 0.9766212859266665, 'num_layers': 2, 'initialization_multiplier': 0.6294119393360736}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 584 final loss: -0.00027900
Trial 585:
  Learning Rate: 0.007393522267474357
  Sigma Multiplier: 1.0476599796830746
  Initialization Multiplier: 0.46844034439627746
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.19it/s, loss=-0.000385, elapsed time=0.05, total time=10.8]
[I 2025-06-07 14:32:10,248] Trial 585 finished with value: -0.0003847056571960205 and parameters: {'learning_rate': 0.007393522267474357, 'sigma_multiplier': 1.0476599796830746, 'num_layers': 2, 'initialization_multiplier': 0.46844034439627746}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 585 final loss: -0.00038471
Trial 586:
  Learning Rate: 0.005456589814108792
  Sigma Multiplier: 1.0864033809893894
  Initialization Multiplier: 0.5516184221346051
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.19it/s, loss=-0.000370, elapsed time=0.05, total time=10.8]
[I 2025-06-07 14:32:21,161] Trial 586 finished with value: -0.0003697418525749576 and parameters: {'learning_rate': 0.005456589814108792, 'sigma_multiplier': 1.0864033809893894, 'num_layers': 2, 'initialization_multiplier': 0.5516184221346051}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 586 final loss: -0.00036974
Trial 587:
  Learning Rate: 0.008276695396012564
  Sigma Multiplier: 1.0076325225614136
  Initialization Multiplier: 0.5139990607714872
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.81it/s, loss=-0.000318, elapsed time=0.07, total time=11.1]
[I 2025-06-07 14:32:32,338] Trial 587 finished with value: -0.0003175131004202259 and parameters: {'learning_rate': 0.008276695396012564, 'sigma_multiplier': 1.0076325225614136, 'num_layers': 2, 'initialization_multiplier': 0.5139990607714872}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 587 final loss: -0.00031751
Trial 588:
  Learning Rate: 0.00638385075999232
  Sigma Multiplier: 0.8833397035911749
  Initialization Multiplier: 0.6651835709207707
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.89it/s, loss=-0.000185, elapsed time=0.05, total time=11.9]
[I 2025-06-07 14:32:44,345] Trial 588 finished with value: -0.00018516637369418815 and parameters: {'learning_rate': 0.00638385075999232, 'sigma_multiplier': 0.8833397035911749, 'num_layers': 2, 'initialization_multiplier': 0.6651835709207707}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 588 final loss: -0.00018517
Trial 589:
  Learning Rate: 0.00962228453168651
  Sigma Multiplier: 0.947608129962642
  Initialization Multiplier: 0.6018153849427126
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:17<00:00,  8.65it/s, loss=0.000028, elapsed time=0.11, total time=17.9] 
[I 2025-06-07 14:33:02,333] Trial 589 finished with value: 2.7999819717085846e-05 and parameters: {'learning_rate': 0.00962228453168651, 'sigma_multiplier': 0.947608129962642, 'num_layers': 4, 'initialization_multiplier': 0.6018153849427126}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 589 final loss: 0.00002800
Trial 590:
  Learning Rate: 0.007244249772860417
  Sigma Multiplier: 1.034509344493126
  Initialization Multiplier: 0.31807628866075854
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.49it/s, loss=-0.000259, elapsed time=0.05, total time=11.4]
[I 2025-06-07 14:33:13,831] Trial 590 finished with value: -0.0002589836255772042 and parameters: {'learning_rate': 0.007244249772860417, 'sigma_multiplier': 1.034509344493126, 'num_layers': 2, 'initialization_multiplier': 0.31807628866075854}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 590 final loss: -0.00025898
Trial 591:
  Learning Rate: 0.010765017577743544
  Sigma Multiplier: 0.9798436866799907
  Initialization Multiplier: 0.4065271269335967
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.77it/s, loss=-0.000295, elapsed time=0.06, total time=12.1]
[I 2025-06-07 14:33:25,983] Trial 591 finished with value: -0.00029483144513362626 and parameters: {'learning_rate': 0.010765017577743544, 'sigma_multiplier': 0.9798436866799907, 'num_layers': 2, 'initialization_multiplier': 0.4065271269335967}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 591 final loss: -0.00029483
Trial 592:
  Learning Rate: 0.004815332835980337
  Sigma Multiplier: 1.1118762991321265
  Initialization Multiplier: 0.5730656615790708
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.73it/s, loss=-0.000340, elapsed time=0.05, total time=11.2]
[I 2025-06-07 14:33:37,287] Trial 592 finished with value: -0.0003396526219237529 and parameters: {'learning_rate': 0.004815332835980337, 'sigma_multiplier': 1.1118762991321265, 'num_layers': 2, 'initialization_multiplier': 0.5730656615790708}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 592 final loss: -0.00033965
Trial 593:
  Learning Rate: 0.008781320296979532
  Sigma Multiplier: 1.0554303228439266
  Initialization Multiplier: 0.9128438443491182
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.56it/s, loss=-0.000310, elapsed time=0.06, total time=11.3]
[I 2025-06-07 14:33:48,668] Trial 593 finished with value: -0.0003100838588393854 and parameters: {'learning_rate': 0.008781320296979532, 'sigma_multiplier': 1.0554303228439266, 'num_layers': 2, 'initialization_multiplier': 0.9128438443491182}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 593 final loss: -0.00031008
Trial 594:
  Learning Rate: 0.011839126726060654
  Sigma Multiplier: 1.0089473810213057
  Initialization Multiplier: 0.49296332887251476
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.52it/s, loss=-0.000427, elapsed time=0.15, total time=11.4]
[I 2025-06-07 14:34:00,182] Trial 594 finished with value: -0.0004269195160284194 and parameters: {'learning_rate': 0.011839126726060654, 'sigma_multiplier': 1.0089473810213057, 'num_layers': 2, 'initialization_multiplier': 0.49296332887251476}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 594 final loss: -0.00042692
Trial 595:
  Learning Rate: 0.006234951987288581
  Sigma Multiplier: 0.9440882406168218
  Initialization Multiplier: 0.44385681051356296
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.57it/s, loss=-0.000402, elapsed time=0.09, total time=11.4]
[I 2025-06-07 14:34:11,635] Trial 595 finished with value: -0.00040249638422870014 and parameters: {'learning_rate': 0.006234951987288581, 'sigma_multiplier': 0.9440882406168218, 'num_layers': 2, 'initialization_multiplier': 0.44385681051356296}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 595 final loss: -0.00040250
Trial 596:
  Learning Rate: 0.003950858552039628
  Sigma Multiplier: 0.902727095089606
  Initialization Multiplier: 0.53602054972097
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.15it/s, loss=-0.000309, elapsed time=0.06, total time=11.8]
[I 2025-06-07 14:34:23,585] Trial 596 finished with value: -0.00030885056579880635 and parameters: {'learning_rate': 0.003950858552039628, 'sigma_multiplier': 0.902727095089606, 'num_layers': 2, 'initialization_multiplier': 0.53602054972097}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 596 final loss: -0.00030885
Trial 597:
  Learning Rate: 0.007944882371641617
  Sigma Multiplier: 1.07295158980393
  Initialization Multiplier: 1.0955164227401388
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.30it/s, loss=-0.000153, elapsed time=0.07, total time=11.6]
[I 2025-06-07 14:34:35,234] Trial 597 finished with value: -0.00015259337371621595 and parameters: {'learning_rate': 0.007944882371641617, 'sigma_multiplier': 1.07295158980393, 'num_layers': 2, 'initialization_multiplier': 1.0955164227401388}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 597 final loss: -0.00015259
Trial 598:
  Learning Rate: 0.009735471462545851
  Sigma Multiplier: 0.9688993626959002
  Initialization Multiplier: 0.6015005304281067
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.52it/s, loss=-0.000319, elapsed time=0.07, total time=11.4]
[I 2025-06-07 14:34:46,678] Trial 598 finished with value: -0.00031930313203858905 and parameters: {'learning_rate': 0.009735471462545851, 'sigma_multiplier': 0.9688993626959002, 'num_layers': 2, 'initialization_multiplier': 0.6015005304281067}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 598 final loss: -0.00031930
Trial 599:
  Learning Rate: 0.011781628968830478
  Sigma Multiplier: 1.0118376966677969
  Initialization Multiplier: 1.4991036626593002
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.59it/s, loss=-0.000285, elapsed time=0.05, total time=9.34]
[I 2025-06-07 14:34:56,064] Trial 599 finished with value: -0.00028542982173748467 and parameters: {'learning_rate': 0.011781628968830478, 'sigma_multiplier': 1.0118376966677969, 'num_layers': 1, 'initialization_multiplier': 1.4991036626593002}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 599 final loss: -0.00028543
Trial 600:
  Learning Rate: 0.0070018766224903784
  Sigma Multiplier: 0.8246926429157542
  Initialization Multiplier: 0.6549477241412045
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.83it/s, loss=-0.000310, elapsed time=0.08, total time=11.9]
[I 2025-06-07 14:35:08,033] Trial 600 finished with value: -0.00030976691294712795 and parameters: {'learning_rate': 0.0070018766224903784, 'sigma_multiplier': 0.8246926429157542, 'num_layers': 2, 'initialization_multiplier': 0.6549477241412045}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 600 final loss: -0.00030977
Trial 601:
  Learning Rate: 0.0001295823436835382
  Sigma Multiplier: 1.14504552116955
  Initialization Multiplier: 0.5286527514711594
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.27it/s, loss=0.034780, elapsed time=0.06, total time=10.8]
[I 2025-06-07 14:35:18,903] Trial 601 finished with value: 0.03477961185502359 and parameters: {'learning_rate': 0.0001295823436835382, 'sigma_multiplier': 1.14504552116955, 'num_layers': 2, 'initialization_multiplier': 0.5286527514711594}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 601 final loss: 0.03477961
Trial 602:
  Learning Rate: 0.015336774425065584
  Sigma Multiplier: 1.0446919398103396
  Initialization Multiplier: 0.5747535100386003
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.43it/s, loss=-0.000370, elapsed time=0.06, total time=11.5]
[I 2025-06-07 14:35:30,486] Trial 602 finished with value: -0.00036986310719914464 and parameters: {'learning_rate': 0.015336774425065584, 'sigma_multiplier': 1.0446919398103396, 'num_layers': 2, 'initialization_multiplier': 0.5747535100386003}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 602 final loss: -0.00036986
Trial 603:
  Learning Rate: 0.009217365412204652
  Sigma Multiplier: 0.9285634330498803
  Initialization Multiplier: 0.47882430297190176
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.56it/s, loss=-0.000407, elapsed time=0.08, total time=12.7]
[I 2025-06-07 14:35:43,279] Trial 603 finished with value: -0.00040682993088020575 and parameters: {'learning_rate': 0.009217365412204652, 'sigma_multiplier': 0.9285634330498803, 'num_layers': 2, 'initialization_multiplier': 0.47882430297190176}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 603 final loss: -0.00040683
Trial 604:
  Learning Rate: 0.00533729096221635
  Sigma Multiplier: 0.9850106985732044
  Initialization Multiplier: 0.7066886386420745
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.37it/s, loss=-0.000257, elapsed time=0.07, total time=11.6]
[I 2025-06-07 14:35:54,905] Trial 604 finished with value: -0.00025651736704317265 and parameters: {'learning_rate': 0.00533729096221635, 'sigma_multiplier': 0.9850106985732044, 'num_layers': 2, 'initialization_multiplier': 0.7066886386420745}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 604 final loss: -0.00025652
Trial 605:
  Learning Rate: 0.010325438157005703
  Sigma Multiplier: 1.1081554624655159
  Initialization Multiplier: 0.615541324775656
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.41it/s, loss=-0.000347, elapsed time=0.05, total time=10.7]
[I 2025-06-07 14:36:05,648] Trial 605 finished with value: -0.00034693486434211787 and parameters: {'learning_rate': 0.010325438157005703, 'sigma_multiplier': 1.1081554624655159, 'num_layers': 2, 'initialization_multiplier': 0.615541324775656}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 605 final loss: -0.00034693
Trial 606:
  Learning Rate: 0.0028314840155877962
  Sigma Multiplier: 1.0245605480278268
  Initialization Multiplier: 0.5527320264221982
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.60it/s, loss=-0.000389, elapsed time=0.05, total time=11.4]
[I 2025-06-07 14:36:17,102] Trial 606 finished with value: -0.00038916190862004515 and parameters: {'learning_rate': 0.0028314840155877962, 'sigma_multiplier': 1.0245605480278268, 'num_layers': 2, 'initialization_multiplier': 0.5527320264221982}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 606 final loss: -0.00038916
Trial 607:
  Learning Rate: 0.008085245782995163
  Sigma Multiplier: 1.0723360859065905
  Initialization Multiplier: 0.633767850843298
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.48it/s, loss=-0.000403, elapsed time=0.09, total time=11.4]
[I 2025-06-07 14:36:28,567] Trial 607 finished with value: -0.0004030570183643246 and parameters: {'learning_rate': 0.008085245782995163, 'sigma_multiplier': 1.0723360859065905, 'num_layers': 2, 'initialization_multiplier': 0.633767850843298}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 607 final loss: -0.00040306
Trial 608:
  Learning Rate: 0.012692762150963004
  Sigma Multiplier: 0.9530193541857057
  Initialization Multiplier: 0.5106140950197683
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.33it/s, loss=-0.000319, elapsed time=0.08, total time=11.6]
[I 2025-06-07 14:36:40,204] Trial 608 finished with value: -0.0003191733912274768 and parameters: {'learning_rate': 0.012692762150963004, 'sigma_multiplier': 0.9530193541857057, 'num_layers': 2, 'initialization_multiplier': 0.5106140950197683}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 608 final loss: -0.00031917
Trial 609:
  Learning Rate: 0.006143143911040651
  Sigma Multiplier: 0.8618361052462137
  Initialization Multiplier: 0.4412826428711163
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.77it/s, loss=-0.000308, elapsed time=0.05, total time=11.2]
[I 2025-06-07 14:36:51,424] Trial 609 finished with value: -0.0003080135522013076 and parameters: {'learning_rate': 0.006143143911040651, 'sigma_multiplier': 0.8618361052462137, 'num_layers': 2, 'initialization_multiplier': 0.4412826428711163}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 609 final loss: -0.00030801
Trial 610:
  Learning Rate: 0.017470963079564004
  Sigma Multiplier: 0.9959166602298498
  Initialization Multiplier: 0.7246677460598964
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.37it/s, loss=-0.000235, elapsed time=0.07, total time=11.6]
[I 2025-06-07 14:37:03,064] Trial 610 finished with value: -0.00023511938781794182 and parameters: {'learning_rate': 0.017470963079564004, 'sigma_multiplier': 0.9959166602298498, 'num_layers': 2, 'initialization_multiplier': 0.7246677460598964}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 610 final loss: -0.00023512
Trial 611:
  Learning Rate: 0.010768000059927747
  Sigma Multiplier: 1.038947074414477
  Initialization Multiplier: 0.38319645287591775
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.11it/s, loss=-0.000225, elapsed time=0.06, total time=10.9]
[I 2025-06-07 14:37:14,002] Trial 611 finished with value: -0.0002251687549891661 and parameters: {'learning_rate': 0.010768000059927747, 'sigma_multiplier': 1.038947074414477, 'num_layers': 2, 'initialization_multiplier': 0.38319645287591775}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 611 final loss: -0.00022517
Trial 612:
  Learning Rate: 0.007579837631915513
  Sigma Multiplier: 0.9035401018662292
  Initialization Multiplier: 0.584672781641009
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.63it/s, loss=-0.000319, elapsed time=0.09, total time=12.2]
[I 2025-06-07 14:37:26,253] Trial 612 finished with value: -0.00031861733645787723 and parameters: {'learning_rate': 0.007579837631915513, 'sigma_multiplier': 0.9035401018662292, 'num_layers': 2, 'initialization_multiplier': 0.584672781641009}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 612 final loss: -0.00031862
Trial 613:
  Learning Rate: 0.004685080383713583
  Sigma Multiplier: 0.9655478938887984
  Initialization Multiplier: 0.6688097929542626
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:19<00:00,  7.75it/s, loss=-0.000143, elapsed time=0.11, total time=20]  
[I 2025-06-07 14:37:46,506] Trial 613 finished with value: -0.00014288024470549706 and parameters: {'learning_rate': 0.004685080383713583, 'sigma_multiplier': 0.9655478938887984, 'num_layers': 5, 'initialization_multiplier': 0.6688097929542626}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 613 final loss: -0.00014288
Trial 614:
  Learning Rate: 0.014937619475428697
  Sigma Multiplier: 1.0099080235313893
  Initialization Multiplier: 0.483915531853285
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.53it/s, loss=-0.000393, elapsed time=0.07, total time=11.4]
[I 2025-06-07 14:37:57,939] Trial 614 finished with value: -0.0003933708455163935 and parameters: {'learning_rate': 0.014937619475428697, 'sigma_multiplier': 1.0099080235313893, 'num_layers': 2, 'initialization_multiplier': 0.483915531853285}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 614 final loss: -0.00039337
Trial 615:
  Learning Rate: 0.00910889210402316
  Sigma Multiplier: 1.0986545633641183
  Initialization Multiplier: 0.5383712956601607
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.03it/s, loss=-0.000347, elapsed time=0.07, total time=11.2]
[I 2025-06-07 14:38:09,368] Trial 615 finished with value: -0.00034742552922571246 and parameters: {'learning_rate': 0.00910889210402316, 'sigma_multiplier': 1.0986545633641183, 'num_layers': 2, 'initialization_multiplier': 0.5383712956601607}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 615 final loss: -0.00034743
Trial 616:
  Learning Rate: 0.012212104201066276
  Sigma Multiplier: 0.939447223502361
  Initialization Multiplier: 0.6118365216741317
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.28it/s, loss=-0.000047, elapsed time=0.09, total time=13.7]
[I 2025-06-07 14:38:23,080] Trial 616 finished with value: -4.733269173882921e-05 and parameters: {'learning_rate': 0.012212104201066276, 'sigma_multiplier': 0.939447223502361, 'num_layers': 3, 'initialization_multiplier': 0.6118365216741317}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 616 final loss: -0.00004733
Trial 617:
  Learning Rate: 0.007047120170221414
  Sigma Multiplier: 1.0586739851778348
  Initialization Multiplier: 0.5700934887575366
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.57it/s, loss=-0.000366, elapsed time=0.06, total time=11.4]
[I 2025-06-07 14:38:34,511] Trial 617 finished with value: -0.0003658000805133481 and parameters: {'learning_rate': 0.007047120170221414, 'sigma_multiplier': 1.0586739851778348, 'num_layers': 2, 'initialization_multiplier': 0.5700934887575366}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 617 final loss: -0.00036580
Trial 618:
  Learning Rate: 0.009737934705547008
  Sigma Multiplier: 0.9910397980014953
  Initialization Multiplier: 0.5126755355713029
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.57it/s, loss=-0.000359, elapsed time=0.06, total time=11.4]
[I 2025-06-07 14:38:45,956] Trial 618 finished with value: -0.00035924489506518315 and parameters: {'learning_rate': 0.009737934705547008, 'sigma_multiplier': 0.9910397980014953, 'num_layers': 2, 'initialization_multiplier': 0.5126755355713029}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 618 final loss: -0.00035924
Trial 619:
  Learning Rate: 0.005801301937445217
  Sigma Multiplier: 1.0278335074256817
  Initialization Multiplier: 0.4481397409286443
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.65it/s, loss=-0.000302, elapsed time=0.09, total time=11.3]
[I 2025-06-07 14:38:57,351] Trial 619 finished with value: -0.00030165917618415306 and parameters: {'learning_rate': 0.005801301937445217, 'sigma_multiplier': 1.0278335074256817, 'num_layers': 2, 'initialization_multiplier': 0.4481397409286443}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 619 final loss: -0.00030166
Trial 620:
  Learning Rate: 0.013148658374368133
  Sigma Multiplier: 1.1599980777464647
  Initialization Multiplier: 0.6386091670666489
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.17it/s, loss=-0.000372, elapsed time=0.06, total time=10.9]
[I 2025-06-07 14:39:08,351] Trial 620 finished with value: -0.00037165220266897414 and parameters: {'learning_rate': 0.013148658374368133, 'sigma_multiplier': 1.1599980777464647, 'num_layers': 2, 'initialization_multiplier': 0.6386091670666489}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 620 final loss: -0.00037165
Trial 621:
  Learning Rate: 0.008197581636745143
  Sigma Multiplier: 0.9208254974830136
  Initialization Multiplier: 0.5821905488430912
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.43it/s, loss=-0.000400, elapsed time=0.1, total time=11.4] 
[I 2025-06-07 14:39:19,803] Trial 621 finished with value: -0.00040000974362347383 and parameters: {'learning_rate': 0.008197581636745143, 'sigma_multiplier': 0.9208254974830136, 'num_layers': 2, 'initialization_multiplier': 0.5821905488430912}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 621 final loss: -0.00040001
Trial 622:
  Learning Rate: 0.010908277423827287
  Sigma Multiplier: 1.0776581998549755
  Initialization Multiplier: 0.5295562514097393
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.76it/s, loss=-0.000337, elapsed time=0.07, total time=11.2]
[I 2025-06-07 14:39:31,059] Trial 622 finished with value: -0.0003368189759392556 and parameters: {'learning_rate': 0.010908277423827287, 'sigma_multiplier': 1.0776581998549755, 'num_layers': 2, 'initialization_multiplier': 0.5295562514097393}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 622 final loss: -0.00033682
Trial 623:
  Learning Rate: 0.016342689249994588
  Sigma Multiplier: 0.9665144843735491
  Initialization Multiplier: 0.47381380485167446
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.56it/s, loss=-0.000342, elapsed time=0.05, total time=11.4]
[I 2025-06-07 14:39:42,510] Trial 623 finished with value: -0.00034213474175504754 and parameters: {'learning_rate': 0.016342689249994588, 'sigma_multiplier': 0.9665144843735491, 'num_layers': 2, 'initialization_multiplier': 0.47381380485167446}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 623 final loss: -0.00034213
Trial 624:
  Learning Rate: 0.008941572693160777
  Sigma Multiplier: 0.8760228423843527
  Initialization Multiplier: 0.6114104523727857
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.43it/s, loss=-0.000361, elapsed time=0.07, total time=14.7]
[I 2025-06-07 14:39:57,265] Trial 624 finished with value: -0.0003607748213465884 and parameters: {'learning_rate': 0.008941572693160777, 'sigma_multiplier': 0.8760228423843527, 'num_layers': 2, 'initialization_multiplier': 0.6114104523727857}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 624 final loss: -0.00036077
Trial 625:
  Learning Rate: 0.003703251174634949
  Sigma Multiplier: 1.0108785466313845
  Initialization Multiplier: 0.5548697797543212
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.18it/s, loss=-0.000362, elapsed time=0.05, total time=10.9]
[I 2025-06-07 14:40:08,228] Trial 625 finished with value: -0.0003618208111303256 and parameters: {'learning_rate': 0.003703251174634949, 'sigma_multiplier': 1.0108785466313845, 'num_layers': 2, 'initialization_multiplier': 0.5548697797543212}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 625 final loss: -0.00036182
Trial 626:
  Learning Rate: 0.006728051869328571
  Sigma Multiplier: 1.0570200609376053
  Initialization Multiplier: 0.6836743674102915
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.19it/s, loss=-0.000188, elapsed time=0.05, total time=9.02]
[I 2025-06-07 14:40:17,285] Trial 626 finished with value: -0.0001881032008260887 and parameters: {'learning_rate': 0.006728051869328571, 'sigma_multiplier': 1.0570200609376053, 'num_layers': 1, 'initialization_multiplier': 0.6836743674102915}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 626 final loss: -0.00018810
Trial 627:
  Learning Rate: 0.0113535521063887
  Sigma Multiplier: 1.1297073354744547
  Initialization Multiplier: 0.4233060567497713
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.69it/s, loss=-0.000372, elapsed time=0.05, total time=10.4]
[I 2025-06-07 14:40:27,799] Trial 627 finished with value: -0.00037166187539071393 and parameters: {'learning_rate': 0.0113535521063887, 'sigma_multiplier': 1.1297073354744547, 'num_layers': 2, 'initialization_multiplier': 0.4233060567497713}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 627 final loss: -0.00037166
Trial 628:
  Learning Rate: 0.00473572891195858
  Sigma Multiplier: 0.9841031167162347
  Initialization Multiplier: 0.5250068765681645
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.83it/s, loss=-0.000295, elapsed time=0.05, total time=11.1]
[I 2025-06-07 14:40:38,932] Trial 628 finished with value: -0.00029522531967256737 and parameters: {'learning_rate': 0.00473572891195858, 'sigma_multiplier': 0.9841031167162347, 'num_layers': 2, 'initialization_multiplier': 0.5250068765681645}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 628 final loss: -0.00029523
Trial 629:
  Learning Rate: 0.013796828327172802
  Sigma Multiplier: 0.9501468865942123
  Initialization Multiplier: 0.49219580904739646
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.76it/s, loss=-0.000292, elapsed time=0.06, total time=11.3]
[I 2025-06-07 14:40:50,264] Trial 629 finished with value: -0.0002918142194629011 and parameters: {'learning_rate': 0.013796828327172802, 'sigma_multiplier': 0.9501468865942123, 'num_layers': 2, 'initialization_multiplier': 0.49219580904739646}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 629 final loss: -0.00029181
Trial 630:
  Learning Rate: 0.009693922289990716
  Sigma Multiplier: 1.0287745403191735
  Initialization Multiplier: 0.639213127905069
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.11it/s, loss=-0.000403, elapsed time=0.07, total time=11.7]
[I 2025-06-07 14:41:02,024] Trial 630 finished with value: -0.000403174505483067 and parameters: {'learning_rate': 0.009693922289990716, 'sigma_multiplier': 1.0287745403191735, 'num_layers': 2, 'initialization_multiplier': 0.639213127905069}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 630 final loss: -0.00040317
Trial 631:
  Learning Rate: 0.01850254015869139
  Sigma Multiplier: 1.626409412687503
  Initialization Multiplier: 0.5882597956723223
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.59it/s, loss=-0.000273, elapsed time=0.05, total time=10.2]
[I 2025-06-07 14:41:12,261] Trial 631 finished with value: -0.00027309902820085036 and parameters: {'learning_rate': 0.01850254015869139, 'sigma_multiplier': 1.626409412687503, 'num_layers': 2, 'initialization_multiplier': 0.5882597956723223}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 631 final loss: -0.00027310
Trial 632:
  Learning Rate: 0.008243129086774426
  Sigma Multiplier: 0.913770567387073
  Initialization Multiplier: 0.554941211912409
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.50it/s, loss=-0.000337, elapsed time=0.06, total time=11.4]
[I 2025-06-07 14:41:23,711] Trial 632 finished with value: -0.00033716141490924763 and parameters: {'learning_rate': 0.008243129086774426, 'sigma_multiplier': 0.913770567387073, 'num_layers': 2, 'initialization_multiplier': 0.554941211912409}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 632 final loss: -0.00033716
Trial 633:
  Learning Rate: 0.005661501259851788
  Sigma Multiplier: 1.0930129804382283
  Initialization Multiplier: 0.6727898269965402
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.65it/s, loss=-0.000328, elapsed time=0.12, total time=11.4]
[I 2025-06-07 14:41:35,207] Trial 633 finished with value: -0.00032754985589513575 and parameters: {'learning_rate': 0.005661501259851788, 'sigma_multiplier': 1.0930129804382283, 'num_layers': 2, 'initialization_multiplier': 0.6727898269965402}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 633 final loss: -0.00032755
Trial 634:
  Learning Rate: 0.011603406308841399
  Sigma Multiplier: 0.9837601480627233
  Initialization Multiplier: 0.4744074916511464
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.98it/s, loss=-0.000334, elapsed time=0.05, total time=11]  
[I 2025-06-07 14:41:46,257] Trial 634 finished with value: -0.0003338170898553326 and parameters: {'learning_rate': 0.011603406308841399, 'sigma_multiplier': 0.9837601480627233, 'num_layers': 2, 'initialization_multiplier': 0.4744074916511464}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 634 final loss: -0.00033382
Trial 635:
  Learning Rate: 0.007201152952386206
  Sigma Multiplier: 1.0594466399948095
  Initialization Multiplier: 0.36448202608589597
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.86it/s, loss=-0.000313, elapsed time=0.07, total time=11.2]
[I 2025-06-07 14:41:57,455] Trial 635 finished with value: -0.00031280998275834014 and parameters: {'learning_rate': 0.007201152952386206, 'sigma_multiplier': 1.0594466399948095, 'num_layers': 2, 'initialization_multiplier': 0.36448202608589597}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 635 final loss: -0.00031281
Trial 636:
  Learning Rate: 0.013914004548321859
  Sigma Multiplier: 1.0188094015823626
  Initialization Multiplier: 0.6147614526971189
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.69it/s, loss=-0.000267, elapsed time=0.05, total time=9.86]
[I 2025-06-07 14:42:07,361] Trial 636 finished with value: -0.00026689749573398326 and parameters: {'learning_rate': 0.013914004548321859, 'sigma_multiplier': 1.0188094015823626, 'num_layers': 1, 'initialization_multiplier': 0.6147614526971189}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 636 final loss: -0.00026690
Trial 637:
  Learning Rate: 0.023257687235310166
  Sigma Multiplier: 0.9439692066326345
  Initialization Multiplier: 0.5080722612090458
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.54it/s, loss=-0.000267, elapsed time=0.05, total time=11.3]
[I 2025-06-07 14:42:18,748] Trial 637 finished with value: -0.0002669172517351403 and parameters: {'learning_rate': 0.023257687235310166, 'sigma_multiplier': 0.9439692066326345, 'num_layers': 2, 'initialization_multiplier': 0.5080722612090458}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 637 final loss: -0.00026692
Trial 638:
  Learning Rate: 0.01012755302053437
  Sigma Multiplier: 0.7686131550127164
  Initialization Multiplier: 0.5728950356238826
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.98it/s, loss=-0.000211, elapsed time=0.06, total time=11.8]
[I 2025-06-07 14:42:30,667] Trial 638 finished with value: -0.0002111348392352972 and parameters: {'learning_rate': 0.01012755302053437, 'sigma_multiplier': 0.7686131550127164, 'num_layers': 2, 'initialization_multiplier': 0.5728950356238826}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 638 final loss: -0.00021113
Trial 639:
  Learning Rate: 0.006421380902545413
  Sigma Multiplier: 1.1076304049760473
  Initialization Multiplier: 0.7522483724600934
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.86it/s, loss=-0.000316, elapsed time=0.08, total time=11.1]
[I 2025-06-07 14:42:41,821] Trial 639 finished with value: -0.0003163430412215499 and parameters: {'learning_rate': 0.006421380902545413, 'sigma_multiplier': 1.1076304049760473, 'num_layers': 2, 'initialization_multiplier': 0.7522483724600934}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 639 final loss: -0.00031634
Trial 640:
  Learning Rate: 0.00867670451200758
  Sigma Multiplier: 1.2007424881728104
  Initialization Multiplier: 0.42778050317143945
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.25it/s, loss=-0.000321, elapsed time=0.07, total time=10.8]
[I 2025-06-07 14:42:52,684] Trial 640 finished with value: -0.000320775126644034 and parameters: {'learning_rate': 0.00867670451200758, 'sigma_multiplier': 1.2007424881728104, 'num_layers': 2, 'initialization_multiplier': 0.42778050317143945}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 640 final loss: -0.00032078
Trial 641:
  Learning Rate: 0.00431038604559327
  Sigma Multiplier: 0.880506213445359
  Initialization Multiplier: 0.8033859826006691
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.01it/s, loss=-0.000198, elapsed time=0.07, total time=11.8]
[I 2025-06-07 14:43:04,582] Trial 641 finished with value: -0.00019760516419574137 and parameters: {'learning_rate': 0.00431038604559327, 'sigma_multiplier': 0.880506213445359, 'num_layers': 2, 'initialization_multiplier': 0.8033859826006691}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 641 final loss: -0.00019761
Trial 642:
  Learning Rate: 0.01230274645197267
  Sigma Multiplier: 0.976741312224988
  Initialization Multiplier: 0.5461270306643047
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.05it/s, loss=-0.000304, elapsed time=0.06, total time=11.1]
[I 2025-06-07 14:43:15,740] Trial 642 finished with value: -0.00030448097959248724 and parameters: {'learning_rate': 0.01230274645197267, 'sigma_multiplier': 0.976741312224988, 'num_layers': 2, 'initialization_multiplier': 0.5461270306643047}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 642 final loss: -0.00030448
Trial 643:
  Learning Rate: 0.015557248551441192
  Sigma Multiplier: 1.0349542934660856
  Initialization Multiplier: 0.607498607934708
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.98it/s, loss=-0.000286, elapsed time=0.06, total time=11]  
[I 2025-06-07 14:43:26,797] Trial 643 finished with value: -0.0002857172525878605 and parameters: {'learning_rate': 0.015557248551441192, 'sigma_multiplier': 1.0349542934660856, 'num_layers': 2, 'initialization_multiplier': 0.607498607934708}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 643 final loss: -0.00028572
Trial 644:
  Learning Rate: 0.007780953306923447
  Sigma Multiplier: 0.9908502080183662
  Initialization Multiplier: 0.6461519935110712
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.94it/s, loss=-0.000356, elapsed time=0.12, total time=11.1]
[I 2025-06-07 14:43:37,947] Trial 644 finished with value: -0.0003560483411582621 and parameters: {'learning_rate': 0.007780953306923447, 'sigma_multiplier': 0.9908502080183662, 'num_layers': 2, 'initialization_multiplier': 0.6461519935110712}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 644 final loss: -0.00035605
Trial 645:
  Learning Rate: 0.009857572616489868
  Sigma Multiplier: 1.072892704598926
  Initialization Multiplier: 0.5104687994873869
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.30it/s, loss=-0.000434, elapsed time=0.08, total time=10.8]
[I 2025-06-07 14:43:48,826] Trial 645 finished with value: -0.000433973310333205 and parameters: {'learning_rate': 0.009857572616489868, 'sigma_multiplier': 1.072892704598926, 'num_layers': 2, 'initialization_multiplier': 0.5104687994873869}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 645 final loss: -0.00043397
Trial 646:
  Learning Rate: 0.002481436274723902
  Sigma Multiplier: 0.8386541838649169
  Initialization Multiplier: 0.40104383814178807
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.06it/s, loss=-0.000320, elapsed time=0.07, total time=13.9]
[I 2025-06-07 14:44:02,787] Trial 646 finished with value: -0.0003196441864049145 and parameters: {'learning_rate': 0.002481436274723902, 'sigma_multiplier': 0.8386541838649169, 'num_layers': 3, 'initialization_multiplier': 0.40104383814178807}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 646 final loss: -0.00031964
Trial 647:
  Learning Rate: 0.0032486999134658835
  Sigma Multiplier: 0.9246635878184022
  Initialization Multiplier: 0.4639487583767515
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.72it/s, loss=-0.000299, elapsed time=0.05, total time=11.2]
[I 2025-06-07 14:44:14,086] Trial 647 finished with value: -0.0002992214943549796 and parameters: {'learning_rate': 0.0032486999134658835, 'sigma_multiplier': 0.9246635878184022, 'num_layers': 2, 'initialization_multiplier': 0.4639487583767515}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 647 final loss: -0.00029922
Trial 648:
  Learning Rate: 0.005176128801950891
  Sigma Multiplier: 1.0270191532186064
  Initialization Multiplier: 0.7216708251388455
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.98it/s, loss=-0.000386, elapsed time=0.05, total time=11.1]
[I 2025-06-07 14:44:25,262] Trial 648 finished with value: -0.0003861849032979524 and parameters: {'learning_rate': 0.005176128801950891, 'sigma_multiplier': 1.0270191532186064, 'num_layers': 2, 'initialization_multiplier': 0.7216708251388455}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 648 final loss: -0.00038618
Trial 649:
  Learning Rate: 0.011385517933544817
  Sigma Multiplier: 0.9635104633093768
  Initialization Multiplier: 0.5669554583700864
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.35it/s, loss=-0.000280, elapsed time=0.07, total time=11.5]
[I 2025-06-07 14:44:36,839] Trial 649 finished with value: -0.00028048834255751985 and parameters: {'learning_rate': 0.011385517933544817, 'sigma_multiplier': 0.9635104633093768, 'num_layers': 2, 'initialization_multiplier': 0.5669554583700864}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 649 final loss: -0.00028049
Trial 650:
  Learning Rate: 0.006976802424806823
  Sigma Multiplier: 1.003623504942829
  Initialization Multiplier: 0.6004444059596981
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.44it/s, loss=-0.000394, elapsed time=0.06, total time=11.5]
[I 2025-06-07 14:44:48,394] Trial 650 finished with value: -0.000393787966576028 and parameters: {'learning_rate': 0.006976802424806823, 'sigma_multiplier': 1.003623504942829, 'num_layers': 2, 'initialization_multiplier': 0.6004444059596981}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 650 final loss: -0.00039379
Trial 651:
  Learning Rate: 0.009012890190049683
  Sigma Multiplier: 1.052536975616817
  Initialization Multiplier: 0.5361540684533688
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.02it/s, loss=-0.000405, elapsed time=0.06, total time=9.73]
[I 2025-06-07 14:44:58,189] Trial 651 finished with value: -0.00040490326117094557 and parameters: {'learning_rate': 0.009012890190049683, 'sigma_multiplier': 1.052536975616817, 'num_layers': 2, 'initialization_multiplier': 0.5361540684533688}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 651 final loss: -0.00040490
Trial 652:
  Learning Rate: 0.013793385280376507
  Sigma Multiplier: 1.1395921450155488
  Initialization Multiplier: 0.6689077318691241
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.91it/s, loss=-0.000324, elapsed time=0.06, total time=10.5]
[I 2025-06-07 14:45:08,906] Trial 652 finished with value: -0.0003243273370881184 and parameters: {'learning_rate': 0.013793385280376507, 'sigma_multiplier': 1.1395921450155488, 'num_layers': 2, 'initialization_multiplier': 0.6689077318691241}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 652 final loss: -0.00032433
Trial 653:
  Learning Rate: 0.018987142578845352
  Sigma Multiplier: 0.9039149063660635
  Initialization Multiplier: 0.49841647929783345
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.25it/s, loss=-0.000315, elapsed time=0.05, total time=11.8]
[I 2025-06-07 14:45:20,846] Trial 653 finished with value: -0.0003153061818690022 and parameters: {'learning_rate': 0.018987142578845352, 'sigma_multiplier': 0.9039149063660635, 'num_layers': 2, 'initialization_multiplier': 0.49841647929783345}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 653 final loss: -0.00031531
Trial 654:
  Learning Rate: 0.006052180536087992
  Sigma Multiplier: 0.1887411474112507
  Initialization Multiplier: 0.6300072525507769
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.12it/s, loss=0.000108, elapsed time=0.11, total time=15.1] 
[I 2025-06-07 14:45:35,999] Trial 654 finished with value: 0.00010752363832089477 and parameters: {'learning_rate': 0.006052180536087992, 'sigma_multiplier': 0.1887411474112507, 'num_layers': 2, 'initialization_multiplier': 0.6300072525507769}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 654 final loss: 0.00010752
Trial 655:
  Learning Rate: 0.008094967473322322
  Sigma Multiplier: 0.9526241233122964
  Initialization Multiplier: 0.5778283963779736
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.13it/s, loss=-0.000266, elapsed time=0.07, total time=11.7]
[I 2025-06-07 14:45:47,779] Trial 655 finished with value: -0.00026625819385652454 and parameters: {'learning_rate': 0.008094967473322322, 'sigma_multiplier': 0.9526241233122964, 'num_layers': 2, 'initialization_multiplier': 0.5778283963779736}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 655 final loss: -0.00026626
Trial 656:
  Learning Rate: 0.010731650051984197
  Sigma Multiplier: 1.0929567753990403
  Initialization Multiplier: 0.4593727778969069
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.13it/s, loss=-0.000472, elapsed time=0.06, total time=10.9]
[I 2025-06-07 14:45:58,731] Trial 656 finished with value: -0.0004724922704045399 and parameters: {'learning_rate': 0.010731650051984197, 'sigma_multiplier': 1.0929567753990403, 'num_layers': 2, 'initialization_multiplier': 0.4593727778969069}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 656 final loss: -0.00047249
Trial 657:
  Learning Rate: 0.010207062343967866
  Sigma Multiplier: 1.1745286222340885
  Initialization Multiplier: 0.36924355686479193
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.21it/s, loss=-0.000335, elapsed time=0.06, total time=10.9]
[I 2025-06-07 14:46:09,732] Trial 657 finished with value: -0.0003347172127049811 and parameters: {'learning_rate': 0.010207062343967866, 'sigma_multiplier': 1.1745286222340885, 'num_layers': 2, 'initialization_multiplier': 0.36924355686479193}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 657 final loss: -0.00033472
Trial 658:
  Learning Rate: 0.01615866806192552
  Sigma Multiplier: 1.1121191642474257
  Initialization Multiplier: 0.414714727747576
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.26it/s, loss=-0.000234, elapsed time=0.04, total time=9.66]
[I 2025-06-07 14:46:19,446] Trial 658 finished with value: -0.00023388992279479222 and parameters: {'learning_rate': 0.01615866806192552, 'sigma_multiplier': 1.1121191642474257, 'num_layers': 1, 'initialization_multiplier': 0.414714727747576}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 658 final loss: -0.00023389
Trial 659:
  Learning Rate: 0.012276239725497401
  Sigma Multiplier: 1.1469331880420526
  Initialization Multiplier: 1.8429502249064549
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.35it/s, loss=-0.000241, elapsed time=0.05, total time=11.6]
[I 2025-06-07 14:46:31,075] Trial 659 finished with value: -0.00024137614653018012 and parameters: {'learning_rate': 0.012276239725497401, 'sigma_multiplier': 1.1469331880420526, 'num_layers': 2, 'initialization_multiplier': 1.8429502249064549}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 659 final loss: -0.00024138
Trial 660:
  Learning Rate: 0.007990195488756336
  Sigma Multiplier: 0.40048536354989384
  Initialization Multiplier: 0.44044706013855806
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.45it/s, loss=0.000308, elapsed time=0.09, total time=14.7]
[I 2025-06-07 14:46:45,813] Trial 660 finished with value: 0.00030750204930818446 and parameters: {'learning_rate': 0.007990195488756336, 'sigma_multiplier': 0.40048536354989384, 'num_layers': 2, 'initialization_multiplier': 0.44044706013855806}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 660 final loss: 0.00030750
Trial 661:
  Learning Rate: 0.006880774606610322
  Sigma Multiplier: 1.0920810685107383
  Initialization Multiplier: 0.46169258790261064
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.90it/s, loss=-0.000383, elapsed time=0.06, total time=11]  
[I 2025-06-07 14:46:56,911] Trial 661 finished with value: -0.00038326210928583027 and parameters: {'learning_rate': 0.006880774606610322, 'sigma_multiplier': 1.0920810685107383, 'num_layers': 2, 'initialization_multiplier': 0.46169258790261064}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 661 final loss: -0.00038326
Trial 662:
  Learning Rate: 0.00955304827066354
  Sigma Multiplier: 1.110268895087032
  Initialization Multiplier: 0.44660129828124046
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.75it/s, loss=-0.000409, elapsed time=0.05, total time=11.2]
[I 2025-06-07 14:47:08,178] Trial 662 finished with value: -0.000409081649100893 and parameters: {'learning_rate': 0.00955304827066354, 'sigma_multiplier': 1.110268895087032, 'num_layers': 2, 'initialization_multiplier': 0.44660129828124046}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 662 final loss: -0.00040908
Trial 663:
  Learning Rate: 0.004224624505112666
  Sigma Multiplier: 1.404614221296324
  Initialization Multiplier: 0.3990110209860054
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.72it/s, loss=-0.000286, elapsed time=0.07, total time=10.6]
[I 2025-06-07 14:47:18,800] Trial 663 finished with value: -0.0002864309717373391 and parameters: {'learning_rate': 0.004224624505112666, 'sigma_multiplier': 1.404614221296324, 'num_layers': 2, 'initialization_multiplier': 0.3990110209860054}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 663 final loss: -0.00028643
Trial 664:
  Learning Rate: 0.005529540409746293
  Sigma Multiplier: 1.0761835639880566
  Initialization Multiplier: 0.4966741940409274
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.16it/s, loss=-0.000182, elapsed time=0.05, total time=10.9]
[I 2025-06-07 14:47:29,716] Trial 664 finished with value: -0.00018232503188233718 and parameters: {'learning_rate': 0.005529540409746293, 'sigma_multiplier': 1.0761835639880566, 'num_layers': 2, 'initialization_multiplier': 0.4966741940409274}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 664 final loss: -0.00018233
Trial 665:
  Learning Rate: 0.012941860938613986
  Sigma Multiplier: 1.1796371199626232
  Initialization Multiplier: 0.48883190163388196
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.79it/s, loss=-0.000337, elapsed time=0.05, total time=11.3]
[I 2025-06-07 14:47:41,083] Trial 665 finished with value: -0.00033661916866461274 and parameters: {'learning_rate': 0.012941860938613986, 'sigma_multiplier': 1.1796371199626232, 'num_layers': 2, 'initialization_multiplier': 0.48883190163388196}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 665 final loss: -0.00033662
Trial 666:
  Learning Rate: 0.010682650052641936
  Sigma Multiplier: 1.0475277664801694
  Initialization Multiplier: 0.3320143764735526
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.24it/s, loss=-0.000198, elapsed time=0.05, total time=11.7]
[I 2025-06-07 14:47:52,857] Trial 666 finished with value: -0.00019823236798993432 and parameters: {'learning_rate': 0.010682650052641936, 'sigma_multiplier': 1.0475277664801694, 'num_layers': 2, 'initialization_multiplier': 0.3320143764735526}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 666 final loss: -0.00019823
Trial 667:
  Learning Rate: 0.015461424530086918
  Sigma Multiplier: 1.1345387148232122
  Initialization Multiplier: 0.5312964013817978
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.42it/s, loss=-0.000332, elapsed time=0.06, total time=11.6]
[I 2025-06-07 14:48:04,495] Trial 667 finished with value: -0.00033241067523121366 and parameters: {'learning_rate': 0.015461424530086918, 'sigma_multiplier': 1.1345387148232122, 'num_layers': 2, 'initialization_multiplier': 0.5312964013817978}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 667 final loss: -0.00033241
Trial 668:
  Learning Rate: 0.008404509939760328
  Sigma Multiplier: 1.0730132522585016
  Initialization Multiplier: 0.45808749065195375
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.46it/s, loss=-0.000364, elapsed time=0.07, total time=11.5]
[I 2025-06-07 14:48:16,087] Trial 668 finished with value: -0.00036412293396891006 and parameters: {'learning_rate': 0.008404509939760328, 'sigma_multiplier': 1.0730132522585016, 'num_layers': 2, 'initialization_multiplier': 0.45808749065195375}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 668 final loss: -0.00036412
Trial 669:
  Learning Rate: 0.006761535764097453
  Sigma Multiplier: 1.0208656204374615
  Initialization Multiplier: 0.41051371270995196
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.26it/s, loss=-0.000374, elapsed time=0.06, total time=11.6]
[I 2025-06-07 14:48:27,756] Trial 669 finished with value: -0.00037426068260882364 and parameters: {'learning_rate': 0.006761535764097453, 'sigma_multiplier': 1.0208656204374615, 'num_layers': 2, 'initialization_multiplier': 0.41051371270995196}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 669 final loss: -0.00037426
Trial 670:
  Learning Rate: 0.011731197141763974
  Sigma Multiplier: 0.9864169564414597
  Initialization Multiplier: 0.5339003330426719
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.88it/s, loss=-0.000346, elapsed time=0.06, total time=11.9]
[I 2025-06-07 14:48:39,753] Trial 670 finished with value: -0.0003460483276849732 and parameters: {'learning_rate': 0.011731197141763974, 'sigma_multiplier': 0.9864169564414597, 'num_layers': 2, 'initialization_multiplier': 0.5339003330426719}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 670 final loss: -0.00034605
Trial 671:
  Learning Rate: 0.004961607961064224
  Sigma Multiplier: 0.921928585394061
  Initialization Multiplier: 0.49074126291280884
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.97it/s, loss=-0.000340, elapsed time=0.06, total time=11.9]
[I 2025-06-07 14:48:51,682] Trial 671 finished with value: -0.0003399008247615718 and parameters: {'learning_rate': 0.004961607961064224, 'sigma_multiplier': 0.921928585394061, 'num_layers': 2, 'initialization_multiplier': 0.49074126291280884}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 671 final loss: -0.00033990
Trial 672:
  Learning Rate: 0.009242734422850372
  Sigma Multiplier: 1.08411008136022
  Initialization Multiplier: 0.6496154919576812
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.18it/s, loss=-0.000333, elapsed time=0.07, total time=11.7]
[I 2025-06-07 14:49:03,441] Trial 672 finished with value: -0.00033294704362143735 and parameters: {'learning_rate': 0.009242734422850372, 'sigma_multiplier': 1.08411008136022, 'num_layers': 2, 'initialization_multiplier': 0.6496154919576812}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 672 final loss: -0.00033295
Trial 673:
  Learning Rate: 0.020446185748399456
  Sigma Multiplier: 0.8692298779338599
  Initialization Multiplier: 0.7040042644819298
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.46it/s, loss=-0.000290, elapsed time=0.06, total time=10.7]
[I 2025-06-07 14:49:14,237] Trial 673 finished with value: -0.00029003246914341977 and parameters: {'learning_rate': 0.020446185748399456, 'sigma_multiplier': 0.8692298779338599, 'num_layers': 1, 'initialization_multiplier': 0.7040042644819298}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 673 final loss: -0.00029003
Trial 674:
  Learning Rate: 0.05629749833871668
  Sigma Multiplier: 1.0407369853119681
  Initialization Multiplier: 0.45507417052326254
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.55it/s, loss=-0.000269, elapsed time=0.06, total time=12.3]
[I 2025-06-07 14:49:26,572] Trial 674 finished with value: -0.00026850306046821216 and parameters: {'learning_rate': 0.05629749833871668, 'sigma_multiplier': 1.0407369853119681, 'num_layers': 2, 'initialization_multiplier': 0.45507417052326254}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 674 final loss: -0.00026850
Trial 675:
  Learning Rate: 0.03182151471582531
  Sigma Multiplier: 0.9570979300581508
  Initialization Multiplier: 0.595653923591096
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.29it/s, loss=0.000018, elapsed time=0.07, total time=14.9] 
[I 2025-06-07 14:49:41,576] Trial 675 finished with value: 1.7976073504433184e-05 and parameters: {'learning_rate': 0.03182151471582531, 'sigma_multiplier': 0.9570979300581508, 'num_layers': 3, 'initialization_multiplier': 0.595653923591096}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 675 final loss: 0.00001798
Trial 676:
  Learning Rate: 0.0143527636720158
  Sigma Multiplier: 0.1086598865903915
  Initialization Multiplier: 0.5277742350149405
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:15<00:00,  9.50it/s, loss=-0.000037, elapsed time=0.11, total time=16.2]
[I 2025-06-07 14:49:57,925] Trial 676 finished with value: -3.6960161076825345e-05 and parameters: {'learning_rate': 0.0143527636720158, 'sigma_multiplier': 0.1086598865903915, 'num_layers': 2, 'initialization_multiplier': 0.5277742350149405}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 676 final loss: -0.00003696
Trial 677:
  Learning Rate: 0.007939637968226622
  Sigma Multiplier: 1.0020396307705814
  Initialization Multiplier: 0.5604925147213604
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.30it/s, loss=-0.000397, elapsed time=0.07, total time=11.7]
[I 2025-06-07 14:50:09,684] Trial 677 finished with value: -0.00039695816380608864 and parameters: {'learning_rate': 0.007939637968226622, 'sigma_multiplier': 1.0020396307705814, 'num_layers': 2, 'initialization_multiplier': 0.5604925147213604}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 677 final loss: -0.00039696
Trial 678:
  Learning Rate: 0.01095225230791773
  Sigma Multiplier: 1.1162791830180734
  Initialization Multiplier: 0.4958938793119181
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.06it/s, loss=-0.000413, elapsed time=0.08, total time=11.9]
[I 2025-06-07 14:50:21,780] Trial 678 finished with value: -0.00041311979509484965 and parameters: {'learning_rate': 0.01095225230791773, 'sigma_multiplier': 1.1162791830180734, 'num_layers': 2, 'initialization_multiplier': 0.4958938793119181}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 678 final loss: -0.00041312
Trial 679:
  Learning Rate: 0.005765921396768379
  Sigma Multiplier: 1.0517032664354202
  Initialization Multiplier: 1.0032510633604776
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.97it/s, loss=0.000002, elapsed time=0.09, total time=11.9] 
[I 2025-06-07 14:50:33,729] Trial 679 finished with value: 2.171645437408748e-06 and parameters: {'learning_rate': 0.005765921396768379, 'sigma_multiplier': 1.0517032664354202, 'num_layers': 2, 'initialization_multiplier': 1.0032510633604776}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 679 final loss: 0.00000217
Trial 680:
  Learning Rate: 0.00910059883124547
  Sigma Multiplier: 0.9787984538041062
  Initialization Multiplier: 0.34984699136749364
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.74it/s, loss=-0.000371, elapsed time=0.06, total time=12.1]
[I 2025-06-07 14:50:45,923] Trial 680 finished with value: -0.00037143051435907276 and parameters: {'learning_rate': 0.00910059883124547, 'sigma_multiplier': 0.9787984538041062, 'num_layers': 2, 'initialization_multiplier': 0.34984699136749364}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 680 final loss: -0.00037143
Trial 681:
  Learning Rate: 0.012956143603835204
  Sigma Multiplier: 0.9330124271630531
  Initialization Multiplier: 0.6214961640223511
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.38it/s, loss=-0.000306, elapsed time=0.08, total time=12.4]
[I 2025-06-07 14:50:58,402] Trial 681 finished with value: -0.0003057237080564801 and parameters: {'learning_rate': 0.012956143603835204, 'sigma_multiplier': 0.9330124271630531, 'num_layers': 2, 'initialization_multiplier': 0.6214961640223511}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 681 final loss: -0.00030572
Trial 682:
  Learning Rate: 0.007463632260387162
  Sigma Multiplier: 0.5381826881511591
  Initialization Multiplier: 0.5560172396021971
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.67it/s, loss=0.000499, elapsed time=0.1, total time=14.4] 
[I 2025-06-07 14:51:12,847] Trial 682 finished with value: 0.0004990336114147165 and parameters: {'learning_rate': 0.007463632260387162, 'sigma_multiplier': 0.5381826881511591, 'num_layers': 2, 'initialization_multiplier': 0.5560172396021971}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 682 final loss: 0.00049903
Trial 683:
  Learning Rate: 0.003839319704834077
  Sigma Multiplier: 1.009371204969356
  Initialization Multiplier: 0.424116005488882
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.28it/s, loss=-0.000354, elapsed time=0.05, total time=11.6]
[I 2025-06-07 14:51:24,539] Trial 683 finished with value: -0.0003540777214903338 and parameters: {'learning_rate': 0.003839319704834077, 'sigma_multiplier': 1.009371204969356, 'num_layers': 2, 'initialization_multiplier': 0.424116005488882}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 683 final loss: -0.00035408
Trial 684:
  Learning Rate: 0.016792686174739863
  Sigma Multiplier: 0.8930932580444382
  Initialization Multiplier: 0.6794179070834894
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.73it/s, loss=-0.000276, elapsed time=0.1, total time=12.1] 
[I 2025-06-07 14:51:36,700] Trial 684 finished with value: -0.0002757985788556281 and parameters: {'learning_rate': 0.016792686174739863, 'sigma_multiplier': 0.8930932580444382, 'num_layers': 2, 'initialization_multiplier': 0.6794179070834894}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 684 final loss: -0.00027580
Trial 685:
  Learning Rate: 0.009824357723397487
  Sigma Multiplier: 1.0858017900842174
  Initialization Multiplier: 0.5900947798138143
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.51it/s, loss=-0.000442, elapsed time=0.05, total time=11.4]
[I 2025-06-07 14:51:48,173] Trial 685 finished with value: -0.00044197161688646876 and parameters: {'learning_rate': 0.009824357723397487, 'sigma_multiplier': 1.0858017900842174, 'num_layers': 2, 'initialization_multiplier': 0.5900947798138143}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 685 final loss: -0.00044197
Trial 686:
  Learning Rate: 0.010683104662881745
  Sigma Multiplier: 1.1050669353940499
  Initialization Multiplier: 0.5195292886298754
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.78it/s, loss=-0.000325, elapsed time=0.05, total time=11.2]
[I 2025-06-07 14:51:59,482] Trial 686 finished with value: -0.0003245026007086369 and parameters: {'learning_rate': 0.010683104662881745, 'sigma_multiplier': 1.1050669353940499, 'num_layers': 2, 'initialization_multiplier': 0.5195292886298754}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 686 final loss: -0.00032450
Trial 687:
  Learning Rate: 0.012911438997816684
  Sigma Multiplier: 1.1602938910774778
  Initialization Multiplier: 0.48315084500581773
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.76it/s, loss=-0.000299, elapsed time=0.07, total time=11.2]
[I 2025-06-07 14:52:10,769] Trial 687 finished with value: -0.0002988042195869196 and parameters: {'learning_rate': 0.012911438997816684, 'sigma_multiplier': 1.1602938910774778, 'num_layers': 2, 'initialization_multiplier': 0.48315084500581773}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 687 final loss: -0.00029880
Trial 688:
  Learning Rate: 0.011098572546921322
  Sigma Multiplier: 1.1952945973628775
  Initialization Multiplier: 0.5654729791849159
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.79it/s, loss=-0.000372, elapsed time=0.06, total time=11.1]
[I 2025-06-07 14:52:21,954] Trial 688 finished with value: -0.0003721440295627403 and parameters: {'learning_rate': 0.011098572546921322, 'sigma_multiplier': 1.1952945973628775, 'num_layers': 2, 'initialization_multiplier': 0.5654729791849159}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 688 final loss: -0.00037214
Trial 689:
  Learning Rate: 0.01486562449523167
  Sigma Multiplier: 1.1391257145945133
  Initialization Multiplier: 0.5937317460218111
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.71it/s, loss=-0.000371, elapsed time=0.07, total time=11.2]
[I 2025-06-07 14:52:33,236] Trial 689 finished with value: -0.0003706853666314314 and parameters: {'learning_rate': 0.01486562449523167, 'sigma_multiplier': 1.1391257145945133, 'num_layers': 2, 'initialization_multiplier': 0.5937317460218111}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 689 final loss: -0.00037069
Trial 690:
  Learning Rate: 0.009861334739564243
  Sigma Multiplier: 1.0729676101256678
  Initialization Multiplier: 0.4582345621712268
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.38it/s, loss=-0.000424, elapsed time=0.06, total time=11.5]
[I 2025-06-07 14:52:44,831] Trial 690 finished with value: -0.0004235016656750626 and parameters: {'learning_rate': 0.009861334739564243, 'sigma_multiplier': 1.0729676101256678, 'num_layers': 2, 'initialization_multiplier': 0.4582345621712268}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 690 final loss: -0.00042350
Trial 691:
  Learning Rate: 0.01325875100714584
  Sigma Multiplier: 1.1092291150136104
  Initialization Multiplier: 0.5219125291744774
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.02it/s, loss=-0.000390, elapsed time=0.08, total time=11.9]
[I 2025-06-07 14:52:56,768] Trial 691 finished with value: -0.00038985829724772896 and parameters: {'learning_rate': 0.01325875100714584, 'sigma_multiplier': 1.1092291150136104, 'num_layers': 2, 'initialization_multiplier': 0.5219125291744774}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 691 final loss: -0.00038986
Trial 692:
  Learning Rate: 0.009554630859029157
  Sigma Multiplier: 1.0796621053537905
  Initialization Multiplier: 0.5862208243786744
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.33it/s, loss=-0.000365, elapsed time=0.06, total time=11.6]
[I 2025-06-07 14:53:08,473] Trial 692 finished with value: -0.0003650859538189092 and parameters: {'learning_rate': 0.009554630859029157, 'sigma_multiplier': 1.0796621053537905, 'num_layers': 2, 'initialization_multiplier': 0.5862208243786744}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 692 final loss: -0.00036509
Trial 693:
  Learning Rate: 0.011865011889422659
  Sigma Multiplier: 1.0482563204396456
  Initialization Multiplier: 0.5412228192041886
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:15<00:00,  9.72it/s, loss=-0.000062, elapsed time=0.11, total time=15.7]
[I 2025-06-07 14:53:24,280] Trial 693 finished with value: -6.236536848141586e-05 and parameters: {'learning_rate': 0.011865011889422659, 'sigma_multiplier': 1.0482563204396456, 'num_layers': 4, 'initialization_multiplier': 0.5412228192041886}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 693 final loss: -0.00006237
Trial 694:
  Learning Rate: 0.017035441397953113
  Sigma Multiplier: 1.0879774737770591
  Initialization Multiplier: 0.4007535412053056
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.32it/s, loss=-0.000437, elapsed time=0.05, total time=11.7]
[I 2025-06-07 14:53:35,996] Trial 694 finished with value: -0.0004372130519330987 and parameters: {'learning_rate': 0.017035441397953113, 'sigma_multiplier': 1.0879774737770591, 'num_layers': 2, 'initialization_multiplier': 0.4007535412053056}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 694 final loss: -0.00043721
Trial 695:
  Learning Rate: 0.01898412652314497
  Sigma Multiplier: 1.2253357076331821
  Initialization Multiplier: 0.43818901668456295
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.75it/s, loss=-0.000273, elapsed time=0.05, total time=11.2]
[I 2025-06-07 14:53:47,258] Trial 695 finished with value: -0.000273336646122475 and parameters: {'learning_rate': 0.01898412652314497, 'sigma_multiplier': 1.2253357076331821, 'num_layers': 2, 'initialization_multiplier': 0.43818901668456295}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 695 final loss: -0.00027334
Trial 696:
  Learning Rate: 0.027220908370074348
  Sigma Multiplier: 1.1557171157447683
  Initialization Multiplier: 0.32217331125380794
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.30it/s, loss=-0.000293, elapsed time=0.07, total time=11.7]
[I 2025-06-07 14:53:58,967] Trial 696 finished with value: -0.00029265771353549863 and parameters: {'learning_rate': 0.027220908370074348, 'sigma_multiplier': 1.1557171157447683, 'num_layers': 2, 'initialization_multiplier': 0.32217331125380794}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 696 final loss: -0.00029266
Trial 697:
  Learning Rate: 0.02424058068433244
  Sigma Multiplier: 1.1300774561842701
  Initialization Multiplier: 0.4147621730712452
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.00it/s, loss=-0.000247, elapsed time=0.05, total time=11]  
[I 2025-06-07 14:54:10,059] Trial 697 finished with value: -0.00024722607864892615 and parameters: {'learning_rate': 0.02424058068433244, 'sigma_multiplier': 1.1300774561842701, 'num_layers': 2, 'initialization_multiplier': 0.4147621730712452}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 697 final loss: -0.00024723
Trial 698:
  Learning Rate: 0.015605645526991077
  Sigma Multiplier: 1.0901158886482192
  Initialization Multiplier: 0.36060140990384687
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.19it/s, loss=-0.000296, elapsed time=0.05, total time=9.63]
[I 2025-06-07 14:54:19,742] Trial 698 finished with value: -0.00029581093565982504 and parameters: {'learning_rate': 0.015605645526991077, 'sigma_multiplier': 1.0901158886482192, 'num_layers': 1, 'initialization_multiplier': 0.36060140990384687}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 698 final loss: -0.00029581
Trial 699:
  Learning Rate: 0.01958176260249642
  Sigma Multiplier: 1.1223180392275556
  Initialization Multiplier: 0.38817622036495825
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.25it/s, loss=-0.000258, elapsed time=0.05, total time=10.9]
[I 2025-06-07 14:54:30,674] Trial 699 finished with value: -0.00025765474071019246 and parameters: {'learning_rate': 0.01958176260249642, 'sigma_multiplier': 1.1223180392275556, 'num_layers': 2, 'initialization_multiplier': 0.38817622036495825}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 699 final loss: -0.00025765
Trial 700:
  Learning Rate: 0.021977203273361196
  Sigma Multiplier: 1.0807482168383138
  Initialization Multiplier: 0.422709610034917
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.74it/s, loss=-0.000372, elapsed time=0.05, total time=11.2]
[I 2025-06-07 14:54:41,969] Trial 700 finished with value: -0.00037215581923910954 and parameters: {'learning_rate': 0.021977203273361196, 'sigma_multiplier': 1.0807482168383138, 'num_layers': 2, 'initialization_multiplier': 0.422709610034917}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 700 final loss: -0.00037216
Trial 701:
  Learning Rate: 0.017697676783968542
  Sigma Multiplier: 1.173983051284736
  Initialization Multiplier: 0.3610054920748237
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.98it/s, loss=-0.000319, elapsed time=0.05, total time=11]  
[I 2025-06-07 14:54:53,027] Trial 701 finished with value: -0.0003187871227496458 and parameters: {'learning_rate': 0.017697676783968542, 'sigma_multiplier': 1.173983051284736, 'num_layers': 2, 'initialization_multiplier': 0.3610054920748237}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 701 final loss: -0.00031879
Trial 702:
  Learning Rate: 0.01412020580936548
  Sigma Multiplier: 1.0624611692837607
  Initialization Multiplier: 0.38749130251585945
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.71it/s, loss=-0.000290, elapsed time=0.08, total time=11.3]
[I 2025-06-07 14:55:04,419] Trial 702 finished with value: -0.00029027058224554136 and parameters: {'learning_rate': 0.01412020580936548, 'sigma_multiplier': 1.0624611692837607, 'num_layers': 2, 'initialization_multiplier': 0.38749130251585945}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 702 final loss: -0.00029027
Trial 703:
  Learning Rate: 0.017780847208877123
  Sigma Multiplier: 1.1203932152415963
  Initialization Multiplier: 0.4626361988000485
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.97it/s, loss=-0.000354, elapsed time=0.05, total time=11.1]
[I 2025-06-07 14:55:15,596] Trial 703 finished with value: -0.0003538740922751803 and parameters: {'learning_rate': 0.017780847208877123, 'sigma_multiplier': 1.1203932152415963, 'num_layers': 2, 'initialization_multiplier': 0.4626361988000485}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 703 final loss: -0.00035387
Trial 704:
  Learning Rate: 0.01684367591197933
  Sigma Multiplier: 1.044263622170647
  Initialization Multiplier: 0.3967816767899135
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.85it/s, loss=-0.000310, elapsed time=0.06, total time=11.1]
[I 2025-06-07 14:55:26,802] Trial 704 finished with value: -0.0003102975129483081 and parameters: {'learning_rate': 0.01684367591197933, 'sigma_multiplier': 1.044263622170647, 'num_layers': 2, 'initialization_multiplier': 0.3967816767899135}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 704 final loss: -0.00031030
Trial 705:
  Learning Rate: 0.012448644729237212
  Sigma Multiplier: 1.0920286204863596
  Initialization Multiplier: 0.3275971604636846
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.87it/s, loss=-0.000395, elapsed time=0.06, total time=11.2]
[I 2025-06-07 14:55:38,030] Trial 705 finished with value: -0.000395013507892746 and parameters: {'learning_rate': 0.012448644729237212, 'sigma_multiplier': 1.0920286204863596, 'num_layers': 2, 'initialization_multiplier': 0.3275971604636846}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 705 final loss: -0.00039501
Trial 706:
  Learning Rate: 0.01495031391986899
  Sigma Multiplier: 1.03772177395292
  Initialization Multiplier: 0.4810389923863753
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.95it/s, loss=-0.000346, elapsed time=0.05, total time=11]  
[I 2025-06-07 14:55:49,115] Trial 706 finished with value: -0.0003457128915098667 and parameters: {'learning_rate': 0.01495031391986899, 'sigma_multiplier': 1.03772177395292, 'num_layers': 2, 'initialization_multiplier': 0.4810389923863753}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 706 final loss: -0.00034571
Trial 707:
  Learning Rate: 0.01100076127166456
  Sigma Multiplier: 1.1324683476924757
  Initialization Multiplier: 0.4364692879944317
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:17<00:00,  8.64it/s, loss=-0.000030, elapsed time=0.13, total time=17.8]
[I 2025-06-07 14:56:06,964] Trial 707 finished with value: -3.0233844067362896e-05 and parameters: {'learning_rate': 0.01100076127166456, 'sigma_multiplier': 1.1324683476924757, 'num_layers': 5, 'initialization_multiplier': 0.4364692879944317}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 707 final loss: -0.00003023
Trial 708:
  Learning Rate: 0.008838436224836136
  Sigma Multiplier: 1.0799907450678683
  Initialization Multiplier: 1.6533061327903766
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.43it/s, loss=-0.000021, elapsed time=0.08, total time=13.5]
[I 2025-06-07 14:56:20,541] Trial 708 finished with value: -2.076999429610814e-05 and parameters: {'learning_rate': 0.008838436224836136, 'sigma_multiplier': 1.0799907450678683, 'num_layers': 3, 'initialization_multiplier': 1.6533061327903766}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 708 final loss: -0.00002077
Trial 709:
  Learning Rate: 0.01263660487577371
  Sigma Multiplier: 1.028133735070744
  Initialization Multiplier: 0.528381587285808
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.88it/s, loss=-0.000311, elapsed time=0.07, total time=11.2]
[I 2025-06-07 14:56:31,779] Trial 709 finished with value: -0.00031071278312378705 and parameters: {'learning_rate': 0.01263660487577371, 'sigma_multiplier': 1.028133735070744, 'num_layers': 2, 'initialization_multiplier': 0.528381587285808}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 709 final loss: -0.00031071
Trial 710:
  Learning Rate: 0.01068581027422156
  Sigma Multiplier: 1.1686392437867237
  Initialization Multiplier: 0.49221753348326086
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.34it/s, loss=-0.000314, elapsed time=0.06, total time=10.7]
[I 2025-06-07 14:56:42,571] Trial 710 finished with value: -0.00031392464635039203 and parameters: {'learning_rate': 0.01068581027422156, 'sigma_multiplier': 1.1686392437867237, 'num_layers': 2, 'initialization_multiplier': 0.49221753348326086}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 710 final loss: -0.00031392
Trial 711:
  Learning Rate: 0.014729699648761384
  Sigma Multiplier: 1.0504329824064094
  Initialization Multiplier: 0.8350621428871237
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.75it/s, loss=-0.000153, elapsed time=0.07, total time=11.2]
[I 2025-06-07 14:56:53,827] Trial 711 finished with value: -0.0001529206595297692 and parameters: {'learning_rate': 0.014729699648761384, 'sigma_multiplier': 1.0504329824064094, 'num_layers': 2, 'initialization_multiplier': 0.8350621428871237}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 711 final loss: -0.00015292
Trial 712:
  Learning Rate: 0.008668310615911917
  Sigma Multiplier: 1.7681393836485024
  Initialization Multiplier: 0.46510441783497664
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.15it/s, loss=-0.000258, elapsed time=0.04, total time=10.2]
[I 2025-06-07 14:57:04,088] Trial 712 finished with value: -0.00025843485101281137 and parameters: {'learning_rate': 0.008668310615911917, 'sigma_multiplier': 1.7681393836485024, 'num_layers': 2, 'initialization_multiplier': 0.46510441783497664}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 712 final loss: -0.00025843
Trial 713:
  Learning Rate: 0.00740280183627509
  Sigma Multiplier: 1.0144284849358738
  Initialization Multiplier: 0.5022754039088486
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.60it/s, loss=-0.000335, elapsed time=0.07, total time=11.4]
[I 2025-06-07 14:57:15,556] Trial 713 finished with value: -0.0003354202781129376 and parameters: {'learning_rate': 0.00740280183627509, 'sigma_multiplier': 1.0144284849358738, 'num_layers': 2, 'initialization_multiplier': 0.5022754039088486}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 713 final loss: -0.00033542
Trial 714:
  Learning Rate: 0.019910810026564262
  Sigma Multiplier: 1.1129457497602848
  Initialization Multiplier: 0.38058234770595006
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.41it/s, loss=-0.000388, elapsed time=0.06, total time=10.7]
[I 2025-06-07 14:57:26,313] Trial 714 finished with value: -0.0003884895693556323 and parameters: {'learning_rate': 0.019910810026564262, 'sigma_multiplier': 1.1129457497602848, 'num_layers': 2, 'initialization_multiplier': 0.38058234770595006}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 714 final loss: -0.00038849
Trial 715:
  Learning Rate: 0.010084157427012509
  Sigma Multiplier: 1.0786615370039951
  Initialization Multiplier: 0.2723730004528202
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.31it/s, loss=-0.000292, elapsed time=0.05, total time=9.04]
[I 2025-06-07 14:57:35,395] Trial 715 finished with value: -0.00029204033445634416 and parameters: {'learning_rate': 0.010084157427012509, 'sigma_multiplier': 1.0786615370039951, 'num_layers': 1, 'initialization_multiplier': 0.2723730004528202}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 715 final loss: -0.00029204
Trial 716:
  Learning Rate: 0.09989775617068188
  Sigma Multiplier: 1.0084502680311604
  Initialization Multiplier: 0.4434593451164424
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.95it/s, loss=0.000268, elapsed time=0.06, total time=11.1] 
[I 2025-06-07 14:57:46,539] Trial 716 finished with value: 0.000268096293037651 and parameters: {'learning_rate': 0.09989775617068188, 'sigma_multiplier': 1.0084502680311604, 'num_layers': 2, 'initialization_multiplier': 0.4434593451164424}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 716 final loss: 0.00026810
Trial 717:
  Learning Rate: 0.0005709781506346984
  Sigma Multiplier: 1.0510876238426796
  Initialization Multiplier: 1.2693412075023647
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.53it/s, loss=0.120033, elapsed time=0.05, total time=12.3]
[I 2025-06-07 14:57:58,856] Trial 717 finished with value: 0.12003298539947149 and parameters: {'learning_rate': 0.0005709781506346984, 'sigma_multiplier': 1.0510876238426796, 'num_layers': 2, 'initialization_multiplier': 1.2693412075023647}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 717 final loss: 0.12003299
Trial 718:
  Learning Rate: 0.012528641427232229
  Sigma Multiplier: 1.2029157049410548
  Initialization Multiplier: 0.5499414088546191
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.86it/s, loss=-0.000365, elapsed time=0.08, total time=11.1]
[I 2025-06-07 14:58:10,064] Trial 718 finished with value: -0.00036518097991277554 and parameters: {'learning_rate': 0.012528641427232229, 'sigma_multiplier': 1.2029157049410548, 'num_layers': 2, 'initialization_multiplier': 0.5499414088546191}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 718 final loss: -0.00036518
Trial 719:
  Learning Rate: 0.015736203468117733
  Sigma Multiplier: 1.007346952076405
  Initialization Multiplier: 0.5009242870021928
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.25it/s, loss=-0.000390, elapsed time=0.07, total time=12.6]
[I 2025-06-07 14:58:22,717] Trial 719 finished with value: -0.00038994906104798145 and parameters: {'learning_rate': 0.015736203468117733, 'sigma_multiplier': 1.007346952076405, 'num_layers': 2, 'initialization_multiplier': 0.5009242870021928}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 719 final loss: -0.00038995
Trial 720:
  Learning Rate: 0.009346984424080619
  Sigma Multiplier: 1.102177292698064
  Initialization Multiplier: 0.6124568967552991
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.89it/s, loss=-0.000413, elapsed time=0.05, total time=11.1]
[I 2025-06-07 14:58:33,906] Trial 720 finished with value: -0.0004129200523295701 and parameters: {'learning_rate': 0.009346984424080619, 'sigma_multiplier': 1.102177292698064, 'num_layers': 2, 'initialization_multiplier': 0.6124568967552991}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 720 final loss: -0.00041292
Trial 721:
  Learning Rate: 6.790148566744838e-05
  Sigma Multiplier: 1.0546262017772166
  Initialization Multiplier: 0.5663793480086711
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.78it/s, loss=0.056004, elapsed time=0.06, total time=12.1]
[I 2025-06-07 14:58:46,024] Trial 721 finished with value: 0.05600399375529993 and parameters: {'learning_rate': 6.790148566744838e-05, 'sigma_multiplier': 1.0546262017772166, 'num_layers': 2, 'initialization_multiplier': 0.5663793480086711}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 721 final loss: 0.05600399
Trial 722:
  Learning Rate: 0.007978871971005651
  Sigma Multiplier: 0.9876443156865546
  Initialization Multiplier: 0.5220970465361662
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.19it/s, loss=-0.000454, elapsed time=0.09, total time=12.8]
[I 2025-06-07 14:58:58,883] Trial 722 finished with value: -0.0004544466546560395 and parameters: {'learning_rate': 0.007978871971005651, 'sigma_multiplier': 0.9876443156865546, 'num_layers': 2, 'initialization_multiplier': 0.5220970465361662}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 722 final loss: -0.00045445
Trial 723:
  Learning Rate: 0.0068879753394987505
  Sigma Multiplier: 0.9729219933104395
  Initialization Multiplier: 0.6544212874238073
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.63it/s, loss=-0.000445, elapsed time=0.07, total time=12.2]
[I 2025-06-07 14:59:11,167] Trial 723 finished with value: -0.00044528176210073913 and parameters: {'learning_rate': 0.0068879753394987505, 'sigma_multiplier': 0.9729219933104395, 'num_layers': 2, 'initialization_multiplier': 0.6544212874238073}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 723 final loss: -0.00044528
Trial 724:
  Learning Rate: 0.006451335838850762
  Sigma Multiplier: 0.9645208141105678
  Initialization Multiplier: 0.7565773569113421
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.49it/s, loss=-0.000133, elapsed time=0.07, total time=12.3]
[I 2025-06-07 14:59:23,550] Trial 724 finished with value: -0.00013251419435078305 and parameters: {'learning_rate': 0.006451335838850762, 'sigma_multiplier': 0.9645208141105678, 'num_layers': 2, 'initialization_multiplier': 0.7565773569113421}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 724 final loss: -0.00013251
Trial 725:
  Learning Rate: 0.006654958440340148
  Sigma Multiplier: 0.9740274940879385
  Initialization Multiplier: 0.6769096867102657
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.50it/s, loss=-0.000359, elapsed time=0.05, total time=12.5]
[I 2025-06-07 14:59:36,176] Trial 725 finished with value: -0.0003591361686811808 and parameters: {'learning_rate': 0.006654958440340148, 'sigma_multiplier': 0.9740274940879385, 'num_layers': 2, 'initialization_multiplier': 0.6769096867102657}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 725 final loss: -0.00035914
Trial 726:
  Learning Rate: 0.00743647041910782
  Sigma Multiplier: 0.991739841005479
  Initialization Multiplier: 0.6493619483824485
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.82it/s, loss=-0.000357, elapsed time=0.07, total time=12]  
[I 2025-06-07 14:59:48,276] Trial 726 finished with value: -0.0003572620636701813 and parameters: {'learning_rate': 0.00743647041910782, 'sigma_multiplier': 0.991739841005479, 'num_layers': 2, 'initialization_multiplier': 0.6493619483824485}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 726 final loss: -0.00035726
Trial 727:
  Learning Rate: 0.006127808884488671
  Sigma Multiplier: 0.9497950049421064
  Initialization Multiplier: 0.649388748508519
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:19<00:00,  7.50it/s, loss=-0.000111, elapsed time=0.09, total time=20.3]
[I 2025-06-07 15:00:08,699] Trial 727 finished with value: -0.0001114034018248895 and parameters: {'learning_rate': 0.006127808884488671, 'sigma_multiplier': 0.9497950049421064, 'num_layers': 4, 'initialization_multiplier': 0.649388748508519}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 727 final loss: -0.00011140
Trial 728:
  Learning Rate: 0.00701393589558548
  Sigma Multiplier: 1.0268889627058444
  Initialization Multiplier: 0.9573241683293483
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.85it/s, loss=-0.000198, elapsed time=0.07, total time=12.1]
[I 2025-06-07 15:00:20,883] Trial 728 finished with value: -0.00019840935465352994 and parameters: {'learning_rate': 0.00701393589558548, 'sigma_multiplier': 1.0268889627058444, 'num_layers': 2, 'initialization_multiplier': 0.9573241683293483}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 728 final loss: -0.00019841
Trial 729:
  Learning Rate: 0.007650999070922918
  Sigma Multiplier: 0.9832668038586734
  Initialization Multiplier: 0.6414309545482871
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.19it/s, loss=-0.000400, elapsed time=0.07, total time=11.7]
[I 2025-06-07 15:00:32,641] Trial 729 finished with value: -0.0004002737755506098 and parameters: {'learning_rate': 0.007650999070922918, 'sigma_multiplier': 0.9832668038586734, 'num_layers': 2, 'initialization_multiplier': 0.6414309545482871}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 729 final loss: -0.00040027
Trial 730:
  Learning Rate: 0.0059692558117027264
  Sigma Multiplier: 0.8187134221515187
  Initialization Multiplier: 0.717837552909417
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.48it/s, loss=-0.000149, elapsed time=0.05, total time=12.4]
[I 2025-06-07 15:00:45,083] Trial 730 finished with value: -0.00014921936795214832 and parameters: {'learning_rate': 0.0059692558117027264, 'sigma_multiplier': 0.8187134221515187, 'num_layers': 2, 'initialization_multiplier': 0.717837552909417}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 730 final loss: -0.00014922
Trial 731:
  Learning Rate: 0.007795814161832459
  Sigma Multiplier: 0.9221428960725251
  Initialization Multiplier: 0.688552478335344
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.08it/s, loss=-0.000417, elapsed time=0.11, total time=11.8]
[I 2025-06-07 15:00:56,980] Trial 731 finished with value: -0.0004168626283561131 and parameters: {'learning_rate': 0.007795814161832459, 'sigma_multiplier': 0.9221428960725251, 'num_layers': 2, 'initialization_multiplier': 0.688552478335344}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 731 final loss: -0.00041686
Trial 732:
  Learning Rate: 0.008313423023785887
  Sigma Multiplier: 1.1454689678931562
  Initialization Multiplier: 0.62416075874884
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.46it/s, loss=-0.000327, elapsed time=0.06, total time=10.7]
[I 2025-06-07 15:01:07,719] Trial 732 finished with value: -0.00032745919769177424 and parameters: {'learning_rate': 0.008313423023785887, 'sigma_multiplier': 1.1454689678931562, 'num_layers': 2, 'initialization_multiplier': 0.62416075874884}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 732 final loss: -0.00032746
Trial 733:
  Learning Rate: 0.00038962357434424487
  Sigma Multiplier: 1.070496390233562
  Initialization Multiplier: 0.2976206293624062
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.52it/s, loss=0.006517, elapsed time=0.06, total time=11.3]
[I 2025-06-07 15:01:19,122] Trial 733 finished with value: 0.0065165662502419935 and parameters: {'learning_rate': 0.00038962357434424487, 'sigma_multiplier': 1.070496390233562, 'num_layers': 2, 'initialization_multiplier': 0.2976206293624062}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 733 final loss: 0.00651657
Trial 734:
  Learning Rate: 0.00627766382640825
  Sigma Multiplier: 1.0191513601480704
  Initialization Multiplier: 0.7325931391897375
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.23it/s, loss=-0.000127, elapsed time=0.11, total time=11.7]
[I 2025-06-07 15:01:30,862] Trial 734 finished with value: -0.00012748841867197186 and parameters: {'learning_rate': 0.00627766382640825, 'sigma_multiplier': 1.0191513601480704, 'num_layers': 2, 'initialization_multiplier': 0.7325931391897375}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 734 final loss: -0.00012749
Trial 735:
  Learning Rate: 0.008710630603701844
  Sigma Multiplier: 0.9604666612822583
  Initialization Multiplier: 0.5889265916556229
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.62it/s, loss=-0.000475, elapsed time=0.05, total time=11.3]
[I 2025-06-07 15:01:42,192] Trial 735 finished with value: -0.00047462951300322785 and parameters: {'learning_rate': 0.008710630603701844, 'sigma_multiplier': 0.9604666612822583, 'num_layers': 2, 'initialization_multiplier': 0.5889265916556229}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 735 final loss: -0.00047463
Trial 736:
  Learning Rate: 0.008517251234864178
  Sigma Multiplier: 0.8566495082859401
  Initialization Multiplier: 0.6069931827776934
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.94it/s, loss=-0.000326, elapsed time=0.06, total time=11.9]
[I 2025-06-07 15:01:54,125] Trial 736 finished with value: -0.000326480151808102 and parameters: {'learning_rate': 0.008517251234864178, 'sigma_multiplier': 0.8566495082859401, 'num_layers': 2, 'initialization_multiplier': 0.6069931827776934}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 736 final loss: -0.00032648
Trial 737:
  Learning Rate: 0.007538148828449586
  Sigma Multiplier: 0.8859896083162806
  Initialization Multiplier: 0.6905065535871612
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.20it/s, loss=-0.000249, elapsed time=0.05, total time=10.2]
[I 2025-06-07 15:02:04,348] Trial 737 finished with value: -0.00024871285880695356 and parameters: {'learning_rate': 0.007538148828449586, 'sigma_multiplier': 0.8859896083162806, 'num_layers': 1, 'initialization_multiplier': 0.6905065535871612}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 737 final loss: -0.00024871
Trial 738:
  Learning Rate: 0.005514817167202739
  Sigma Multiplier: 0.9416077391804544
  Initialization Multiplier: 0.6431480171803092
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.89it/s, loss=-0.000383, elapsed time=0.08, total time=12]  
[I 2025-06-07 15:02:16,404] Trial 738 finished with value: -0.0003826657072619391 and parameters: {'learning_rate': 0.005514817167202739, 'sigma_multiplier': 0.9416077391804544, 'num_layers': 2, 'initialization_multiplier': 0.6431480171803092}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 738 final loss: -0.00038267
Trial 739:
  Learning Rate: 0.009977632056505063
  Sigma Multiplier: 0.9037529661146658
  Initialization Multiplier: 0.5950908102632885
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.00it/s, loss=-0.000305, elapsed time=0.07, total time=11.8]
[I 2025-06-07 15:02:28,273] Trial 739 finished with value: -0.00030503120111254804 and parameters: {'learning_rate': 0.009977632056505063, 'sigma_multiplier': 0.9037529661146658, 'num_layers': 2, 'initialization_multiplier': 0.5950908102632885}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 739 final loss: -0.00030503
Trial 740:
  Learning Rate: 0.00664296891710113
  Sigma Multiplier: 0.9622132225507553
  Initialization Multiplier: 0.6644900704212108
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.41it/s, loss=-0.000143, elapsed time=0.14, total time=14.7]
[I 2025-06-07 15:02:43,057] Trial 740 finished with value: -0.0001432392957392749 and parameters: {'learning_rate': 0.00664296891710113, 'sigma_multiplier': 0.9622132225507553, 'num_layers': 3, 'initialization_multiplier': 0.6644900704212108}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 740 final loss: -0.00014324
Trial 741:
  Learning Rate: 0.008955836014379052
  Sigma Multiplier: 1.0916940025425221
  Initialization Multiplier: 0.602975565606296
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.34it/s, loss=-0.000319, elapsed time=0.09, total time=11.6]
[I 2025-06-07 15:02:54,664] Trial 741 finished with value: -0.0003192897174317213 and parameters: {'learning_rate': 0.008955836014379052, 'sigma_multiplier': 1.0916940025425221, 'num_layers': 2, 'initialization_multiplier': 0.602975565606296}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 741 final loss: -0.00031929
Trial 742:
  Learning Rate: 0.007318179543903815
  Sigma Multiplier: 0.9958487351836545
  Initialization Multiplier: 0.5709146489161401
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.78it/s, loss=-0.000295, elapsed time=0.06, total time=12.2]
[I 2025-06-07 15:03:06,937] Trial 742 finished with value: -0.00029543031250360785 and parameters: {'learning_rate': 0.007318179543903815, 'sigma_multiplier': 0.9958487351836545, 'num_layers': 2, 'initialization_multiplier': 0.5709146489161401}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 742 final loss: -0.00029543
Trial 743:
  Learning Rate: 0.009441327548132364
  Sigma Multiplier: 0.9439634117802873
  Initialization Multiplier: 0.6282952456352904
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.64it/s, loss=-0.000299, elapsed time=0.08, total time=12.3]
[I 2025-06-07 15:03:19,339] Trial 743 finished with value: -0.00029884375325855457 and parameters: {'learning_rate': 0.009441327548132364, 'sigma_multiplier': 0.9439634117802873, 'num_layers': 2, 'initialization_multiplier': 0.6282952456352904}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 743 final loss: -0.00029884
Trial 744:
  Learning Rate: 0.005052166485213425
  Sigma Multiplier: 1.1435448264188903
  Initialization Multiplier: 0.5321030036858307
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.26it/s, loss=-0.000370, elapsed time=0.08, total time=11.6]
[I 2025-06-07 15:03:30,958] Trial 744 finished with value: -0.00036996345319951015 and parameters: {'learning_rate': 0.005052166485213425, 'sigma_multiplier': 1.1435448264188903, 'num_layers': 2, 'initialization_multiplier': 0.5321030036858307}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 744 final loss: -0.00036996
Trial 745:
  Learning Rate: 0.011392202498667781
  Sigma Multiplier: 1.0485327145868943
  Initialization Multiplier: 0.4044264477394042
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.50it/s, loss=-0.000404, elapsed time=0.07, total time=11.5]
[I 2025-06-07 15:03:42,468] Trial 745 finished with value: -0.00040398254115558963 and parameters: {'learning_rate': 0.011392202498667781, 'sigma_multiplier': 1.0485327145868943, 'num_layers': 2, 'initialization_multiplier': 0.4044264477394042}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 745 final loss: -0.00040398
Trial 746:
  Learning Rate: 0.008383514411823265
  Sigma Multiplier: 0.974673543034704
  Initialization Multiplier: 0.5862669740165236
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.74it/s, loss=-0.000352, elapsed time=0.08, total time=12.2]
[I 2025-06-07 15:03:54,715] Trial 746 finished with value: -0.00035232270101300466 and parameters: {'learning_rate': 0.008383514411823265, 'sigma_multiplier': 0.974673543034704, 'num_layers': 2, 'initialization_multiplier': 0.5862669740165236}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 746 final loss: -0.00035232
Trial 747:
  Learning Rate: 0.006901026066111823
  Sigma Multiplier: 0.794644007965185
  Initialization Multiplier: 0.716503679591368
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.50it/s, loss=-0.000171, elapsed time=0.08, total time=13.4]
[I 2025-06-07 15:04:08,153] Trial 747 finished with value: -0.00017093738571495937 and parameters: {'learning_rate': 0.006901026066111823, 'sigma_multiplier': 0.794644007965185, 'num_layers': 2, 'initialization_multiplier': 0.716503679591368}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 747 final loss: -0.00017094
Trial 748:
  Learning Rate: 0.009906238265482115
  Sigma Multiplier: 1.026270822759051
  Initialization Multiplier: 0.7820647243963994
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.44it/s, loss=-0.000089, elapsed time=0.08, total time=12.4]
[I 2025-06-07 15:04:20,620] Trial 748 finished with value: -8.949644568183601e-05 and parameters: {'learning_rate': 0.009906238265482115, 'sigma_multiplier': 1.026270822759051, 'num_layers': 2, 'initialization_multiplier': 0.7820647243963994}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 748 final loss: -0.00008950
Trial 749:
  Learning Rate: 0.00845106397380253
  Sigma Multiplier: 1.092812863690611
  Initialization Multiplier: 0.47220227323922476
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.39it/s, loss=-0.000231, elapsed time=0.06, total time=11.6]
[I 2025-06-07 15:04:32,244] Trial 749 finished with value: -0.00023125385889709389 and parameters: {'learning_rate': 0.00845106397380253, 'sigma_multiplier': 1.092812863690611, 'num_layers': 2, 'initialization_multiplier': 0.47220227323922476}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 749 final loss: -0.00023125
Trial 750:
  Learning Rate: 0.005882771387326149
  Sigma Multiplier: 0.9159002034401242
  Initialization Multiplier: 0.5259690211756264
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.52it/s, loss=-0.000434, elapsed time=0.07, total time=12.3]
[I 2025-06-07 15:04:44,665] Trial 750 finished with value: -0.0004339045880816505 and parameters: {'learning_rate': 0.005882771387326149, 'sigma_multiplier': 0.9159002034401242, 'num_layers': 2, 'initialization_multiplier': 0.5259690211756264}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 750 final loss: -0.00043390
Trial 751:
  Learning Rate: 0.011120166278461427
  Sigma Multiplier: 0.9946519412859451
  Initialization Multiplier: 0.671085701092148
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.82it/s, loss=-0.000353, elapsed time=0.07, total time=12.1]
[I 2025-06-07 15:04:56,781] Trial 751 finished with value: -0.00035251662928179864 and parameters: {'learning_rate': 0.011120166278461427, 'sigma_multiplier': 0.9946519412859451, 'num_layers': 2, 'initialization_multiplier': 0.671085701092148}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 751 final loss: -0.00035252
Trial 752:
  Learning Rate: 0.007283313674922107
  Sigma Multiplier: 1.0562006579136578
  Initialization Multiplier: 0.6118741632340825
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.42it/s, loss=-0.000408, elapsed time=0.05, total time=11.4]
[I 2025-06-07 15:05:08,284] Trial 752 finished with value: -0.0004076410948232848 and parameters: {'learning_rate': 0.007283313674922107, 'sigma_multiplier': 1.0562006579136578, 'num_layers': 2, 'initialization_multiplier': 0.6118741632340825}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 752 final loss: -0.00040764
Trial 753:
  Learning Rate: 0.009538213219091186
  Sigma Multiplier: 0.9549057945778774
  Initialization Multiplier: 0.554683807275005
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.73it/s, loss=-0.000294, elapsed time=0.1, total time=12.1] 
[I 2025-06-07 15:05:20,476] Trial 753 finished with value: -0.00029412622467978614 and parameters: {'learning_rate': 0.009538213219091186, 'sigma_multiplier': 0.9549057945778774, 'num_layers': 2, 'initialization_multiplier': 0.554683807275005}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 753 final loss: -0.00029413
Trial 754:
  Learning Rate: 0.01187658640722841
  Sigma Multiplier: 1.2571083474659495
  Initialization Multiplier: 0.4415447866189659
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.32it/s, loss=-0.000362, elapsed time=0.05, total time=10.8]
[I 2025-06-07 15:05:31,357] Trial 754 finished with value: -0.00036167672343396384 and parameters: {'learning_rate': 0.01187658640722841, 'sigma_multiplier': 1.2571083474659495, 'num_layers': 2, 'initialization_multiplier': 0.4415447866189659}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 754 final loss: -0.00036168
Trial 755:
  Learning Rate: 0.008183252689031077
  Sigma Multiplier: 0.7478884385103077
  Initialization Multiplier: 0.5060092212068742
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.69it/s, loss=-0.000241, elapsed time=0.06, total time=13.2]
[I 2025-06-07 15:05:44,690] Trial 755 finished with value: -0.0002411644889080703 and parameters: {'learning_rate': 0.008183252689031077, 'sigma_multiplier': 0.7478884385103077, 'num_layers': 2, 'initialization_multiplier': 0.5060092212068742}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 755 final loss: -0.00024116
Trial 756:
  Learning Rate: 0.00199567521802995
  Sigma Multiplier: 1.1158319760055952
  Initialization Multiplier: 0.5803587683925568
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.22it/s, loss=-0.000312, elapsed time=0.07, total time=11.8]
[I 2025-06-07 15:05:56,576] Trial 756 finished with value: -0.0003120550637056797 and parameters: {'learning_rate': 0.00199567521802995, 'sigma_multiplier': 1.1158319760055952, 'num_layers': 2, 'initialization_multiplier': 0.5803587683925568}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 756 final loss: -0.00031206
Trial 757:
  Learning Rate: 0.005340291803272617
  Sigma Multiplier: 1.0306641309094366
  Initialization Multiplier: 0.35469287517325826
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.65it/s, loss=-0.000318, elapsed time=0.06, total time=12.2]
[I 2025-06-07 15:06:08,815] Trial 757 finished with value: -0.0003177023448711156 and parameters: {'learning_rate': 0.005340291803272617, 'sigma_multiplier': 1.0306641309094366, 'num_layers': 2, 'initialization_multiplier': 0.35469287517325826}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 757 final loss: -0.00031770
Trial 758:
  Learning Rate: 0.006629543878297159
  Sigma Multiplier: 0.8491114025202073
  Initialization Multiplier: 0.6363293258127467
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.26it/s, loss=-0.000263, elapsed time=0.07, total time=12.5]
[I 2025-06-07 15:06:21,402] Trial 758 finished with value: -0.00026256926552911127 and parameters: {'learning_rate': 0.006629543878297159, 'sigma_multiplier': 0.8491114025202073, 'num_layers': 2, 'initialization_multiplier': 0.6363293258127467}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 758 final loss: -0.00026257
Trial 759:
  Learning Rate: 0.010257957467814634
  Sigma Multiplier: 1.1752336956815697
  Initialization Multiplier: 0.4686795498862622
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.89it/s, loss=-0.000335, elapsed time=0.08, total time=11.2]
[I 2025-06-07 15:06:32,659] Trial 759 finished with value: -0.00033493757226763356 and parameters: {'learning_rate': 0.010257957467814634, 'sigma_multiplier': 1.1752336956815697, 'num_layers': 2, 'initialization_multiplier': 0.4686795498862622}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 759 final loss: -0.00033494
Trial 760:
  Learning Rate: 0.012911723480021769
  Sigma Multiplier: 0.8993293629460986
  Initialization Multiplier: 0.5341500920274204
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.74it/s, loss=-0.000211, elapsed time=0.06, total time=10.6]
[I 2025-06-07 15:06:43,284] Trial 760 finished with value: -0.00021074241000690563 and parameters: {'learning_rate': 0.012911723480021769, 'sigma_multiplier': 0.8993293629460986, 'num_layers': 1, 'initialization_multiplier': 0.5341500920274204}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 760 final loss: -0.00021074
Trial 761:
  Learning Rate: 0.008807423497733977
  Sigma Multiplier: 0.9942691466106034
  Initialization Multiplier: 0.4148869601333131
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.54it/s, loss=-0.000340, elapsed time=0.06, total time=12.4]
[I 2025-06-07 15:06:55,762] Trial 761 finished with value: -0.0003404617473087335 and parameters: {'learning_rate': 0.008807423497733977, 'sigma_multiplier': 0.9942691466106034, 'num_layers': 2, 'initialization_multiplier': 0.4148869601333131}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 761 final loss: -0.00034046
Trial 762:
  Learning Rate: 0.010631475813958667
  Sigma Multiplier: 1.0547965649173343
  Initialization Multiplier: 0.6849331445169439
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.77it/s, loss=-0.000271, elapsed time=0.09, total time=12.1]
[I 2025-06-07 15:07:07,917] Trial 762 finished with value: -0.0002709641051884987 and parameters: {'learning_rate': 0.010631475813958667, 'sigma_multiplier': 1.0547965649173343, 'num_layers': 2, 'initialization_multiplier': 0.6849331445169439}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 762 final loss: -0.00027096
Trial 763:
  Learning Rate: 0.00022464768308505078
  Sigma Multiplier: 0.954288318677023
  Initialization Multiplier: 0.5694378714806018
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.65it/s, loss=0.044411, elapsed time=0.07, total time=12.2]
[I 2025-06-07 15:07:20,210] Trial 763 finished with value: 0.04441083878688734 and parameters: {'learning_rate': 0.00022464768308505078, 'sigma_multiplier': 0.954288318677023, 'num_layers': 2, 'initialization_multiplier': 0.5694378714806018}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 763 final loss: 0.04441084
Trial 764:
  Learning Rate: 0.006299874590496089
  Sigma Multiplier: 1.0916492123778279
  Initialization Multiplier: 0.6137584083678528
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.26it/s, loss=-0.000359, elapsed time=0.05, total time=12.6]
[I 2025-06-07 15:07:32,851] Trial 764 finished with value: -0.00035946117939722667 and parameters: {'learning_rate': 0.006299874590496089, 'sigma_multiplier': 1.0916492123778279, 'num_layers': 2, 'initialization_multiplier': 0.6137584083678528}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 764 final loss: -0.00035946
Trial 765:
  Learning Rate: 0.007999071810496847
  Sigma Multiplier: 1.0124182740160572
  Initialization Multiplier: 0.49661356718096866
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.42it/s, loss=-0.000343, elapsed time=0.08, total time=12.4]
[I 2025-06-07 15:07:45,412] Trial 765 finished with value: -0.0003429341883686654 and parameters: {'learning_rate': 0.007999071810496847, 'sigma_multiplier': 1.0124182740160572, 'num_layers': 2, 'initialization_multiplier': 0.49661356718096866}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 765 final loss: -0.00034293
Trial 766:
  Learning Rate: 0.012029640500095314
  Sigma Multiplier: 0.9240010133828582
  Initialization Multiplier: 0.547590147263409
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.75it/s, loss=-0.000324, elapsed time=0.09, total time=13.1]
[I 2025-06-07 15:07:58,627] Trial 766 finished with value: -0.0003243128020035604 and parameters: {'learning_rate': 0.012029640500095314, 'sigma_multiplier': 0.9240010133828582, 'num_layers': 2, 'initialization_multiplier': 0.547590147263409}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 766 final loss: -0.00032431
Trial 767:
  Learning Rate: 0.009851581368106514
  Sigma Multiplier: 1.1477681429857882
  Initialization Multiplier: 0.6337447999780225
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.47it/s, loss=-0.000328, elapsed time=0.05, total time=12.4]
[I 2025-06-07 15:08:11,253] Trial 767 finished with value: -0.00032786808917350797 and parameters: {'learning_rate': 0.009851581368106514, 'sigma_multiplier': 1.1477681429857882, 'num_layers': 2, 'initialization_multiplier': 0.6337447999780225}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 767 final loss: -0.00032787
Trial 768:
  Learning Rate: 0.005238619317208971
  Sigma Multiplier: 0.9803233127558972
  Initialization Multiplier: 0.4637651453638353
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.80it/s, loss=-0.000396, elapsed time=0.06, total time=11.2]
[I 2025-06-07 15:08:22,516] Trial 768 finished with value: -0.00039552275328540957 and parameters: {'learning_rate': 0.005238619317208971, 'sigma_multiplier': 0.9803233127558972, 'num_layers': 2, 'initialization_multiplier': 0.4637651453638353}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 768 final loss: -0.00039552
Trial 769:
  Learning Rate: 0.01374160358818277
  Sigma Multiplier: 1.0761642932450721
  Initialization Multiplier: 0.5803510507429608
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.17it/s, loss=-0.000305, elapsed time=0.08, total time=11.9]
[I 2025-06-07 15:08:34,566] Trial 769 finished with value: -0.0003053344939619738 and parameters: {'learning_rate': 0.01374160358818277, 'sigma_multiplier': 1.0761642932450721, 'num_layers': 2, 'initialization_multiplier': 0.5803510507429608}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 769 final loss: -0.00030533
Trial 770:
  Learning Rate: 0.007401550603813541
  Sigma Multiplier: 1.0289218186618858
  Initialization Multiplier: 0.5089632214838017
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.85it/s, loss=-0.000259, elapsed time=0.05, total time=12.1]
[I 2025-06-07 15:08:46,766] Trial 770 finished with value: -0.0002588623177898789 and parameters: {'learning_rate': 0.007401550603813541, 'sigma_multiplier': 1.0289218186618858, 'num_layers': 2, 'initialization_multiplier': 0.5089632214838017}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 770 final loss: -0.00025886
Trial 771:
  Learning Rate: 0.001669740057928426
  Sigma Multiplier: 1.1162092026069537
  Initialization Multiplier: 0.7049779087078298
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.45it/s, loss=0.012896, elapsed time=0.09, total time=14.7]
[I 2025-06-07 15:09:01,563] Trial 771 finished with value: 0.012896303653961886 and parameters: {'learning_rate': 0.001669740057928426, 'sigma_multiplier': 1.1162092026069537, 'num_layers': 3, 'initialization_multiplier': 0.7049779087078298}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 771 final loss: 0.01289630
Trial 772:
  Learning Rate: 0.00955228873486367
  Sigma Multiplier: 0.9708110085585626
  Initialization Multiplier: 0.3860676260240215
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.47it/s, loss=-0.000390, elapsed time=0.06, total time=13.6]
[I 2025-06-07 15:09:15,357] Trial 772 finished with value: -0.0003896391684508469 and parameters: {'learning_rate': 0.00955228873486367, 'sigma_multiplier': 0.9708110085585626, 'num_layers': 2, 'initialization_multiplier': 0.3860676260240215}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 772 final loss: -0.00038964
Trial 773:
  Learning Rate: 0.0060842412150032285
  Sigma Multiplier: 0.6944940207193762
  Initialization Multiplier: 0.6610985890602492
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.65it/s, loss=-0.000051, elapsed time=0.07, total time=13.2]
[I 2025-06-07 15:09:28,663] Trial 773 finished with value: -5.093349891368047e-05 and parameters: {'learning_rate': 0.0060842412150032285, 'sigma_multiplier': 0.6944940207193762, 'num_layers': 2, 'initialization_multiplier': 0.6610985890602492}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 773 final loss: -0.00005093
Trial 774:
  Learning Rate: 0.01111860611235807
  Sigma Multiplier: 1.8562284909949285
  Initialization Multiplier: 0.54233112304956
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.23it/s, loss=-0.000206, elapsed time=0.06, total time=10.2]
[I 2025-06-07 15:09:38,956] Trial 774 finished with value: -0.00020631122589626698 and parameters: {'learning_rate': 0.01111860611235807, 'sigma_multiplier': 1.8562284909949285, 'num_layers': 2, 'initialization_multiplier': 0.54233112304956}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 774 final loss: -0.00020631
Trial 775:
  Learning Rate: 0.008492296495631064
  Sigma Multiplier: 1.0606920823424346
  Initialization Multiplier: 0.6042921892607332
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.36it/s, loss=-0.000363, elapsed time=0.06, total time=12.7]
[I 2025-06-07 15:09:51,869] Trial 775 finished with value: -0.0003634181296578732 and parameters: {'learning_rate': 0.008492296495631064, 'sigma_multiplier': 1.0606920823424346, 'num_layers': 2, 'initialization_multiplier': 0.6042921892607332}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 775 final loss: -0.00036342
Trial 776:
  Learning Rate: 0.0007829053823633073
  Sigma Multiplier: 0.9299409849440209
  Initialization Multiplier: 0.4328058561405605
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.44it/s, loss=0.001442, elapsed time=0.06, total time=12.4]
[I 2025-06-07 15:10:04,361] Trial 776 finished with value: 0.001441888245659329 and parameters: {'learning_rate': 0.0007829053823633073, 'sigma_multiplier': 0.9299409849440209, 'num_layers': 2, 'initialization_multiplier': 0.4328058561405605}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 776 final loss: 0.00144189
Trial 777:
  Learning Rate: 0.006911990331797882
  Sigma Multiplier: 1.0187028126014515
  Initialization Multiplier: 0.4889583444425235
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.98it/s, loss=-0.000263, elapsed time=0.05, total time=9.74]
[I 2025-06-07 15:10:14,164] Trial 777 finished with value: -0.0002627724487639863 and parameters: {'learning_rate': 0.006911990331797882, 'sigma_multiplier': 1.0187028126014515, 'num_layers': 1, 'initialization_multiplier': 0.4889583444425235}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 777 final loss: -0.00026277
Trial 778:
  Learning Rate: 0.012723091014509956
  Sigma Multiplier: 0.9867712709146709
  Initialization Multiplier: 0.5775193884916331
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.33it/s, loss=-0.000384, elapsed time=0.07, total time=11.5]
[I 2025-06-07 15:10:25,771] Trial 778 finished with value: -0.0003844595167124354 and parameters: {'learning_rate': 0.012723091014509956, 'sigma_multiplier': 0.9867712709146709, 'num_layers': 2, 'initialization_multiplier': 0.5775193884916331}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 778 final loss: -0.00038446
Trial 779:
  Learning Rate: 0.008826763207888406
  Sigma Multiplier: 0.8863783746975852
  Initialization Multiplier: 0.7461291818048269
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.31it/s, loss=-0.000284, elapsed time=0.1, total time=12.5] 
[I 2025-06-07 15:10:38,389] Trial 779 finished with value: -0.0002838189165356858 and parameters: {'learning_rate': 0.008826763207888406, 'sigma_multiplier': 0.8863783746975852, 'num_layers': 2, 'initialization_multiplier': 0.7461291818048269}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 779 final loss: -0.00028382
Trial 780:
  Learning Rate: 0.004926247951479959
  Sigma Multiplier: 1.5416386575939462
  Initialization Multiplier: 0.5257972092718188
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.09it/s, loss=-0.000296, elapsed time=0.1, total time=11.1] 
[I 2025-06-07 15:10:49,687] Trial 780 finished with value: -0.000296045408005288 and parameters: {'learning_rate': 0.004926247951479959, 'sigma_multiplier': 1.5416386575939462, 'num_layers': 2, 'initialization_multiplier': 0.5257972092718188}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 780 final loss: -0.00029605
Trial 781:
  Learning Rate: 0.010841327100409314
  Sigma Multiplier: 1.0459022880471203
  Initialization Multiplier: 1.5206487176362646
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 12.01it/s, loss=-0.000020, elapsed time=0.06, total time=12.7]
[I 2025-06-07 15:11:02,501] Trial 781 finished with value: -1.9613304867595148e-05 and parameters: {'learning_rate': 0.010841327100409314, 'sigma_multiplier': 1.0459022880471203, 'num_layers': 2, 'initialization_multiplier': 1.5206487176362646}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 781 final loss: -0.00001961
Trial 782:
  Learning Rate: 0.01372110457757233
  Sigma Multiplier: 1.117102895026668
  Initialization Multiplier: 0.6565332033488502
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.87it/s, loss=-0.000298, elapsed time=0.07, total time=12.2]
[I 2025-06-07 15:11:14,932] Trial 782 finished with value: -0.00029810704049899276 and parameters: {'learning_rate': 0.01372110457757233, 'sigma_multiplier': 1.117102895026668, 'num_layers': 2, 'initialization_multiplier': 0.6565332033488502}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 782 final loss: -0.00029811
Trial 783:
  Learning Rate: 0.007663865874844277
  Sigma Multiplier: 0.9388370585760918
  Initialization Multiplier: 1.1885588104642664
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.23it/s, loss=0.000126, elapsed time=0.07, total time=13.7] 
[I 2025-06-07 15:11:28,751] Trial 783 finished with value: 0.0001261800061946075 and parameters: {'learning_rate': 0.007663865874844277, 'sigma_multiplier': 0.9388370585760918, 'num_layers': 2, 'initialization_multiplier': 1.1885588104642664}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 783 final loss: 0.00012618
Trial 784:
  Learning Rate: 0.006394950507468848
  Sigma Multiplier: 1.2099682364650572
  Initialization Multiplier: 0.9015115916745051
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.46it/s, loss=-0.000221, elapsed time=0.05, total time=11.7]
[I 2025-06-07 15:11:40,675] Trial 784 finished with value: -0.00022070542359269444 and parameters: {'learning_rate': 0.006394950507468848, 'sigma_multiplier': 1.2099682364650572, 'num_layers': 2, 'initialization_multiplier': 0.9015115916745051}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 784 final loss: -0.00022071
Trial 785:
  Learning Rate: 0.009559497341567335
  Sigma Multiplier: 1.0086988678611835
  Initialization Multiplier: 0.6106442709246497
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.97it/s, loss=-0.000325, elapsed time=0.06, total time=11.9]
[I 2025-06-07 15:11:52,650] Trial 785 finished with value: -0.0003250699739244674 and parameters: {'learning_rate': 0.009559497341567335, 'sigma_multiplier': 1.0086988678611835, 'num_layers': 2, 'initialization_multiplier': 0.6106442709246497}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 785 final loss: -0.00032507
Trial 786:
  Learning Rate: 0.016265142236929007
  Sigma Multiplier: 1.0856388784132056
  Initialization Multiplier: 0.4676500349805427
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.00it/s, loss=-0.000312, elapsed time=0.06, total time=12]  
[I 2025-06-07 15:12:04,841] Trial 786 finished with value: -0.00031207350120250995 and parameters: {'learning_rate': 0.016265142236929007, 'sigma_multiplier': 1.0856388784132056, 'num_layers': 2, 'initialization_multiplier': 0.4676500349805427}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 786 final loss: -0.00031207
Trial 787:
  Learning Rate: 0.010735597142020152
  Sigma Multiplier: 0.9649739741805445
  Initialization Multiplier: 0.5514725417701013
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.70it/s, loss=-0.000331, elapsed time=0.07, total time=13.2]
[I 2025-06-07 15:12:18,134] Trial 787 finished with value: -0.0003310553845135491 and parameters: {'learning_rate': 0.010735597142020152, 'sigma_multiplier': 0.9649739741805445, 'num_layers': 2, 'initialization_multiplier': 0.5514725417701013}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 787 final loss: -0.00033106
Trial 788:
  Learning Rate: 0.007981870845601903
  Sigma Multiplier: 1.03751480753576
  Initialization Multiplier: 0.5035573502126024
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.14it/s, loss=-0.000369, elapsed time=0.08, total time=11.1]
[I 2025-06-07 15:12:29,458] Trial 788 finished with value: -0.00036890406525684786 and parameters: {'learning_rate': 0.007981870845601903, 'sigma_multiplier': 1.03751480753576, 'num_layers': 2, 'initialization_multiplier': 0.5035573502126024}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 788 final loss: -0.00036890
Trial 789:
  Learning Rate: 0.013862160921192607
  Sigma Multiplier: 1.0010311886807903
  Initialization Multiplier: 0.4108423820559486
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.18it/s, loss=-0.000419, elapsed time=0.04, total time=10.2]
[I 2025-06-07 15:12:39,675] Trial 789 finished with value: -0.0004190550191208203 and parameters: {'learning_rate': 0.013862160921192607, 'sigma_multiplier': 1.0010311886807903, 'num_layers': 2, 'initialization_multiplier': 0.4108423820559486}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 789 final loss: -0.00041906
Trial 790:
  Learning Rate: 0.011762848535083345
  Sigma Multiplier: 1.0710710377875154
  Initialization Multiplier: 0.6362158823745308
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.34it/s, loss=-0.000352, elapsed time=0.06, total time=9.44]
[I 2025-06-07 15:12:49,189] Trial 790 finished with value: -0.0003520492552294676 and parameters: {'learning_rate': 0.011762848535083345, 'sigma_multiplier': 1.0710710377875154, 'num_layers': 2, 'initialization_multiplier': 0.6362158823745308}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 790 final loss: -0.00035205
Trial 791:
  Learning Rate: 0.006046076160870519
  Sigma Multiplier: 1.150888187639642
  Initialization Multiplier: 0.5904731015109667
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.57it/s, loss=-0.000278, elapsed time=0.04, total time=10]  
[I 2025-06-07 15:12:59,282] Trial 791 finished with value: -0.0002782082936781984 and parameters: {'learning_rate': 0.006046076160870519, 'sigma_multiplier': 1.150888187639642, 'num_layers': 2, 'initialization_multiplier': 0.5904731015109667}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 791 final loss: -0.00027821
Trial 792:
  Learning Rate: 0.00900613400120749
  Sigma Multiplier: 0.8890014693884452
  Initialization Multiplier: 0.5543117120039345
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.32it/s, loss=-0.000272, elapsed time=0.05, total time=10.1]
[I 2025-06-07 15:13:09,469] Trial 792 finished with value: -0.00027201797461134166 and parameters: {'learning_rate': 0.00900613400120749, 'sigma_multiplier': 0.8890014693884452, 'num_layers': 2, 'initialization_multiplier': 0.5543117120039345}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 792 final loss: -0.00027202
Trial 793:
  Learning Rate: 0.007282263924239518
  Sigma Multiplier: 0.9608687584982691
  Initialization Multiplier: 0.33974387285284485
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.41it/s, loss=-0.000212, elapsed time=0.07, total time=9.97]
[I 2025-06-07 15:13:19,492] Trial 793 finished with value: -0.0002119945895783372 and parameters: {'learning_rate': 0.007282263924239518, 'sigma_multiplier': 0.9608687584982691, 'num_layers': 2, 'initialization_multiplier': 0.33974387285284485}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 793 final loss: -0.00021199
Trial 794:
  Learning Rate: 0.00475360590236568
  Sigma Multiplier: 1.0434658736150815
  Initialization Multiplier: 0.44618413266159335
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.48it/s, loss=-0.000337, elapsed time=0.04, total time=10]  
[I 2025-06-07 15:13:29,595] Trial 794 finished with value: -0.0003365555856623432 and parameters: {'learning_rate': 0.00475360590236568, 'sigma_multiplier': 1.0434658736150815, 'num_layers': 2, 'initialization_multiplier': 0.44618413266159335}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 794 final loss: -0.00033656
Trial 795:
  Learning Rate: 0.01020004073744031
  Sigma Multiplier: 0.8429799737289729
  Initialization Multiplier: 0.6873104685367178
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.87it/s, loss=-0.000298, elapsed time=0.05, total time=10.3]
[I 2025-06-07 15:13:39,958] Trial 795 finished with value: -0.00029780177551043884 and parameters: {'learning_rate': 0.01020004073744031, 'sigma_multiplier': 0.8429799737289729, 'num_layers': 2, 'initialization_multiplier': 0.6873104685367178}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 795 final loss: -0.00029780
Trial 796:
  Learning Rate: 0.008170297411060378
  Sigma Multiplier: 1.0918976255797324
  Initialization Multiplier: 0.5131062046240609
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.27it/s, loss=-0.000227, elapsed time=0.04, total time=8.04]
[I 2025-06-07 15:13:48,044] Trial 796 finished with value: -0.00022663880807321891 and parameters: {'learning_rate': 0.008170297411060378, 'sigma_multiplier': 1.0918976255797324, 'num_layers': 1, 'initialization_multiplier': 0.5131062046240609}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 796 final loss: -0.00022664
Trial 797:
  Learning Rate: 0.01215194971343665
  Sigma Multiplier: 0.9207105218500327
  Initialization Multiplier: 0.6002108495754672
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.59it/s, loss=-0.000362, elapsed time=0.06, total time=9.96]
[I 2025-06-07 15:13:58,055] Trial 797 finished with value: -0.00036211575289913766 and parameters: {'learning_rate': 0.01215194971343665, 'sigma_multiplier': 0.9207105218500327, 'num_layers': 2, 'initialization_multiplier': 0.6002108495754672}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 797 final loss: -0.00036212
Trial 798:
  Learning Rate: 0.014447767069749394
  Sigma Multiplier: 1.001206331093995
  Initialization Multiplier: 0.4781830977324313
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.87it/s, loss=-0.000408, elapsed time=0.07, total time=9.73]
[I 2025-06-07 15:14:07,836] Trial 798 finished with value: -0.0004083978742646118 and parameters: {'learning_rate': 0.014447767069749394, 'sigma_multiplier': 1.001206331093995, 'num_layers': 2, 'initialization_multiplier': 0.4781830977324313}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 798 final loss: -0.00040840
Trial 799:
  Learning Rate: 0.005520586132742357
  Sigma Multiplier: 1.125651051929583
  Initialization Multiplier: 0.5523667944892048
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.00it/s, loss=-0.000383, elapsed time=0.07, total time=9.17]
[I 2025-06-07 15:14:17,069] Trial 799 finished with value: -0.0003828248374844622 and parameters: {'learning_rate': 0.005520586132742357, 'sigma_multiplier': 1.125651051929583, 'num_layers': 2, 'initialization_multiplier': 0.5523667944892048}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 799 final loss: -0.00038282
Trial 800:
  Learning Rate: 0.01709898709059642
  Sigma Multiplier: 0.9741173447894558
  Initialization Multiplier: 0.6637621260647208
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.33it/s, loss=-0.000377, elapsed time=0.04, total time=10.1]
[I 2025-06-07 15:14:27,170] Trial 800 finished with value: -0.00037711517586648346 and parameters: {'learning_rate': 0.01709898709059642, 'sigma_multiplier': 0.9741173447894558, 'num_layers': 2, 'initialization_multiplier': 0.6637621260647208}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 800 final loss: -0.00037712
Trial 801:
  Learning Rate: 0.006876821728706214
  Sigma Multiplier: 1.028244737475856
  Initialization Multiplier: 0.5273592282576541
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.08it/s, loss=-0.000306, elapsed time=0.08, total time=11.8]
[I 2025-06-07 15:14:39,006] Trial 801 finished with value: -0.0003061195880757682 and parameters: {'learning_rate': 0.006876821728706214, 'sigma_multiplier': 1.028244737475856, 'num_layers': 3, 'initialization_multiplier': 0.5273592282576541}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 801 final loss: -0.00030612
Trial 802:
  Learning Rate: 0.009327113846615566
  Sigma Multiplier: 1.0620308868761619
  Initialization Multiplier: 0.6274746466129119
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.14it/s, loss=-0.000295, elapsed time=0.06, total time=9.54]
[I 2025-06-07 15:14:48,598] Trial 802 finished with value: -0.00029458977838625085 and parameters: {'learning_rate': 0.009327113846615566, 'sigma_multiplier': 1.0620308868761619, 'num_layers': 2, 'initialization_multiplier': 0.6274746466129119}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 802 final loss: -0.00029459
Trial 803:
  Learning Rate: 0.01159184296724169
  Sigma Multiplier: 0.9374710420215147
  Initialization Multiplier: 0.38188814196815324
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.08it/s, loss=-0.000348, elapsed time=0.05, total time=10.2]
[I 2025-06-07 15:14:58,882] Trial 803 finished with value: -0.00034830083047287586 and parameters: {'learning_rate': 0.01159184296724169, 'sigma_multiplier': 0.9374710420215147, 'num_layers': 2, 'initialization_multiplier': 0.38188814196815324}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 803 final loss: -0.00034830
Trial 804:
  Learning Rate: 0.0012806045304591062
  Sigma Multiplier: 0.99938654067567
  Initialization Multiplier: 0.5819016770259975
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.22it/s, loss=0.001224, elapsed time=0.06, total time=10.1]
[I 2025-06-07 15:15:09,067] Trial 804 finished with value: 0.0012239132419631313 and parameters: {'learning_rate': 0.0012806045304591062, 'sigma_multiplier': 0.99938654067567, 'num_layers': 2, 'initialization_multiplier': 0.5819016770259975}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 804 final loss: 0.00122391
Trial 805:
  Learning Rate: 0.00792347235596719
  Sigma Multiplier: 1.0885891869345161
  Initialization Multiplier: 1.7119007228270857
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.26it/s, loss=-0.000220, elapsed time=0.05, total time=10.1]
[I 2025-06-07 15:15:19,257] Trial 805 finished with value: -0.0002201700940307721 and parameters: {'learning_rate': 0.00792347235596719, 'sigma_multiplier': 1.0885891869345161, 'num_layers': 2, 'initialization_multiplier': 1.7119007228270857}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 805 final loss: -0.00022017
Trial 806:
  Learning Rate: 0.009841395554653293
  Sigma Multiplier: 1.0272550943039587
  Initialization Multiplier: 0.43206700675531007
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.59it/s, loss=-0.000375, elapsed time=0.06, total time=9.86]
[I 2025-06-07 15:15:29,184] Trial 806 finished with value: -0.0003750302428453417 and parameters: {'learning_rate': 0.009841395554653293, 'sigma_multiplier': 1.0272550943039587, 'num_layers': 2, 'initialization_multiplier': 0.43206700675531007}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 806 final loss: -0.00037503
Trial 807:
  Learning Rate: 0.0140388255566882
  Sigma Multiplier: 0.9657999744368496
  Initialization Multiplier: 0.49613147634695004
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.46it/s, loss=-0.000327, elapsed time=0.04, total time=10]  
[I 2025-06-07 15:15:39,244] Trial 807 finished with value: -0.0003266568659049196 and parameters: {'learning_rate': 0.0140388255566882, 'sigma_multiplier': 0.9657999744368496, 'num_layers': 2, 'initialization_multiplier': 0.49613147634695004}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 807 final loss: -0.00032666
Trial 808:
  Learning Rate: 0.005904540315625267
  Sigma Multiplier: 0.9088357360755787
  Initialization Multiplier: 0.559875250359085
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.56it/s, loss=-0.000343, elapsed time=0.06, total time=9.93]
[I 2025-06-07 15:15:49,228] Trial 808 finished with value: -0.00034316914262590614 and parameters: {'learning_rate': 0.005904540315625267, 'sigma_multiplier': 0.9088357360755787, 'num_layers': 2, 'initialization_multiplier': 0.559875250359085}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 808 final loss: -0.00034317
Trial 809:
  Learning Rate: 0.007254891115452406
  Sigma Multiplier: 1.062096797358205
  Initialization Multiplier: 0.7187035259102503
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.49it/s, loss=-0.000281, elapsed time=0.08, total time=9.93]
[I 2025-06-07 15:15:59,208] Trial 809 finished with value: -0.0002805728307745051 and parameters: {'learning_rate': 0.007254891115452406, 'sigma_multiplier': 1.062096797358205, 'num_layers': 2, 'initialization_multiplier': 0.7187035259102503}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 809 final loss: -0.00028057
Trial 810:
  Learning Rate: 0.011138313444470832
  Sigma Multiplier: 0.994593000605113
  Initialization Multiplier: 1.3827527832520472
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.12it/s, loss=-0.000193, elapsed time=0.05, total time=10.2]
[I 2025-06-07 15:16:09,443] Trial 810 finished with value: -0.00019344542453804688 and parameters: {'learning_rate': 0.011138313444470832, 'sigma_multiplier': 0.994593000605113, 'num_layers': 2, 'initialization_multiplier': 1.3827527832520472}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 810 final loss: -0.00019345
Trial 811:
  Learning Rate: 0.004505189869288956
  Sigma Multiplier: 1.1243595931876686
  Initialization Multiplier: 0.6272425650517468
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.81it/s, loss=-0.000447, elapsed time=0.05, total time=9.2] 
[I 2025-06-07 15:16:18,701] Trial 811 finished with value: -0.0004470739296767425 and parameters: {'learning_rate': 0.004505189869288956, 'sigma_multiplier': 1.1243595931876686, 'num_layers': 2, 'initialization_multiplier': 0.6272425650517468}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 811 final loss: -0.00044707
Trial 812:
  Learning Rate: 0.0033558796162382483
  Sigma Multiplier: 1.1885652289825872
  Initialization Multiplier: 0.6616699661355819
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.61it/s, loss=-0.000351, elapsed time=0.05, total time=9.3] 
[I 2025-06-07 15:16:28,047] Trial 812 finished with value: -0.0003507141995114227 and parameters: {'learning_rate': 0.0033558796162382483, 'sigma_multiplier': 1.1885652289825872, 'num_layers': 2, 'initialization_multiplier': 0.6616699661355819}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 812 final loss: -0.00035071
Trial 813:
  Learning Rate: 0.004526694769615638
  Sigma Multiplier: 1.1680008501799835
  Initialization Multiplier: 0.6311963375305709
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.63it/s, loss=-0.000205, elapsed time=0.11, total time=14.4]
[I 2025-06-07 15:16:42,547] Trial 813 finished with value: -0.00020520006738151175 and parameters: {'learning_rate': 0.004526694769615638, 'sigma_multiplier': 1.1680008501799835, 'num_layers': 5, 'initialization_multiplier': 0.6311963375305709}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 813 final loss: -0.00020520
Trial 814:
  Learning Rate: 0.004635655750294839
  Sigma Multiplier: 1.2417421458407372
  Initialization Multiplier: 0.6920570744299263
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.97it/s, loss=-0.000383, elapsed time=0.05, total time=9.1] 
[I 2025-06-07 15:16:51,702] Trial 814 finished with value: -0.0003825151111534234 and parameters: {'learning_rate': 0.004635655750294839, 'sigma_multiplier': 1.2417421458407372, 'num_layers': 2, 'initialization_multiplier': 0.6920570744299263}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 814 final loss: -0.00038252
Trial 815:
  Learning Rate: 0.0035491460312575256
  Sigma Multiplier: 1.1627941995590365
  Initialization Multiplier: 0.6202925612362986
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.04it/s, loss=-0.000369, elapsed time=0.05, total time=9.06]
[I 2025-06-07 15:17:00,813] Trial 815 finished with value: -0.00036857218500318503 and parameters: {'learning_rate': 0.0035491460312575256, 'sigma_multiplier': 1.1627941995590365, 'num_layers': 2, 'initialization_multiplier': 0.6202925612362986}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 815 final loss: -0.00036857
Trial 816:
  Learning Rate: 0.003927912993201798
  Sigma Multiplier: 1.125297131793365
  Initialization Multiplier: 0.5923703417135886
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.81it/s, loss=-0.000344, elapsed time=0.06, total time=9.2] 
[I 2025-06-07 15:17:10,067] Trial 816 finished with value: -0.0003436565358736733 and parameters: {'learning_rate': 0.003927912993201798, 'sigma_multiplier': 1.125297131793365, 'num_layers': 2, 'initialization_multiplier': 0.5923703417135886}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 816 final loss: -0.00034366
Trial 817:
  Learning Rate: 0.005351795160696857
  Sigma Multiplier: 0.8700116941406566
  Initialization Multiplier: 0.7743718600556896
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.97it/s, loss=-0.000017, elapsed time=0.05, total time=11.1]
[I 2025-06-07 15:17:21,216] Trial 817 finished with value: -1.7024841435123666e-05 and parameters: {'learning_rate': 0.005351795160696857, 'sigma_multiplier': 0.8700116941406566, 'num_layers': 2, 'initialization_multiplier': 0.7743718600556896}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 817 final loss: -0.00001702
Trial 818:
  Learning Rate: 0.004679955600453607
  Sigma Multiplier: 0.9443130614824553
  Initialization Multiplier: 0.6570131717626503
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.30it/s, loss=-0.000330, elapsed time=0.06, total time=10.2]
[I 2025-06-07 15:17:31,563] Trial 818 finished with value: -0.0003302479941156724 and parameters: {'learning_rate': 0.004679955600453607, 'sigma_multiplier': 0.9443130614824553, 'num_layers': 2, 'initialization_multiplier': 0.6570131717626503}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 818 final loss: -0.00033025
Trial 819:
  Learning Rate: 0.004179606643320815
  Sigma Multiplier: 1.1148637920623727
  Initialization Multiplier: 0.7315986191010624
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.90it/s, loss=-0.000325, elapsed time=0.04, total time=8.63]
[I 2025-06-07 15:17:40,246] Trial 819 finished with value: -0.00032456909461267417 and parameters: {'learning_rate': 0.004179606643320815, 'sigma_multiplier': 1.1148637920623727, 'num_layers': 1, 'initialization_multiplier': 0.7315986191010624}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 819 final loss: -0.00032457
Trial 820:
  Learning Rate: 0.005197105858822613
  Sigma Multiplier: 1.0214978952467126
  Initialization Multiplier: 0.6004418989369132
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.98it/s, loss=-0.000289, elapsed time=0.06, total time=10.3]
[I 2025-06-07 15:17:50,579] Trial 820 finished with value: -0.00028909078572887426 and parameters: {'learning_rate': 0.005197105858822613, 'sigma_multiplier': 1.0214978952467126, 'num_layers': 2, 'initialization_multiplier': 0.6004418989369132}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 820 final loss: -0.00028909
Trial 821:
  Learning Rate: 0.002316823411861299
  Sigma Multiplier: 0.9663073002466772
  Initialization Multiplier: 0.6378269288933038
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.83it/s, loss=-0.000407, elapsed time=0.05, total time=9.73]
[I 2025-06-07 15:18:00,370] Trial 821 finished with value: -0.00040691117941037743 and parameters: {'learning_rate': 0.002316823411861299, 'sigma_multiplier': 0.9663073002466772, 'num_layers': 2, 'initialization_multiplier': 0.6378269288933038}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 821 final loss: -0.00040691
Trial 822:
  Learning Rate: 0.006009322505078024
  Sigma Multiplier: 0.9025001765886692
  Initialization Multiplier: 0.5745840863487707
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.20it/s, loss=-0.000369, elapsed time=0.06, total time=10.2]
[I 2025-06-07 15:18:10,598] Trial 822 finished with value: -0.0003687135254358111 and parameters: {'learning_rate': 0.006009322505078024, 'sigma_multiplier': 0.9025001765886692, 'num_layers': 2, 'initialization_multiplier': 0.5745840863487707}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 822 final loss: -0.00036871
Trial 823:
  Learning Rate: 0.004326254011918599
  Sigma Multiplier: 1.0526910175006188
  Initialization Multiplier: 0.6959782831173532
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.37it/s, loss=-0.000399, elapsed time=0.05, total time=9.43]
[I 2025-06-07 15:18:20,088] Trial 823 finished with value: -0.00039921956370437173 and parameters: {'learning_rate': 0.004326254011918599, 'sigma_multiplier': 1.0526910175006188, 'num_layers': 2, 'initialization_multiplier': 0.6959782831173532}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 823 final loss: -0.00039922
Trial 824:
  Learning Rate: 0.002801062753752911
  Sigma Multiplier: 0.9935885205990213
  Initialization Multiplier: 0.5299197244072468
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.25it/s, loss=-0.000306, elapsed time=0.05, total time=9.51]
[I 2025-06-07 15:18:29,654] Trial 824 finished with value: -0.0003064417456354855 and parameters: {'learning_rate': 0.002801062753752911, 'sigma_multiplier': 0.9935885205990213, 'num_layers': 2, 'initialization_multiplier': 0.5299197244072468}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 824 final loss: -0.00030644
Trial 825:
  Learning Rate: 0.006363039379997926
  Sigma Multiplier: 1.1378765489673666
  Initialization Multiplier: 0.622100935899545
  Number of Layers: 4


Training Progress: 100%|██████████| 150/150 [00:12<00:00, 11.70it/s, loss=-0.000254, elapsed time=0.1, total time=13.2] 
[I 2025-06-07 15:18:42,897] Trial 825 finished with value: -0.0002543720626364553 and parameters: {'learning_rate': 0.006363039379997926, 'sigma_multiplier': 1.1378765489673666, 'num_layers': 4, 'initialization_multiplier': 0.622100935899545}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 825 final loss: -0.00025437
Trial 826:
  Learning Rate: 0.005614327231648608
  Sigma Multiplier: 0.813670265375014
  Initialization Multiplier: 0.5548131749590091
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.83it/s, loss=-0.000205, elapsed time=0.05, total time=10.4]
[I 2025-06-07 15:18:53,381] Trial 826 finished with value: -0.00020464610079880315 and parameters: {'learning_rate': 0.005614327231648608, 'sigma_multiplier': 0.813670265375014, 'num_layers': 2, 'initialization_multiplier': 0.5548131749590091}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 826 final loss: -0.00020465
Trial 827:
  Learning Rate: 0.006856976851783148
  Sigma Multiplier: 0.9403512327044645
  Initialization Multiplier: 0.6704102376901655
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.60it/s, loss=-0.000338, elapsed time=0.05, total time=9.92]
[I 2025-06-07 15:19:03,356] Trial 827 finished with value: -0.0003376761505801388 and parameters: {'learning_rate': 0.006856976851783148, 'sigma_multiplier': 0.9403512327044645, 'num_layers': 2, 'initialization_multiplier': 0.6704102376901655}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 827 final loss: -0.00033768
Trial 828:
  Learning Rate: 0.008669536740922761
  Sigma Multiplier: 1.0294221239994172
  Initialization Multiplier: 0.5889436410246932
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.02it/s, loss=-0.000389, elapsed time=0.05, total time=9.68]
[I 2025-06-07 15:19:13,084] Trial 828 finished with value: -0.00038891242345675604 and parameters: {'learning_rate': 0.008669536740922761, 'sigma_multiplier': 1.0294221239994172, 'num_layers': 2, 'initialization_multiplier': 0.5889436410246932}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 828 final loss: -0.00038891
Trial 829:
  Learning Rate: 0.0037905268922803085
  Sigma Multiplier: 1.0907194582716473
  Initialization Multiplier: 0.5271124836099538
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.48it/s, loss=-0.000317, elapsed time=0.06, total time=9.36]
[I 2025-06-07 15:19:22,503] Trial 829 finished with value: -0.0003169021011720211 and parameters: {'learning_rate': 0.0037905268922803085, 'sigma_multiplier': 1.0907194582716473, 'num_layers': 2, 'initialization_multiplier': 0.5271124836099538}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 829 final loss: -0.00031690
Trial 830:
  Learning Rate: 0.005040700503135329
  Sigma Multiplier: 1.1997332931352338
  Initialization Multiplier: 0.6139876103582882
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.81it/s, loss=-0.000331, elapsed time=0.05, total time=9.2] 
[I 2025-06-07 15:19:31,758] Trial 830 finished with value: -0.0003309130557723501 and parameters: {'learning_rate': 0.005040700503135329, 'sigma_multiplier': 1.1997332931352338, 'num_layers': 2, 'initialization_multiplier': 0.6139876103582882}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 830 final loss: -0.00033091
Trial 831:
  Learning Rate: 0.007764237391167974
  Sigma Multiplier: 0.9973171996505562
  Initialization Multiplier: 0.5590003830570832
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.45it/s, loss=-0.000345, elapsed time=0.05, total time=9.98]
[I 2025-06-07 15:19:41,799] Trial 831 finished with value: -0.000345287468149406 and parameters: {'learning_rate': 0.007764237391167974, 'sigma_multiplier': 0.9973171996505562, 'num_layers': 2, 'initialization_multiplier': 0.5590003830570832}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 831 final loss: -0.00034529
Trial 832:
  Learning Rate: 0.00908654972307385
  Sigma Multiplier: 1.0592886520088716
  Initialization Multiplier: 0.645792696607312
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.94it/s, loss=-0.000350, elapsed time=0.05, total time=9.74]
[I 2025-06-07 15:19:51,640] Trial 832 finished with value: -0.0003504956669409363 and parameters: {'learning_rate': 0.00908654972307385, 'sigma_multiplier': 1.0592886520088716, 'num_layers': 2, 'initialization_multiplier': 0.645792696607312}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 832 final loss: -0.00035050
Trial 833:
  Learning Rate: 0.007141994193254009
  Sigma Multiplier: 0.9579531638322487
  Initialization Multiplier: 0.5039460325342293
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.33it/s, loss=-0.000323, elapsed time=0.06, total time=11.5]
[I 2025-06-07 15:20:03,237] Trial 833 finished with value: -0.0003232180490391533 and parameters: {'learning_rate': 0.007141994193254009, 'sigma_multiplier': 0.9579531638322487, 'num_layers': 3, 'initialization_multiplier': 0.5039460325342293}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 833 final loss: -0.00032322
Trial 834:
  Learning Rate: 0.010026418362140174
  Sigma Multiplier: 0.9164852112854417
  Initialization Multiplier: 0.5818617129556338
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.44it/s, loss=-0.000354, elapsed time=0.05, total time=9.98]
[I 2025-06-07 15:20:13,270] Trial 834 finished with value: -0.0003537467329707726 and parameters: {'learning_rate': 0.010026418362140174, 'sigma_multiplier': 0.9164852112854417, 'num_layers': 2, 'initialization_multiplier': 0.5818617129556338}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 834 final loss: -0.00035375
Trial 835:
  Learning Rate: 0.005884267217019267
  Sigma Multiplier: 1.0234216596004961
  Initialization Multiplier: 0.7067153814130861
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.49it/s, loss=-0.000363, elapsed time=0.04, total time=8.42]
[I 2025-06-07 15:20:21,732] Trial 835 finished with value: -0.00036260961034135543 and parameters: {'learning_rate': 0.005884267217019267, 'sigma_multiplier': 1.0234216596004961, 'num_layers': 1, 'initialization_multiplier': 0.7067153814130861}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 835 final loss: -0.00036261
Trial 836:
  Learning Rate: 0.008614676296545424
  Sigma Multiplier: 1.1146873039628062
  Initialization Multiplier: 0.5578126072563072
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.37it/s, loss=-0.000384, elapsed time=0.05, total time=9.46]
[I 2025-06-07 15:20:31,258] Trial 836 finished with value: -0.00038359857666028297 and parameters: {'learning_rate': 0.008614676296545424, 'sigma_multiplier': 1.1146873039628062, 'num_layers': 2, 'initialization_multiplier': 0.5578126072563072}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 836 final loss: -0.00038360
Trial 837:
  Learning Rate: 0.0068732063339704795
  Sigma Multiplier: 0.8595933421112005
  Initialization Multiplier: 0.6541794551637641
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.60it/s, loss=-0.000273, elapsed time=0.06, total time=10.5]
[I 2025-06-07 15:20:41,846] Trial 837 finished with value: -0.00027292124277870656 and parameters: {'learning_rate': 0.0068732063339704795, 'sigma_multiplier': 0.8595933421112005, 'num_layers': 2, 'initialization_multiplier': 0.6541794551637641}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 837 final loss: -0.00027292
Trial 838:
  Learning Rate: 0.0030412927034137545
  Sigma Multiplier: 0.9805015202674785
  Initialization Multiplier: 0.5126853855167061
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.54it/s, loss=-0.000397, elapsed time=0.05, total time=9.33]
[I 2025-06-07 15:20:51,231] Trial 838 finished with value: -0.0003973333465588652 and parameters: {'learning_rate': 0.0030412927034137545, 'sigma_multiplier': 0.9805015202674785, 'num_layers': 2, 'initialization_multiplier': 0.5126853855167061}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 838 final loss: -0.00039733
Trial 839:
  Learning Rate: 0.011649594566926309
  Sigma Multiplier: 1.0600396591120667
  Initialization Multiplier: 0.611202495240911
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.31it/s, loss=-0.000380, elapsed time=0.06, total time=9.48]
[I 2025-06-07 15:21:00,771] Trial 839 finished with value: -0.0003801186312869341 and parameters: {'learning_rate': 0.011649594566926309, 'sigma_multiplier': 1.0600396591120667, 'num_layers': 2, 'initialization_multiplier': 0.611202495240911}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 839 final loss: -0.00038012
Trial 840:
  Learning Rate: 0.004647288421866366
  Sigma Multiplier: 0.9353894710552421
  Initialization Multiplier: 0.536263593942159
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.04it/s, loss=-0.000412, elapsed time=0.05, total time=9.62]
[I 2025-06-07 15:21:10,448] Trial 840 finished with value: -0.00041161044806120974 and parameters: {'learning_rate': 0.004647288421866366, 'sigma_multiplier': 0.9353894710552421, 'num_layers': 2, 'initialization_multiplier': 0.536263593942159}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 840 final loss: -0.00041161
Trial 841:
  Learning Rate: 0.009999917428555783
  Sigma Multiplier: 1.0236795850203118
  Initialization Multiplier: 0.47449399585367735
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.94it/s, loss=-0.000389, elapsed time=0.05, total time=9.06]
[I 2025-06-07 15:21:19,558] Trial 841 finished with value: -0.00038917038864300725 and parameters: {'learning_rate': 0.009999917428555783, 'sigma_multiplier': 1.0236795850203118, 'num_layers': 2, 'initialization_multiplier': 0.47449399585367735}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 841 final loss: -0.00038917
Trial 842:
  Learning Rate: 0.008305509194189367
  Sigma Multiplier: 0.9769484890284982
  Initialization Multiplier: 0.5913993156133762
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.33it/s, loss=-0.000368, elapsed time=0.05, total time=9.46]
[I 2025-06-07 15:21:29,074] Trial 842 finished with value: -0.0003677573831052089 and parameters: {'learning_rate': 0.008305509194189367, 'sigma_multiplier': 0.9769484890284982, 'num_layers': 2, 'initialization_multiplier': 0.5913993156133762}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 842 final loss: -0.00036776
Trial 843:
  Learning Rate: 0.006472028454967815
  Sigma Multiplier: 1.1471036974281896
  Initialization Multiplier: 0.6803650060924319
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.42it/s, loss=-0.000416, elapsed time=0.05, total time=9.38]
[I 2025-06-07 15:21:38,515] Trial 843 finished with value: -0.00041596263188611263 and parameters: {'learning_rate': 0.006472028454967815, 'sigma_multiplier': 1.1471036974281896, 'num_layers': 2, 'initialization_multiplier': 0.6803650060924319}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 843 final loss: -0.00041596
Trial 844:
  Learning Rate: 0.005381891306788822
  Sigma Multiplier: 1.1018324623324542
  Initialization Multiplier: 0.751296172826495
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.51it/s, loss=-0.000334, elapsed time=0.06, total time=9.34]
[I 2025-06-07 15:21:47,913] Trial 844 finished with value: -0.0003336812341442761 and parameters: {'learning_rate': 0.005381891306788822, 'sigma_multiplier': 1.1018324623324542, 'num_layers': 2, 'initialization_multiplier': 0.751296172826495}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 844 final loss: -0.00033368
Trial 845:
  Learning Rate: 0.012238485202735032
  Sigma Multiplier: 1.061854898693757
  Initialization Multiplier: 0.6300550892159751
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.24it/s, loss=-0.000387, elapsed time=0.06, total time=13.7]
[I 2025-06-07 15:22:01,612] Trial 845 finished with value: -0.00038745987149961866 and parameters: {'learning_rate': 0.012238485202735032, 'sigma_multiplier': 1.061854898693757, 'num_layers': 2, 'initialization_multiplier': 0.6300550892159751}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 845 final loss: -0.00038746
Trial 846:
  Learning Rate: 0.010521552802839552
  Sigma Multiplier: 0.9038830829920216
  Initialization Multiplier: 0.5671887727075212
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.29it/s, loss=-0.000311, elapsed time=0.05, total time=10.1]
[I 2025-06-07 15:22:11,758] Trial 846 finished with value: -0.0003106750990644704 and parameters: {'learning_rate': 0.010521552802839552, 'sigma_multiplier': 0.9038830829920216, 'num_layers': 2, 'initialization_multiplier': 0.5671887727075212}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 846 final loss: -0.00031068
Trial 847:
  Learning Rate: 0.007734627868652664
  Sigma Multiplier: 1.003400104608978
  Initialization Multiplier: 0.47471782204881346
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.52it/s, loss=-0.000413, elapsed time=0.06, total time=10.6]
[I 2025-06-07 15:22:22,395] Trial 847 finished with value: -0.0004132845360144074 and parameters: {'learning_rate': 0.007734627868652664, 'sigma_multiplier': 1.003400104608978, 'num_layers': 2, 'initialization_multiplier': 0.47471782204881346}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 847 final loss: -0.00041328
Trial 848:
  Learning Rate: 0.008792192512637981
  Sigma Multiplier: 0.9685684866438995
  Initialization Multiplier: 0.5181964829504752
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.07it/s, loss=-0.000334, elapsed time=0.07, total time=10.3]
[I 2025-06-07 15:22:32,828] Trial 848 finished with value: -0.0003344337960470937 and parameters: {'learning_rate': 0.008792192512637981, 'sigma_multiplier': 0.9685684866438995, 'num_layers': 2, 'initialization_multiplier': 0.5181964829504752}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 848 final loss: -0.00033443
Trial 849:
  Learning Rate: 0.009649402363510098
  Sigma Multiplier: 1.2908914456198555
  Initialization Multiplier: 0.6058594025450772
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.09it/s, loss=-0.000403, elapsed time=0.06, total time=9.02]
[I 2025-06-07 15:22:41,905] Trial 849 finished with value: -0.0004030856463998481 and parameters: {'learning_rate': 0.009649402363510098, 'sigma_multiplier': 1.2908914456198555, 'num_layers': 2, 'initialization_multiplier': 0.6058594025450772}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 849 final loss: -0.00040309
Trial 850:
  Learning Rate: 0.004265151586372884
  Sigma Multiplier: 1.0338592219166138
  Initialization Multiplier: 0.5539155559084934
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.75it/s, loss=-0.000340, elapsed time=0.04, total time=9.21]
[I 2025-06-07 15:22:51,174] Trial 850 finished with value: -0.00034040557609208523 and parameters: {'learning_rate': 0.004265151586372884, 'sigma_multiplier': 1.0338592219166138, 'num_layers': 2, 'initialization_multiplier': 0.5539155559084934}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 850 final loss: -0.00034041
Trial 851:
  Learning Rate: 0.01215417498595026
  Sigma Multiplier: 1.3427645908282342
  Initialization Multiplier: 1.9583168082391618
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.55it/s, loss=-0.000318, elapsed time=0.06, total time=9.38]
[I 2025-06-07 15:23:00,609] Trial 851 finished with value: -0.0003183300423173059 and parameters: {'learning_rate': 0.01215417498595026, 'sigma_multiplier': 1.3427645908282342, 'num_layers': 2, 'initialization_multiplier': 1.9583168082391618}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 851 final loss: -0.00031833
Trial 852:
  Learning Rate: 0.006271125086301817
  Sigma Multiplier: 1.0849397376642655
  Initialization Multiplier: 0.6627677621626445
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.04it/s, loss=-0.000275, elapsed time=0.05, total time=10.2]
[I 2025-06-07 15:23:10,938] Trial 852 finished with value: -0.00027533377394962254 and parameters: {'learning_rate': 0.006271125086301817, 'sigma_multiplier': 1.0849397376642655, 'num_layers': 2, 'initialization_multiplier': 0.6627677621626445}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 852 final loss: -0.00027533
Trial 853:
  Learning Rate: 0.006996108896247611
  Sigma Multiplier: 0.8760033715162219
  Initialization Multiplier: 0.5216139659210145
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.27it/s, loss=-0.000323, elapsed time=0.05, total time=10.2]
[I 2025-06-07 15:23:21,180] Trial 853 finished with value: -0.00032308553101687275 and parameters: {'learning_rate': 0.006996108896247611, 'sigma_multiplier': 0.8760033715162219, 'num_layers': 2, 'initialization_multiplier': 0.5216139659210145}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 853 final loss: -0.00032309
Trial 854:
  Learning Rate: 0.010813753457915182
  Sigma Multiplier: 0.9443365322133761
  Initialization Multiplier: 0.47709539305378473
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.21it/s, loss=-0.000227, elapsed time=0.06, total time=10.2]
[I 2025-06-07 15:23:31,431] Trial 854 finished with value: -0.00022704667359183585 and parameters: {'learning_rate': 0.010813753457915182, 'sigma_multiplier': 0.9443365322133761, 'num_layers': 2, 'initialization_multiplier': 0.47709539305378473}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 854 final loss: -0.00022705
Trial 855:
  Learning Rate: 4.162986646309142e-05
  Sigma Multiplier: 1.0072043397712982
  Initialization Multiplier: 0.5797157998554141
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.00it/s, loss=0.036728, elapsed time=0.05, total time=8.62]
[I 2025-06-07 15:23:40,117] Trial 855 finished with value: 0.0367277757903798 and parameters: {'learning_rate': 4.162986646309142e-05, 'sigma_multiplier': 1.0072043397712982, 'num_layers': 1, 'initialization_multiplier': 0.5797157998554141}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 855 final loss: 0.03672778
Trial 856:
  Learning Rate: 0.007920001174565313
  Sigma Multiplier: 1.1596456890357103
  Initialization Multiplier: 0.6239912184497821
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.01it/s, loss=-0.000405, elapsed time=0.04, total time=9.11]
[I 2025-06-07 15:23:49,290] Trial 856 finished with value: -0.000405419781604074 and parameters: {'learning_rate': 0.007920001174565313, 'sigma_multiplier': 1.1596456890357103, 'num_layers': 2, 'initialization_multiplier': 0.6239912184497821}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 856 final loss: -0.00040542
Trial 857:
  Learning Rate: 0.013600934246771265
  Sigma Multiplier: 1.0576024759714164
  Initialization Multiplier: 0.7203023115055343
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.56it/s, loss=-0.000294, elapsed time=0.07, total time=10.5]
[I 2025-06-07 15:23:59,886] Trial 857 finished with value: -0.00029443182489815434 and parameters: {'learning_rate': 0.013600934246771265, 'sigma_multiplier': 1.0576024759714164, 'num_layers': 2, 'initialization_multiplier': 0.7203023115055343}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 857 final loss: -0.00029443
Trial 858:
  Learning Rate: 0.0052531971800577025
  Sigma Multiplier: 1.1097567541150857
  Initialization Multiplier: 0.5493822060071458
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.98it/s, loss=-0.000405, elapsed time=0.06, total time=9.72]
[I 2025-06-07 15:24:09,662] Trial 858 finished with value: -0.0004051159846155994 and parameters: {'learning_rate': 0.0052531971800577025, 'sigma_multiplier': 1.1097567541150857, 'num_layers': 2, 'initialization_multiplier': 0.5493822060071458}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 858 final loss: -0.00040512
Trial 859:
  Learning Rate: 0.00942733946974963
  Sigma Multiplier: 0.9735940343947829
  Initialization Multiplier: 0.45012051874996495
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.79it/s, loss=-0.000383, elapsed time=0.05, total time=9.87]
[I 2025-06-07 15:24:19,605] Trial 859 finished with value: -0.0003827417136394438 and parameters: {'learning_rate': 0.00942733946974963, 'sigma_multiplier': 0.9735940343947829, 'num_layers': 2, 'initialization_multiplier': 0.45012051874996495}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 859 final loss: -0.00038274
Trial 860:
  Learning Rate: 0.010985038525376157
  Sigma Multiplier: 1.0281926618001818
  Initialization Multiplier: 0.5006353236226444
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.78it/s, loss=-0.000378, elapsed time=0.06, total time=9.8] 
[I 2025-06-07 15:24:29,455] Trial 860 finished with value: -0.0003782726375078969 and parameters: {'learning_rate': 0.010985038525376157, 'sigma_multiplier': 1.0281926618001818, 'num_layers': 2, 'initialization_multiplier': 0.5006353236226444}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 860 final loss: -0.00037827
Trial 861:
  Learning Rate: 0.008773356866339016
  Sigma Multiplier: 0.9231798402055437
  Initialization Multiplier: 0.5913531104403473
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.95it/s, loss=-0.000359, elapsed time=0.06, total time=10.3]
[I 2025-06-07 15:24:39,818] Trial 861 finished with value: -0.0003586430736144315 and parameters: {'learning_rate': 0.008773356866339016, 'sigma_multiplier': 0.9231798402055437, 'num_layers': 2, 'initialization_multiplier': 0.5913531104403473}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 861 final loss: -0.00035864
Trial 862:
  Learning Rate: 0.005958920866962708
  Sigma Multiplier: 0.9990931553389228
  Initialization Multiplier: 0.6559665416129504
  Number of Layers: 3


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 12.85it/s, loss=-0.000233, elapsed time=0.04, total time=11.9]
[I 2025-06-07 15:24:51,844] Trial 862 finished with value: -0.00023295936737688033 and parameters: {'learning_rate': 0.005958920866962708, 'sigma_multiplier': 0.9990931553389228, 'num_layers': 3, 'initialization_multiplier': 0.6559665416129504}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 862 final loss: -0.00023296
Trial 863:
  Learning Rate: 0.0071990892065526444
  Sigma Multiplier: 1.075817161451557
  Initialization Multiplier: 0.5360392675027332
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.37it/s, loss=-0.000328, elapsed time=0.06, total time=9.95]
[I 2025-06-07 15:25:01,846] Trial 863 finished with value: -0.0003276131031546956 and parameters: {'learning_rate': 0.0071990892065526444, 'sigma_multiplier': 1.075817161451557, 'num_layers': 2, 'initialization_multiplier': 0.5360392675027332}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 863 final loss: -0.00032761
Trial 864:
  Learning Rate: 0.0037502002728355957
  Sigma Multiplier: 0.9456437587038968
  Initialization Multiplier: 0.6180331025164003
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.98it/s, loss=-0.000367, elapsed time=0.05, total time=10.3]
[I 2025-06-07 15:25:12,199] Trial 864 finished with value: -0.00036666202756581467 and parameters: {'learning_rate': 0.0037502002728355957, 'sigma_multiplier': 0.9456437587038968, 'num_layers': 2, 'initialization_multiplier': 0.6180331025164003}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 864 final loss: -0.00036666
Trial 865:
  Learning Rate: 0.0123972036555753
  Sigma Multiplier: 1.1316260690539939
  Initialization Multiplier: 0.7996207535465907
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.68it/s, loss=-0.000212, elapsed time=0.06, total time=9.89]
[I 2025-06-07 15:25:22,144] Trial 865 finished with value: -0.00021209678914594593 and parameters: {'learning_rate': 0.0123972036555753, 'sigma_multiplier': 1.1316260690539939, 'num_layers': 2, 'initialization_multiplier': 0.7996207535465907}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 865 final loss: -0.00021210
Trial 866:
  Learning Rate: 0.014170968056942861
  Sigma Multiplier: 1.0475193317181248
  Initialization Multiplier: 0.6874278740464024
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.35it/s, loss=-0.000189, elapsed time=0.06, total time=10.1]
[I 2025-06-07 15:25:32,278] Trial 866 finished with value: -0.00018850513744266914 and parameters: {'learning_rate': 0.014170968056942861, 'sigma_multiplier': 1.0475193317181248, 'num_layers': 2, 'initialization_multiplier': 0.6874278740464024}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 866 final loss: -0.00018851
Trial 867:
  Learning Rate: 0.0009538967976799997
  Sigma Multiplier: 1.2232139620925846
  Initialization Multiplier: 0.4827034075669615
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.09it/s, loss=0.001273, elapsed time=0.05, total time=9.61]
[I 2025-06-07 15:25:41,945] Trial 867 finished with value: 0.0012732041197662447 and parameters: {'learning_rate': 0.0009538967976799997, 'sigma_multiplier': 1.2232139620925846, 'num_layers': 2, 'initialization_multiplier': 0.4827034075669615}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 867 final loss: 0.00127320
Trial 868:
  Learning Rate: 0.008482541317569751
  Sigma Multiplier: 0.9877182555134906
  Initialization Multiplier: 0.5698337804795316
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.35it/s, loss=-0.000339, elapsed time=0.08, total time=10.1]
[I 2025-06-07 15:25:52,126] Trial 868 finished with value: -0.000338903456771053 and parameters: {'learning_rate': 0.008482541317569751, 'sigma_multiplier': 0.9877182555134906, 'num_layers': 2, 'initialization_multiplier': 0.5698337804795316}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 868 final loss: -0.00033890
Trial 869:
  Learning Rate: 0.010135154104855595
  Sigma Multiplier: 1.9961163161668949
  Initialization Multiplier: 0.6447230927171609
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.15it/s, loss=-0.000198, elapsed time=0.04, total time=8.5] 
[I 2025-06-07 15:26:00,681] Trial 869 finished with value: -0.00019771392059959658 and parameters: {'learning_rate': 0.010135154104855595, 'sigma_multiplier': 1.9961163161668949, 'num_layers': 2, 'initialization_multiplier': 0.6447230927171609}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 869 final loss: -0.00019771
Trial 870:
  Learning Rate: 0.0076134211287027355
  Sigma Multiplier: 0.649324378350064
  Initialization Multiplier: 0.517790383602111
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.30it/s, loss=-0.000148, elapsed time=0.06, total time=11.6]
[I 2025-06-07 15:26:12,373] Trial 870 finished with value: -0.00014796450675417727 and parameters: {'learning_rate': 0.0076134211287027355, 'sigma_multiplier': 0.649324378350064, 'num_layers': 2, 'initialization_multiplier': 0.517790383602111}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 870 final loss: -0.00014796
Trial 871:
  Learning Rate: 0.006292855936162343
  Sigma Multiplier: 1.0279509272182825
  Initialization Multiplier: 0.4487906720586336
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.11it/s, loss=-0.000409, elapsed time=0.05, total time=10.3]
[I 2025-06-07 15:26:22,732] Trial 871 finished with value: -0.00040924078157777223 and parameters: {'learning_rate': 0.006292855936162343, 'sigma_multiplier': 1.0279509272182825, 'num_layers': 2, 'initialization_multiplier': 0.4487906720586336}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 871 final loss: -0.00040924
Trial 872:
  Learning Rate: 0.004790131725339676
  Sigma Multiplier: 0.9597678250014812
  Initialization Multiplier: 0.5950419175784964
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.24it/s, loss=-0.000354, elapsed time=0.06, total time=10.2]
[I 2025-06-07 15:26:32,948] Trial 872 finished with value: -0.00035391021187498157 and parameters: {'learning_rate': 0.004790131725339676, 'sigma_multiplier': 0.9597678250014812, 'num_layers': 2, 'initialization_multiplier': 0.5950419175784964}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 872 final loss: -0.00035391
Trial 873:
  Learning Rate: 0.010989344357869434
  Sigma Multiplier: 0.8996238865169826
  Initialization Multiplier: 0.8542940223106296
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.94it/s, loss=0.000075, elapsed time=0.06, total time=11.1] 
[I 2025-06-07 15:26:44,127] Trial 873 finished with value: 7.51630424266834e-05 and parameters: {'learning_rate': 0.010989344357869434, 'sigma_multiplier': 0.8996238865169826, 'num_layers': 2, 'initialization_multiplier': 0.8542940223106296}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 873 final loss: 0.00007516
Trial 874:
  Learning Rate: 0.009369228676477886
  Sigma Multiplier: 1.086008516555585
  Initialization Multiplier: 0.5451958555010139
  Number of Layers: 1


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 18.99it/s, loss=-0.000218, elapsed time=0.05, total time=8.2] 
[I 2025-06-07 15:26:52,377] Trial 874 finished with value: -0.00021805249745972675 and parameters: {'learning_rate': 0.009369228676477886, 'sigma_multiplier': 1.086008516555585, 'num_layers': 1, 'initialization_multiplier': 0.5451958555010139}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 874 final loss: -0.00021805
Trial 875:
  Learning Rate: 0.012870845252440546
  Sigma Multiplier: 1.0006177173054331
  Initialization Multiplier: 0.5010337484406306
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.97it/s, loss=-0.000448, elapsed time=0.06, total time=10.3]
[I 2025-06-07 15:27:02,708] Trial 875 finished with value: -0.0004481912845556237 and parameters: {'learning_rate': 0.012870845252440546, 'sigma_multiplier': 1.0006177173054331, 'num_layers': 2, 'initialization_multiplier': 0.5010337484406306}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 875 final loss: -0.00044819
Trial 876:
  Learning Rate: 0.014100856472079974
  Sigma Multiplier: 0.2711901973659212
  Initialization Multiplier: 0.07939647113624981
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.21it/s, loss=0.001533, elapsed time=0.09, total time=13.7]
[I 2025-06-07 15:27:16,438] Trial 876 finished with value: 0.0015332582932590696 and parameters: {'learning_rate': 0.014100856472079974, 'sigma_multiplier': 0.2711901973659212, 'num_layers': 2, 'initialization_multiplier': 0.07939647113624981}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 876 final loss: 0.00153326
Trial 877:
  Learning Rate: 0.012986205437375614
  Sigma Multiplier: 0.9267573190517251
  Initialization Multiplier: 0.4381666159628285
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.97it/s, loss=-0.000305, elapsed time=0.05, total time=10.3]
[I 2025-06-07 15:27:26,799] Trial 877 finished with value: -0.0003047809918173005 and parameters: {'learning_rate': 0.012986205437375614, 'sigma_multiplier': 0.9267573190517251, 'num_layers': 2, 'initialization_multiplier': 0.4381666159628285}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 877 final loss: -0.00030478
Trial 878:
  Learning Rate: 0.016235279278925326
  Sigma Multiplier: 0.9775130316668585
  Initialization Multiplier: 0.4843737259930274
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.67it/s, loss=-0.000252, elapsed time=0.05, total time=9.84]
[I 2025-06-07 15:27:36,706] Trial 878 finished with value: -0.00025211269682637696 and parameters: {'learning_rate': 0.016235279278925326, 'sigma_multiplier': 0.9775130316668585, 'num_layers': 2, 'initialization_multiplier': 0.4843737259930274}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 878 final loss: -0.00025211
Trial 879:
  Learning Rate: 0.011895633391484504
  Sigma Multiplier: 0.8599990594691641
  Initialization Multiplier: 0.4939471204379513
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.48it/s, loss=-0.000247, elapsed time=0.06, total time=10.7]
[I 2025-06-07 15:27:47,471] Trial 879 finished with value: -0.00024667800708552597 and parameters: {'learning_rate': 0.011895633391484504, 'sigma_multiplier': 0.8599990594691641, 'num_layers': 2, 'initialization_multiplier': 0.4939471204379513}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 879 final loss: -0.00024668
Trial 880:
  Learning Rate: 0.015400420088254197
  Sigma Multiplier: 0.9972923763486555
  Initialization Multiplier: 0.44901642579477363
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.10it/s, loss=-0.000321, elapsed time=0.06, total time=10.2]
[I 2025-06-07 15:27:57,717] Trial 880 finished with value: -0.00032076119792978874 and parameters: {'learning_rate': 0.015400420088254197, 'sigma_multiplier': 0.9972923763486555, 'num_layers': 2, 'initialization_multiplier': 0.44901642579477363}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 880 final loss: -0.00032076
Trial 881:
  Learning Rate: 0.013203785942741481
  Sigma Multiplier: 0.9515646962405133
  Initialization Multiplier: 0.5018010388828276
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.35it/s, loss=-0.000411, elapsed time=0.06, total time=10.8]
[I 2025-06-07 15:28:08,591] Trial 881 finished with value: -0.00041102578078599833 and parameters: {'learning_rate': 0.013203785942741481, 'sigma_multiplier': 0.9515646962405133, 'num_layers': 2, 'initialization_multiplier': 0.5018010388828276}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 881 final loss: -0.00041103
Trial 882:
  Learning Rate: 0.011070810749894086
  Sigma Multiplier: 1.015460056498316
  Initialization Multiplier: 0.5264423418872161
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.09it/s, loss=-0.000354, elapsed time=0.08, total time=11.1]
[I 2025-06-07 15:28:19,878] Trial 882 finished with value: -0.0003536386723851458 and parameters: {'learning_rate': 0.011070810749894086, 'sigma_multiplier': 1.015460056498316, 'num_layers': 2, 'initialization_multiplier': 0.5264423418872161}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 882 final loss: -0.00035364
Trial 883:
  Learning Rate: 0.014592426100122532
  Sigma Multiplier: 0.9137698665022642
  Initialization Multiplier: 0.47850487112488516
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.80it/s, loss=-0.000330, elapsed time=0.06, total time=10.4]
[I 2025-06-07 15:28:30,366] Trial 883 finished with value: -0.0003303609997519016 and parameters: {'learning_rate': 0.014592426100122532, 'sigma_multiplier': 0.9137698665022642, 'num_layers': 2, 'initialization_multiplier': 0.47850487112488516}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 883 final loss: -0.00033036
Trial 884:
  Learning Rate: 0.01015620800351391
  Sigma Multiplier: 0.9669890877471192
  Initialization Multiplier: 0.4287686596581082
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.45it/s, loss=-0.000357, elapsed time=0.09, total time=10.8]
[I 2025-06-07 15:28:41,246] Trial 884 finished with value: -0.00035659506903805335 and parameters: {'learning_rate': 0.01015620800351391, 'sigma_multiplier': 0.9669890877471192, 'num_layers': 2, 'initialization_multiplier': 0.4287686596581082}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 884 final loss: -0.00035660
Trial 885:
  Learning Rate: 0.012443696736264154
  Sigma Multiplier: 1.7282269625961844
  Initialization Multiplier: 0.5377971164844962
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.70it/s, loss=-0.000252, elapsed time=0.05, total time=8.8] 
[I 2025-06-07 15:28:50,103] Trial 885 finished with value: -0.00025196301352531044 and parameters: {'learning_rate': 0.012443696736264154, 'sigma_multiplier': 1.7282269625961844, 'num_layers': 2, 'initialization_multiplier': 0.5377971164844962}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 885 final loss: -0.00025196
Trial 886:
  Learning Rate: 0.009403560183163037
  Sigma Multiplier: 0.7834107295026518
  Initialization Multiplier: 0.5693111236579573
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.40it/s, loss=-0.000323, elapsed time=0.06, total time=10.7]
[I 2025-06-07 15:29:00,882] Trial 886 finished with value: -0.00032335919163625597 and parameters: {'learning_rate': 0.009403560183163037, 'sigma_multiplier': 0.7834107295026518, 'num_layers': 2, 'initialization_multiplier': 0.5693111236579573}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 886 final loss: -0.00032336
Trial 887:
  Learning Rate: 0.01163496528144572
  Sigma Multiplier: 0.8341260953755864
  Initialization Multiplier: 0.5105731363083474
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.73it/s, loss=-0.000193, elapsed time=0.05, total time=10.5]
[I 2025-06-07 15:29:11,435] Trial 887 finished with value: -0.00019344164474918292 and parameters: {'learning_rate': 0.01163496528144572, 'sigma_multiplier': 0.8341260953755864, 'num_layers': 2, 'initialization_multiplier': 0.5105731363083474}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 887 final loss: -0.00019344
Trial 888:
  Learning Rate: 0.018394430895587868
  Sigma Multiplier: 1.0019874895434708
  Initialization Multiplier: 0.48257277713625907
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.21it/s, loss=-0.000294, elapsed time=0.05, total time=10.2]
[I 2025-06-07 15:29:21,743] Trial 888 finished with value: -0.00029410256556239235 and parameters: {'learning_rate': 0.018394430895587868, 'sigma_multiplier': 1.0019874895434708, 'num_layers': 2, 'initialization_multiplier': 0.48257277713625907}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 888 final loss: -0.00029410
Trial 889:
  Learning Rate: 0.008571538354703223
  Sigma Multiplier: 0.8939740914247086
  Initialization Multiplier: 0.4523880399521599
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.67it/s, loss=-0.000273, elapsed time=0.06, total time=11.3]
[I 2025-06-07 15:29:33,093] Trial 889 finished with value: -0.00027306345785340876 and parameters: {'learning_rate': 0.008571538354703223, 'sigma_multiplier': 0.8939740914247086, 'num_layers': 2, 'initialization_multiplier': 0.4523880399521599}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 889 final loss: -0.00027306
Trial 890:
  Learning Rate: 0.010766751879575
  Sigma Multiplier: 0.9414376040750428
  Initialization Multiplier: 0.5537215456055115
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.76it/s, loss=-0.000339, elapsed time=0.05, total time=10.5]
[I 2025-06-07 15:29:43,615] Trial 890 finished with value: -0.0003385598271286038 and parameters: {'learning_rate': 0.010766751879575, 'sigma_multiplier': 0.9414376040750428, 'num_layers': 2, 'initialization_multiplier': 0.5537215456055115}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 890 final loss: -0.00033856
Trial 891:
  Learning Rate: 0.015023452664639657
  Sigma Multiplier: 1.0384018285387993
  Initialization Multiplier: 0.4187156387949576
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.89it/s, loss=-0.000410, elapsed time=0.05, total time=11.1]
[I 2025-06-07 15:29:54,786] Trial 891 finished with value: -0.00040964437674837053 and parameters: {'learning_rate': 0.015023452664639657, 'sigma_multiplier': 1.0384018285387993, 'num_layers': 2, 'initialization_multiplier': 0.4187156387949576}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 891 final loss: -0.00040964
Trial 892:
  Learning Rate: 0.012861249147713132
  Sigma Multiplier: 0.9866541388934394
  Initialization Multiplier: 0.5240143637964606
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.40it/s, loss=-0.000373, elapsed time=0.05, total time=10.9]
[I 2025-06-07 15:30:05,731] Trial 892 finished with value: -0.00037336717163644897 and parameters: {'learning_rate': 0.012861249147713132, 'sigma_multiplier': 0.9866541388934394, 'num_layers': 2, 'initialization_multiplier': 0.5240143637964606}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 892 final loss: -0.00037337
Trial 893:
  Learning Rate: 0.009629690171655434
  Sigma Multiplier: 1.0284625414731206
  Initialization Multiplier: 1.5754685413969611
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.74it/s, loss=-0.000078, elapsed time=0.07, total time=11.3]
[I 2025-06-07 15:30:17,205] Trial 893 finished with value: -7.80480199930482e-05 and parameters: {'learning_rate': 0.009629690171655434, 'sigma_multiplier': 1.0284625414731206, 'num_layers': 2, 'initialization_multiplier': 1.5754685413969611}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 893 final loss: -0.00007805
Trial 894:
  Learning Rate: 0.008014206084465002
  Sigma Multiplier: 0.9519527650326882
  Initialization Multiplier: 0.5745557596905667
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 13.96it/s, loss=-0.000262, elapsed time=0.06, total time=11]  
[I 2025-06-07 15:30:28,299] Trial 894 finished with value: -0.00026203423046639637 and parameters: {'learning_rate': 0.008014206084465002, 'sigma_multiplier': 0.9519527650326882, 'num_layers': 2, 'initialization_multiplier': 0.5745557596905667}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 894 final loss: -0.00026203
Trial 895:
  Learning Rate: 0.010830630031721161
  Sigma Multiplier: 0.9971089572296671
  Initialization Multiplier: 0.4690421986116034
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.97it/s, loss=-0.000352, elapsed time=0.06, total time=10.3]
[I 2025-06-07 15:30:38,629] Trial 895 finished with value: -0.00035225676535739417 and parameters: {'learning_rate': 0.010830630031721161, 'sigma_multiplier': 0.9971089572296671, 'num_layers': 2, 'initialization_multiplier': 0.4690421986116034}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 895 final loss: -0.00035226
Trial 896:
  Learning Rate: 0.013106988120117694
  Sigma Multiplier: 0.9170041145976252
  Initialization Multiplier: 0.5918969481283619
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.13it/s, loss=-0.000350, elapsed time=0.06, total time=10.2]
[I 2025-06-07 15:30:48,870] Trial 896 finished with value: -0.0003503719996958127 and parameters: {'learning_rate': 0.013106988120117694, 'sigma_multiplier': 0.9170041145976252, 'num_layers': 2, 'initialization_multiplier': 0.5918969481283619}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 896 final loss: -0.00035037
Trial 897:
  Learning Rate: 0.008745426680665594
  Sigma Multiplier: 1.0551075018128733
  Initialization Multiplier: 0.5212140619604206
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.68it/s, loss=-0.000327, elapsed time=0.07, total time=10.5]
[I 2025-06-07 15:30:59,446] Trial 897 finished with value: -0.0003273238494479227 and parameters: {'learning_rate': 0.008745426680665594, 'sigma_multiplier': 1.0551075018128733, 'num_layers': 2, 'initialization_multiplier': 0.5212140619604206}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 897 final loss: -0.00032732
Trial 898:
  Learning Rate: 0.016701930241651494
  Sigma Multiplier: 0.9807242652282391
  Initialization Multiplier: 0.5604241230559934
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.16it/s, loss=-0.000444, elapsed time=0.04, total time=10.2]
[I 2025-06-07 15:31:09,728] Trial 898 finished with value: -0.00044396674507511614 and parameters: {'learning_rate': 0.016701930241651494, 'sigma_multiplier': 0.9807242652282391, 'num_layers': 2, 'initialization_multiplier': 0.5604241230559934}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 898 final loss: -0.00044397
Trial 899:
  Learning Rate: 0.0181367593283496
  Sigma Multiplier: 0.95892849255308
  Initialization Multiplier: 0.6021102932630022
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.11it/s, loss=-0.000362, elapsed time=0.05, total time=10.9]
[I 2025-06-07 15:31:20,765] Trial 899 finished with value: -0.0003617528086461154 and parameters: {'learning_rate': 0.0181367593283496, 'sigma_multiplier': 0.95892849255308, 'num_layers': 2, 'initialization_multiplier': 0.6021102932630022}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 899 final loss: -0.00036175
Trial 900:
  Learning Rate: 0.020779277809807623
  Sigma Multiplier: 0.8747466119988282
  Initialization Multiplier: 0.5570712346793113
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.57it/s, loss=-0.000280, elapsed time=0.07, total time=10.6]
[I 2025-06-07 15:31:31,424] Trial 900 finished with value: -0.0002797819015391951 and parameters: {'learning_rate': 0.020779277809807623, 'sigma_multiplier': 0.8747466119988282, 'num_layers': 2, 'initialization_multiplier': 0.5570712346793113}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 900 final loss: -0.00027978
Trial 901:
  Learning Rate: 0.018866974786069312
  Sigma Multiplier: 1.018875063661493
  Initialization Multiplier: 0.625388869225922
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.07it/s, loss=-0.000337, elapsed time=0.05, total time=10.3]
[I 2025-06-07 15:31:41,816] Trial 901 finished with value: -0.000337219959187835 and parameters: {'learning_rate': 0.018866974786069312, 'sigma_multiplier': 1.018875063661493, 'num_layers': 2, 'initialization_multiplier': 0.625388869225922}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 901 final loss: -0.00033722
Trial 902:
  Learning Rate: 0.01666111719012241
  Sigma Multiplier: 1.0979909105109833
  Initialization Multiplier: 0.5045144132380744
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.07it/s, loss=-0.000383, elapsed time=0.07, total time=10.2]
[I 2025-06-07 15:31:52,136] Trial 902 finished with value: -0.0003834601094627151 and parameters: {'learning_rate': 0.01666111719012241, 'sigma_multiplier': 1.0979909105109833, 'num_layers': 2, 'initialization_multiplier': 0.5045144132380744}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 902 final loss: -0.00038346
Trial 903:
  Learning Rate: 0.015148606255129276
  Sigma Multiplier: 0.9355200955629763
  Initialization Multiplier: 0.4049166860016499
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.39it/s, loss=-0.000323, elapsed time=0.04, total time=10.7]
[I 2025-06-07 15:32:02,872] Trial 903 finished with value: -0.000323248349344318 and parameters: {'learning_rate': 0.015148606255129276, 'sigma_multiplier': 0.9355200955629763, 'num_layers': 2, 'initialization_multiplier': 0.4049166860016499}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 903 final loss: -0.00032325
Trial 904:
  Learning Rate: 0.021166126345694397
  Sigma Multiplier: 1.1718179352493119
  Initialization Multiplier: 0.5877925388547495
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.57it/s, loss=-0.000322, elapsed time=0.07, total time=10]  
[I 2025-06-07 15:32:12,929] Trial 904 finished with value: -0.00032229334648704524 and parameters: {'learning_rate': 0.021166126345694397, 'sigma_multiplier': 1.1718179352493119, 'num_layers': 2, 'initialization_multiplier': 0.5877925388547495}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 904 final loss: -0.00032229
Trial 905:
  Learning Rate: 0.015101874121313273
  Sigma Multiplier: 1.049996399160759
  Initialization Multiplier: 0.5528313627996039
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.38it/s, loss=-0.000303, elapsed time=0.09, total time=10.8]
[I 2025-06-07 15:32:23,760] Trial 905 finished with value: -0.0003030191372212348 and parameters: {'learning_rate': 0.015101874121313273, 'sigma_multiplier': 1.049996399160759, 'num_layers': 2, 'initialization_multiplier': 0.5528313627996039}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 905 final loss: -0.00030302
Trial 906:
  Learning Rate: 0.02628299947449508
  Sigma Multiplier: 0.9831627605096884
  Initialization Multiplier: 0.4929451207737733
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.01it/s, loss=-0.000328, elapsed time=0.05, total time=11.1]
[I 2025-06-07 15:32:34,885] Trial 906 finished with value: -0.000328106051868223 and parameters: {'learning_rate': 0.02628299947449508, 'sigma_multiplier': 0.9831627605096884, 'num_layers': 2, 'initialization_multiplier': 0.4929451207737733}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 906 final loss: -0.00032811
Trial 907:
  Learning Rate: 0.024339165668863558
  Sigma Multiplier: 1.018142709067512
  Initialization Multiplier: 0.4554139377222715
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.30it/s, loss=-0.000307, elapsed time=0.08, total time=11.6]
[I 2025-06-07 15:32:46,558] Trial 907 finished with value: -0.0003071638661908237 and parameters: {'learning_rate': 0.024339165668863558, 'sigma_multiplier': 1.018142709067512, 'num_layers': 2, 'initialization_multiplier': 0.4554139377222715}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 907 final loss: -0.00030716
Trial 908:
  Learning Rate: 0.016417903532985768
  Sigma Multiplier: 0.89917347077956
  Initialization Multiplier: 0.628534496572215
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.35it/s, loss=-0.000408, elapsed time=0.06, total time=10.8]
[I 2025-06-07 15:32:57,524] Trial 908 finished with value: -0.0004076141116000268 and parameters: {'learning_rate': 0.016417903532985768, 'sigma_multiplier': 0.89917347077956, 'num_layers': 2, 'initialization_multiplier': 0.628534496572215}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 908 final loss: -0.00040761
Trial 909:
  Learning Rate: 0.01799936461575598
  Sigma Multiplier: 1.0762338039430908
  Initialization Multiplier: 0.5380043027797428
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.83it/s, loss=-0.000360, elapsed time=0.05, total time=10.4]
[I 2025-06-07 15:33:07,971] Trial 909 finished with value: -0.0003602155314514124 and parameters: {'learning_rate': 0.01799936461575598, 'sigma_multiplier': 1.0762338039430908, 'num_layers': 2, 'initialization_multiplier': 0.5380043027797428}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 909 final loss: -0.00036022
Trial 910:
  Learning Rate: 0.02148842596722561
  Sigma Multiplier: 0.9706394222151868
  Initialization Multiplier: 0.6152914878393387
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.10it/s, loss=-0.000344, elapsed time=0.06, total time=11]  
[I 2025-06-07 15:33:18,990] Trial 910 finished with value: -0.00034421380783799984 and parameters: {'learning_rate': 0.02148842596722561, 'sigma_multiplier': 0.9706394222151868, 'num_layers': 2, 'initialization_multiplier': 0.6152914878393387}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 910 final loss: -0.00034421
Trial 911:
  Learning Rate: 0.014287950120440339
  Sigma Multiplier: 1.1223172626075828
  Initialization Multiplier: 0.5770531104536109
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.65it/s, loss=-0.000352, elapsed time=0.07, total time=9.92]
[I 2025-06-07 15:33:28,972] Trial 911 finished with value: -0.00035152345741242165 and parameters: {'learning_rate': 0.014287950120440339, 'sigma_multiplier': 1.1223172626075828, 'num_layers': 2, 'initialization_multiplier': 0.5770531104536109}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 911 final loss: -0.00035152
Trial 912:
  Learning Rate: 0.012722937381049872
  Sigma Multiplier: 1.0482990558608623
  Initialization Multiplier: 0.5075927403099767
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.01it/s, loss=-0.000385, elapsed time=0.08, total time=10.3]
[I 2025-06-07 15:33:39,322] Trial 912 finished with value: -0.0003853433296202336 and parameters: {'learning_rate': 0.012722937381049872, 'sigma_multiplier': 1.0482990558608623, 'num_layers': 2, 'initialization_multiplier': 0.5075927403099767}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 912 final loss: -0.00038534
Trial 913:
  Learning Rate: 0.015873766798478635
  Sigma Multiplier: 1.0009845048530934
  Initialization Multiplier: 0.44097276176170963
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.55it/s, loss=-0.000298, elapsed time=0.07, total time=9.95]
[I 2025-06-07 15:33:49,338] Trial 913 finished with value: -0.00029780526822886387 and parameters: {'learning_rate': 0.015873766798478635, 'sigma_multiplier': 1.0009845048530934, 'num_layers': 2, 'initialization_multiplier': 0.44097276176170963}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 913 final loss: -0.00029781
Trial 914:
  Learning Rate: 0.01255451930680294
  Sigma Multiplier: 0.9408040684445463
  Initialization Multiplier: 0.6645064417667299
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.45it/s, loss=-0.000367, elapsed time=0.07, total time=10.7]
[I 2025-06-07 15:34:00,088] Trial 914 finished with value: -0.0003669722532810535 and parameters: {'learning_rate': 0.01255451930680294, 'sigma_multiplier': 0.9408040684445463, 'num_layers': 2, 'initialization_multiplier': 0.6645064417667299}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 914 final loss: -0.00036697
Trial 915:
  Learning Rate: 0.011177115921385292
  Sigma Multiplier: 1.0915672921417536
  Initialization Multiplier: 0.584599548089443
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.27it/s, loss=-0.000349, elapsed time=0.05, total time=10.9]
[I 2025-06-07 15:34:11,069] Trial 915 finished with value: -0.0003492862273347694 and parameters: {'learning_rate': 0.011177115921385292, 'sigma_multiplier': 1.0915672921417536, 'num_layers': 2, 'initialization_multiplier': 0.584599548089443}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 915 final loss: -0.00034929
Trial 916:
  Learning Rate: 0.014150660703514858
  Sigma Multiplier: 1.0249355840507206
  Initialization Multiplier: 0.5492078980980982
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.46it/s, loss=-0.000455, elapsed time=0.05, total time=10.6]
[I 2025-06-07 15:34:21,773] Trial 916 finished with value: -0.00045526134166227783 and parameters: {'learning_rate': 0.014150660703514858, 'sigma_multiplier': 1.0249355840507206, 'num_layers': 2, 'initialization_multiplier': 0.5492078980980982}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 916 final loss: -0.00045526
Trial 917:
  Learning Rate: 0.019872037916942194
  Sigma Multiplier: 1.0544304867972634
  Initialization Multiplier: 0.5645357670232406
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.14it/s, loss=-0.000398, elapsed time=0.05, total time=10.2]
[I 2025-06-07 15:34:32,025] Trial 917 finished with value: -0.00039799478268703014 and parameters: {'learning_rate': 0.019872037916942194, 'sigma_multiplier': 1.0544304867972634, 'num_layers': 2, 'initialization_multiplier': 0.5645357670232406}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 917 final loss: -0.00039799
Trial 918:
  Learning Rate: 0.07152203562761499
  Sigma Multiplier: 1.1370692456152014
  Initialization Multiplier: 0.6464278558252133
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.77it/s, loss=-0.000128, elapsed time=0.06, total time=9.8] 
[I 2025-06-07 15:34:41,893] Trial 918 finished with value: -0.00012790718399064888 and parameters: {'learning_rate': 0.07152203562761499, 'sigma_multiplier': 1.1370692456152014, 'num_layers': 2, 'initialization_multiplier': 0.6464278558252133}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 918 final loss: -0.00012791
Trial 919:
  Learning Rate: 0.023406228901557583
  Sigma Multiplier: 1.0822511984885006
  Initialization Multiplier: 0.6148335941971423
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.21it/s, loss=-0.000325, elapsed time=0.06, total time=9.56]
[I 2025-06-07 15:34:51,509] Trial 919 finished with value: -0.000324705286995425 and parameters: {'learning_rate': 0.023406228901557583, 'sigma_multiplier': 1.0822511984885006, 'num_layers': 2, 'initialization_multiplier': 0.6148335941971423}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 919 final loss: -0.00032471
Trial 920:
  Learning Rate: 0.01668063796923407
  Sigma Multiplier: 1.034342610187125
  Initialization Multiplier: 0.5440282722725677
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.21it/s, loss=-0.000349, elapsed time=0.06, total time=9.57]
[I 2025-06-07 15:35:01,144] Trial 920 finished with value: -0.0003486043190627338 and parameters: {'learning_rate': 0.01668063796923407, 'sigma_multiplier': 1.034342610187125, 'num_layers': 2, 'initialization_multiplier': 0.5440282722725677}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 920 final loss: -0.00034860
Trial 921:
  Learning Rate: 0.018981020281713445
  Sigma Multiplier: 1.0697888121466672
  Initialization Multiplier: 0.5881466079266812
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.24it/s, loss=-0.000402, elapsed time=0.05, total time=9.47]
[I 2025-06-07 15:35:10,669] Trial 921 finished with value: -0.00040174140546546367 and parameters: {'learning_rate': 0.018981020281713445, 'sigma_multiplier': 1.0697888121466672, 'num_layers': 2, 'initialization_multiplier': 0.5881466079266812}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 921 final loss: -0.00040174
Trial 922:
  Learning Rate: 0.01461867939018723
  Sigma Multiplier: 1.1112836375119095
  Initialization Multiplier: 0.5355764781609483
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.63it/s, loss=-0.000367, elapsed time=0.05, total time=9.28]
[I 2025-06-07 15:35:20,014] Trial 922 finished with value: -0.00036650991876149586 and parameters: {'learning_rate': 0.01461867939018723, 'sigma_multiplier': 1.1112836375119095, 'num_layers': 2, 'initialization_multiplier': 0.5355764781609483}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 922 final loss: -0.00036651
Trial 923:
  Learning Rate: 0.01720452827707901
  Sigma Multiplier: 1.0274474345438696
  Initialization Multiplier: 0.6224536103225422
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.64it/s, loss=-0.000360, elapsed time=0.06, total time=9.28]
[I 2025-06-07 15:35:29,355] Trial 923 finished with value: -0.0003601874300674179 and parameters: {'learning_rate': 0.01720452827707901, 'sigma_multiplier': 1.0274474345438696, 'num_layers': 2, 'initialization_multiplier': 0.6224536103225422}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 923 final loss: -0.00036019
Trial 924:
  Learning Rate: 0.01465440338560261
  Sigma Multiplier: 1.1901781965602938
  Initialization Multiplier: 0.556384801589809
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.64it/s, loss=-0.000330, elapsed time=0.05, total time=9.27]
[I 2025-06-07 15:35:38,673] Trial 924 finished with value: -0.00032959209102462027 and parameters: {'learning_rate': 0.01465440338560261, 'sigma_multiplier': 1.1901781965602938, 'num_layers': 2, 'initialization_multiplier': 0.556384801589809}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 924 final loss: -0.00032959
Trial 925:
  Learning Rate: 0.012641387799938248
  Sigma Multiplier: 1.0164752903461765
  Initialization Multiplier: 0.5774243003892772
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.82it/s, loss=-0.000400, elapsed time=0.05, total time=9.77]
[I 2025-06-07 15:35:48,500] Trial 925 finished with value: -0.0003995940776527892 and parameters: {'learning_rate': 0.012641387799938248, 'sigma_multiplier': 1.0164752903461765, 'num_layers': 2, 'initialization_multiplier': 0.5774243003892772}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 925 final loss: -0.00039959
Trial 926:
  Learning Rate: 0.01378865499180923
  Sigma Multiplier: 0.7469599284970533
  Initialization Multiplier: 0.6781810520720621
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.49it/s, loss=-0.000153, elapsed time=0.06, total time=10.7]
[I 2025-06-07 15:35:59,201] Trial 926 finished with value: -0.0001527291363867408 and parameters: {'learning_rate': 0.01378865499180923, 'sigma_multiplier': 0.7469599284970533, 'num_layers': 2, 'initialization_multiplier': 0.6781810520720621}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 926 final loss: -0.00015273
Trial 927:
  Learning Rate: 0.017441781365808023
  Sigma Multiplier: 1.1030742089603158
  Initialization Multiplier: 0.5063508286090121
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.43it/s, loss=-0.000321, elapsed time=0.05, total time=9.39]
[I 2025-06-07 15:36:08,649] Trial 927 finished with value: -0.00032077787049097255 and parameters: {'learning_rate': 0.017441781365808023, 'sigma_multiplier': 1.1030742089603158, 'num_layers': 2, 'initialization_multiplier': 0.5063508286090121}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 927 final loss: -0.00032078
Trial 928:
  Learning Rate: 0.013360148141917525
  Sigma Multiplier: 1.0531127733300065
  Initialization Multiplier: 0.6078770848431574
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.03it/s, loss=-0.000434, elapsed time=0.04, total time=9.64]
[I 2025-06-07 15:36:18,353] Trial 928 finished with value: -0.00043447373792806213 and parameters: {'learning_rate': 0.013360148141917525, 'sigma_multiplier': 1.0531127733300065, 'num_layers': 2, 'initialization_multiplier': 0.6078770848431574}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 928 final loss: -0.00043447
Trial 929:
  Learning Rate: 0.015466905748585212
  Sigma Multiplier: 1.1421531346292042
  Initialization Multiplier: 0.6411014752245134
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.06it/s, loss=-0.000365, elapsed time=0.05, total time=9.67]
[I 2025-06-07 15:36:28,137] Trial 929 finished with value: -0.00036462786788118057 and parameters: {'learning_rate': 0.015466905748585212, 'sigma_multiplier': 1.1421531346292042, 'num_layers': 2, 'initialization_multiplier': 0.6411014752245134}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 929 final loss: -0.00036463
Trial 930:
  Learning Rate: 0.020845388843452054
  Sigma Multiplier: 1.0146923311110159
  Initialization Multiplier: 0.5269387158508204
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.67it/s, loss=-0.000276, elapsed time=0.06, total time=9.89]
[I 2025-06-07 15:36:38,090] Trial 930 finished with value: -0.00027631842967631726 and parameters: {'learning_rate': 0.020845388843452054, 'sigma_multiplier': 1.0146923311110159, 'num_layers': 2, 'initialization_multiplier': 0.5269387158508204}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 930 final loss: -0.00027632
Trial 931:
  Learning Rate: 0.012344477719793467
  Sigma Multiplier: 1.0742807407437787
  Initialization Multiplier: 0.583186121203068
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.03it/s, loss=-0.000278, elapsed time=0.05, total time=9.63]
[I 2025-06-07 15:36:47,773] Trial 931 finished with value: -0.00027817115787841007 and parameters: {'learning_rate': 0.012344477719793467, 'sigma_multiplier': 1.0742807407437787, 'num_layers': 2, 'initialization_multiplier': 0.583186121203068}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 931 final loss: -0.00027817
Trial 932:
  Learning Rate: 0.006957258492456497
  Sigma Multiplier: 0.515049226256023
  Initialization Multiplier: 0.5436912949205639
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.09it/s, loss=0.000280, elapsed time=0.07, total time=11.8]
[I 2025-06-07 15:36:59,631] Trial 932 finished with value: 0.00027981363241464275 and parameters: {'learning_rate': 0.006957258492456497, 'sigma_multiplier': 0.515049226256023, 'num_layers': 2, 'initialization_multiplier': 0.5436912949205639}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 932 final loss: 0.00027981
Trial 933:
  Learning Rate: 0.011046624496748257
  Sigma Multiplier: 1.4467544613439256
  Initialization Multiplier: 0.5036960120135004
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.92it/s, loss=-0.000330, elapsed time=0.05, total time=8.62]
[I 2025-06-07 15:37:08,310] Trial 933 finished with value: -0.00032969003085628616 and parameters: {'learning_rate': 0.011046624496748257, 'sigma_multiplier': 1.4467544613439256, 'num_layers': 2, 'initialization_multiplier': 0.5036960120135004}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 933 final loss: -0.00032969
Trial 934:
  Learning Rate: 0.0001484693387230326
  Sigma Multiplier: 1.0037940063871302
  Initialization Multiplier: 0.6135786494186535
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.95it/s, loss=0.067598, elapsed time=0.05, total time=9.74]
[I 2025-06-07 15:37:18,105] Trial 934 finished with value: 0.06759807654600454 and parameters: {'learning_rate': 0.0001484693387230326, 'sigma_multiplier': 1.0037940063871302, 'num_layers': 2, 'initialization_multiplier': 0.6135786494186535}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 934 final loss: 0.06759808
Trial 935:
  Learning Rate: 0.007663918178072212
  Sigma Multiplier: 0.9920590392332251
  Initialization Multiplier: 0.6595943931666747
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.98it/s, loss=-0.000424, elapsed time=0.06, total time=10.3]
[I 2025-06-07 15:37:28,480] Trial 935 finished with value: -0.00042382464912207407 and parameters: {'learning_rate': 0.007663918178072212, 'sigma_multiplier': 0.9920590392332251, 'num_layers': 2, 'initialization_multiplier': 0.6595943931666747}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 935 final loss: -0.00042382
Trial 936:
  Learning Rate: 0.010537656080535631
  Sigma Multiplier: 1.0470205696554664
  Initialization Multiplier: 0.5648442186902444
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.76it/s, loss=-0.000422, elapsed time=0.06, total time=9.97]
[I 2025-06-07 15:37:38,537] Trial 936 finished with value: -0.00042195025048515615 and parameters: {'learning_rate': 0.010537656080535631, 'sigma_multiplier': 1.0470205696554664, 'num_layers': 2, 'initialization_multiplier': 0.5648442186902444}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 936 final loss: -0.00042195
Trial 937:
  Learning Rate: 0.014444794202249077
  Sigma Multiplier: 1.1236606138535108
  Initialization Multiplier: 0.493319839944981
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.97it/s, loss=-0.000268, elapsed time=0.06, total time=9.14]
[I 2025-06-07 15:37:47,746] Trial 937 finished with value: -0.00026815316931761487 and parameters: {'learning_rate': 0.014444794202249077, 'sigma_multiplier': 1.1236606138535108, 'num_layers': 2, 'initialization_multiplier': 0.493319839944981}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 937 final loss: -0.00026815
Trial 938:
  Learning Rate: 0.017042261851075487
  Sigma Multiplier: 0.9840912164237817
  Initialization Multiplier: 0.5385492025561593
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.42it/s, loss=-0.000349, elapsed time=0.05, total time=9.42]
[I 2025-06-07 15:37:57,221] Trial 938 finished with value: -0.0003489551491799645 and parameters: {'learning_rate': 0.017042261851075487, 'sigma_multiplier': 0.9840912164237817, 'num_layers': 2, 'initialization_multiplier': 0.5385492025561593}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 938 final loss: -0.00034896
Trial 939:
  Learning Rate: 0.01227539096381832
  Sigma Multiplier: 1.0766193138341478
  Initialization Multiplier: 0.6961498670802553
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.50it/s, loss=-0.000200, elapsed time=0.04, total time=9.98]
[I 2025-06-07 15:38:07,274] Trial 939 finished with value: -0.0002004877235389071 and parameters: {'learning_rate': 0.01227539096381832, 'sigma_multiplier': 1.0766193138341478, 'num_layers': 2, 'initialization_multiplier': 0.6961498670802553}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 939 final loss: -0.00020049
Trial 940:
  Learning Rate: 0.009438866936721836
  Sigma Multiplier: 1.04013507470643
  Initialization Multiplier: 0.6039888242680019
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.98it/s, loss=-0.000404, elapsed time=0.06, total time=9.76]
[I 2025-06-07 15:38:17,148] Trial 940 finished with value: -0.000403716619641213 and parameters: {'learning_rate': 0.009438866936721836, 'sigma_multiplier': 1.04013507470643, 'num_layers': 2, 'initialization_multiplier': 0.6039888242680019}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 940 final loss: -0.00040372
Trial 941:
  Learning Rate: 0.006693583363702875
  Sigma Multiplier: 0.9651290492660097
  Initialization Multiplier: 0.47084943304758886
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.15it/s, loss=-0.000368, elapsed time=0.05, total time=9.57]
[I 2025-06-07 15:38:26,781] Trial 941 finished with value: -0.00036773594083570806 and parameters: {'learning_rate': 0.006693583363702875, 'sigma_multiplier': 0.9651290492660097, 'num_layers': 2, 'initialization_multiplier': 0.47084943304758886}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 941 final loss: -0.00036774
Trial 942:
  Learning Rate: 0.011116273823285856
  Sigma Multiplier: 1.1661232675376574
  Initialization Multiplier: 0.6416931370424532
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.45it/s, loss=-0.000378, elapsed time=0.05, total time=9.39]
[I 2025-06-07 15:38:36,237] Trial 942 finished with value: -0.00037771835708315794 and parameters: {'learning_rate': 0.011116273823285856, 'sigma_multiplier': 1.1661232675376574, 'num_layers': 2, 'initialization_multiplier': 0.6416931370424532}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 942 final loss: -0.00037772
Trial 943:
  Learning Rate: 0.030425042427697023
  Sigma Multiplier: 1.0170648204236776
  Initialization Multiplier: 0.5752449246158906
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.60it/s, loss=-0.000398, elapsed time=0.05, total time=9.31]
[I 2025-06-07 15:38:45,603] Trial 943 finished with value: -0.00039763247160838855 and parameters: {'learning_rate': 0.030425042427697023, 'sigma_multiplier': 1.0170648204236776, 'num_layers': 2, 'initialization_multiplier': 0.5752449246158906}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 943 final loss: -0.00039763
Trial 944:
  Learning Rate: 0.007967531311995063
  Sigma Multiplier: 1.1109361253580574
  Initialization Multiplier: 0.5252057457570672
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.78it/s, loss=-0.000343, elapsed time=0.04, total time=8.69]
[I 2025-06-07 15:38:54,376] Trial 944 finished with value: -0.00034285263724945626 and parameters: {'learning_rate': 0.007967531311995063, 'sigma_multiplier': 1.1109361253580574, 'num_layers': 2, 'initialization_multiplier': 0.5252057457570672}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 944 final loss: -0.00034285
Trial 945:
  Learning Rate: 0.01395434235935516
  Sigma Multiplier: 1.0758837447520766
  Initialization Multiplier: 0.5939847496348638
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.99it/s, loss=-0.000432, elapsed time=0.06, total time=10.4]
[I 2025-06-07 15:39:04,887] Trial 945 finished with value: -0.0004323944052831441 and parameters: {'learning_rate': 0.01395434235935516, 'sigma_multiplier': 1.0758837447520766, 'num_layers': 2, 'initialization_multiplier': 0.5939847496348638}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 945 final loss: -0.00043239
Trial 946:
  Learning Rate: 0.009638491452377074
  Sigma Multiplier: 0.984579482183042
  Initialization Multiplier: 0.6395291556032228
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.40it/s, loss=-0.000366, elapsed time=0.05, total time=10.7]
[I 2025-06-07 15:39:15,656] Trial 946 finished with value: -0.0003664023171585835 and parameters: {'learning_rate': 0.009638491452377074, 'sigma_multiplier': 0.984579482183042, 'num_layers': 2, 'initialization_multiplier': 0.6395291556032228}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 946 final loss: -0.00036640
Trial 947:
  Learning Rate: 0.005975486851035149
  Sigma Multiplier: 1.0340900893265703
  Initialization Multiplier: 0.47814715508193856
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.04it/s, loss=-0.000363, elapsed time=0.05, total time=10.3]
[I 2025-06-07 15:39:26,039] Trial 947 finished with value: -0.00036258801251993876 and parameters: {'learning_rate': 0.005975486851035149, 'sigma_multiplier': 1.0340900893265703, 'num_layers': 2, 'initialization_multiplier': 0.47814715508193856}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 947 final loss: -0.00036259
Trial 948:
  Learning Rate: 0.007595871540359409
  Sigma Multiplier: 0.9665641760824144
  Initialization Multiplier: 0.5409679667261157
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.59it/s, loss=-0.000372, elapsed time=0.07, total time=10.5]
[I 2025-06-07 15:39:36,588] Trial 948 finished with value: -0.0003716224636718591 and parameters: {'learning_rate': 0.007595871540359409, 'sigma_multiplier': 0.9665641760824144, 'num_layers': 2, 'initialization_multiplier': 0.5409679667261157}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 948 final loss: -0.00037162
Trial 949:
  Learning Rate: 0.001546284224046698
  Sigma Multiplier: 0.3543045777257604
  Initialization Multiplier: 0.683302140244652
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 11.52it/s, loss=0.002882, elapsed time=0.1, total time=13.3] 
[I 2025-06-07 15:39:49,965] Trial 949 finished with value: 0.0028819624862078553 and parameters: {'learning_rate': 0.001546284224046698, 'sigma_multiplier': 0.3543045777257604, 'num_layers': 2, 'initialization_multiplier': 0.683302140244652}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 949 final loss: 0.00288196
Trial 950:
  Learning Rate: 0.012039219453374213
  Sigma Multiplier: 1.0971585166790954
  Initialization Multiplier: 0.5852451675379516
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.55it/s, loss=-0.000376, elapsed time=0.08, total time=9.96]
[I 2025-06-07 15:39:59,979] Trial 950 finished with value: -0.0003758975389242505 and parameters: {'learning_rate': 0.012039219453374213, 'sigma_multiplier': 1.0971585166790954, 'num_layers': 2, 'initialization_multiplier': 0.5852451675379516}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 950 final loss: -0.00037590
Trial 951:
  Learning Rate: 0.015893129811290697
  Sigma Multiplier: 1.0428923696585009
  Initialization Multiplier: 0.501175259434903
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.35it/s, loss=-0.000346, elapsed time=0.06, total time=10.1]
[I 2025-06-07 15:40:10,150] Trial 951 finished with value: -0.00034628412610587854 and parameters: {'learning_rate': 0.015893129811290697, 'sigma_multiplier': 1.0428923696585009, 'num_layers': 2, 'initialization_multiplier': 0.501175259434903}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 951 final loss: -0.00034628
Trial 952:
  Learning Rate: 0.008603520944606664
  Sigma Multiplier: 1.0036593595993968
  Initialization Multiplier: 0.5501228166870145
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:10<00:00, 14.59it/s, loss=-0.000284, elapsed time=0.06, total time=10.7]
[I 2025-06-07 15:40:20,988] Trial 952 finished with value: -0.0002836667672844112 and parameters: {'learning_rate': 0.008603520944606664, 'sigma_multiplier': 1.0036593595993968, 'num_layers': 2, 'initialization_multiplier': 0.5501228166870145}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 952 final loss: -0.00028367
Trial 953:
  Learning Rate: 0.018465621806595195
  Sigma Multiplier: 1.1496533011859917
  Initialization Multiplier: 0.6084226145793199
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.72it/s, loss=-0.000405, elapsed time=0.05, total time=9.26]
[I 2025-06-07 15:40:30,308] Trial 953 finished with value: -0.00040510285179630413 and parameters: {'learning_rate': 0.018465621806595195, 'sigma_multiplier': 1.1496533011859917, 'num_layers': 2, 'initialization_multiplier': 0.6084226145793199}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 953 final loss: -0.00040510
Trial 954:
  Learning Rate: 0.006849503166090917
  Sigma Multiplier: 0.933161820703618
  Initialization Multiplier: 0.48126203572807236
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.67it/s, loss=-0.000362, elapsed time=0.05, total time=9.81]
[I 2025-06-07 15:40:40,174] Trial 954 finished with value: -0.00036222075413327364 and parameters: {'learning_rate': 0.006849503166090917, 'sigma_multiplier': 0.933161820703618, 'num_layers': 2, 'initialization_multiplier': 0.48126203572807236}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 954 final loss: -0.00036222
Trial 955:
  Learning Rate: 0.010185736800508126
  Sigma Multiplier: 1.067979760617431
  Initialization Multiplier: 0.6437358495685264
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.92it/s, loss=-0.000366, elapsed time=0.06, total time=9.68]
[I 2025-06-07 15:40:49,907] Trial 955 finished with value: -0.0003656437304779344 and parameters: {'learning_rate': 0.010185736800508126, 'sigma_multiplier': 1.067979760617431, 'num_layers': 2, 'initialization_multiplier': 0.6437358495685264}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 955 final loss: -0.00036564
Trial 956:
  Learning Rate: 0.013262849912806395
  Sigma Multiplier: 0.9739103742164696
  Initialization Multiplier: 0.5218291597253819
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.44it/s, loss=-0.000350, elapsed time=0.04, total time=9.39]
[I 2025-06-07 15:40:59,358] Trial 956 finished with value: -0.00034982327796379264 and parameters: {'learning_rate': 0.013262849912806395, 'sigma_multiplier': 0.9739103742164696, 'num_layers': 2, 'initialization_multiplier': 0.5218291597253819}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 956 final loss: -0.00034982
Trial 957:
  Learning Rate: 0.005666714826798747
  Sigma Multiplier: 1.0114170927651873
  Initialization Multiplier: 0.5671525275599781
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.10it/s, loss=-0.000387, elapsed time=0.06, total time=9.54]
[I 2025-06-07 15:41:09,006] Trial 957 finished with value: -0.00038737956849639355 and parameters: {'learning_rate': 0.005666714826798747, 'sigma_multiplier': 1.0114170927651873, 'num_layers': 2, 'initialization_multiplier': 0.5671525275599781}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 957 final loss: -0.00038738
Trial 958:
  Learning Rate: 0.008714650392563666
  Sigma Multiplier: 1.0498717613368282
  Initialization Multiplier: 0.15865387993426544
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.87it/s, loss=-0.000205, elapsed time=0.05, total time=9.71]
[I 2025-06-07 15:41:18,775] Trial 958 finished with value: -0.00020533207750420552 and parameters: {'learning_rate': 0.008714650392563666, 'sigma_multiplier': 1.0498717613368282, 'num_layers': 2, 'initialization_multiplier': 0.15865387993426544}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 958 final loss: -0.00020533
Trial 959:
  Learning Rate: 0.011162347804194379
  Sigma Multiplier: 1.10712873406219
  Initialization Multiplier: 0.4389303343624797
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.92it/s, loss=-0.000354, elapsed time=0.07, total time=9.11]
[I 2025-06-07 15:41:27,951] Trial 959 finished with value: -0.0003544233048280788 and parameters: {'learning_rate': 0.011162347804194379, 'sigma_multiplier': 1.10712873406219, 'num_layers': 2, 'initialization_multiplier': 0.4389303343624797}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 959 final loss: -0.00035442
Trial 960:
  Learning Rate: 0.01483490043218006
  Sigma Multiplier: 0.9255681164399694
  Initialization Multiplier: 0.7071787717452751
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.05it/s, loss=-0.000308, elapsed time=0.06, total time=10.2]
[I 2025-06-07 15:41:38,228] Trial 960 finished with value: -0.0003083092048626354 and parameters: {'learning_rate': 0.01483490043218006, 'sigma_multiplier': 0.9255681164399694, 'num_layers': 2, 'initialization_multiplier': 0.7071787717452751}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 960 final loss: -0.00030831
Trial 961:
  Learning Rate: 0.02333267088050482
  Sigma Multiplier: 1.0158618444392917
  Initialization Multiplier: 0.6149055061884463
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.82it/s, loss=-0.000182, elapsed time=0.05, total time=9.79]
[I 2025-06-07 15:41:48,138] Trial 961 finished with value: -0.00018178461278823237 and parameters: {'learning_rate': 0.02333267088050482, 'sigma_multiplier': 1.0158618444392917, 'num_layers': 2, 'initialization_multiplier': 0.6149055061884463}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 961 final loss: -0.00018178
Trial 962:
  Learning Rate: 0.007585040071803404
  Sigma Multiplier: 1.2028043726547386
  Initialization Multiplier: 0.5182800990875237
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.26it/s, loss=-0.000387, elapsed time=0.04, total time=8.98]
[I 2025-06-07 15:41:57,172] Trial 962 finished with value: -0.0003868799282252299 and parameters: {'learning_rate': 0.007585040071803404, 'sigma_multiplier': 1.2028043726547386, 'num_layers': 2, 'initialization_multiplier': 0.5182800990875237}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 962 final loss: -0.00038688
Trial 963:
  Learning Rate: 0.00984393877666644
  Sigma Multiplier: 0.9647488945852623
  Initialization Multiplier: 0.5526901188182229
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.16it/s, loss=-0.000398, elapsed time=0.06, total time=9.58]
[I 2025-06-07 15:42:06,814] Trial 963 finished with value: -0.00039824336090687297 and parameters: {'learning_rate': 0.00984393877666644, 'sigma_multiplier': 0.9647488945852623, 'num_layers': 2, 'initialization_multiplier': 0.5526901188182229}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 963 final loss: -0.00039824
Trial 964:
  Learning Rate: 0.011876307282291193
  Sigma Multiplier: 1.0716532020492702
  Initialization Multiplier: 1.403894940387699
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.19it/s, loss=-0.000212, elapsed time=0.04, total time=9.58]
[I 2025-06-07 15:42:16,459] Trial 964 finished with value: -0.000211691822203972 and parameters: {'learning_rate': 0.011876307282291193, 'sigma_multiplier': 1.0716532020492702, 'num_layers': 2, 'initialization_multiplier': 1.403894940387699}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 964 final loss: -0.00021169
Trial 965:
  Learning Rate: 0.0068967560834984655
  Sigma Multiplier: 0.995627239857176
  Initialization Multiplier: 0.6513062495075763
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.92it/s, loss=-0.000410, elapsed time=0.05, total time=9.74]
[I 2025-06-07 15:42:26,256] Trial 965 finished with value: -0.0004097679286125485 and parameters: {'learning_rate': 0.0068967560834984655, 'sigma_multiplier': 0.995627239857176, 'num_layers': 2, 'initialization_multiplier': 0.6513062495075763}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 965 final loss: -0.00040977
Trial 966:
  Learning Rate: 0.019632148720670765
  Sigma Multiplier: 1.1371146528551124
  Initialization Multiplier: 0.4656951524219086
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.02it/s, loss=-0.000368, elapsed time=0.05, total time=9.09]
[I 2025-06-07 15:42:35,405] Trial 966 finished with value: -0.00036778910967494516 and parameters: {'learning_rate': 0.019632148720670765, 'sigma_multiplier': 1.1371146528551124, 'num_layers': 2, 'initialization_multiplier': 0.4656951524219086}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 966 final loss: -0.00036779
Trial 967:
  Learning Rate: 0.008835329162678918
  Sigma Multiplier: 1.0384654030773899
  Initialization Multiplier: 0.5851912891382413
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.71it/s, loss=-0.000467, elapsed time=0.04, total time=9.26]
[I 2025-06-07 15:42:44,725] Trial 967 finished with value: -0.0004667630646655456 and parameters: {'learning_rate': 0.008835329162678918, 'sigma_multiplier': 1.0384654030773899, 'num_layers': 2, 'initialization_multiplier': 0.5851912891382413}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 967 final loss: -0.00046676
Trial 968:
  Learning Rate: 0.007683588709335489
  Sigma Multiplier: 0.9591762058823726
  Initialization Multiplier: 0.6101157402277106
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.90it/s, loss=-0.000275, elapsed time=0.06, total time=9.75]
[I 2025-06-07 15:42:54,537] Trial 968 finished with value: -0.00027455577525511976 and parameters: {'learning_rate': 0.007683588709335489, 'sigma_multiplier': 0.9591762058823726, 'num_layers': 2, 'initialization_multiplier': 0.6101157402277106}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 968 final loss: -0.00027456
Trial 969:
  Learning Rate: 0.008662577655818245
  Sigma Multiplier: 1.0325364099997092
  Initialization Multiplier: 0.6656659056653977
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.16it/s, loss=-0.000429, elapsed time=0.04, total time=9.59]
[I 2025-06-07 15:43:04,199] Trial 969 finished with value: -0.00042874007451961806 and parameters: {'learning_rate': 0.008662577655818245, 'sigma_multiplier': 1.0325364099997092, 'num_layers': 2, 'initialization_multiplier': 0.6656659056653977}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 969 final loss: -0.00042874
Trial 970:
  Learning Rate: 0.006465028186559955
  Sigma Multiplier: 0.8965082880163318
  Initialization Multiplier: 0.5903639272403488
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.90it/s, loss=-0.000355, elapsed time=0.05, total time=9.66]
[I 2025-06-07 15:43:13,916] Trial 970 finished with value: -0.0003547006745513932 and parameters: {'learning_rate': 0.006465028186559955, 'sigma_multiplier': 0.8965082880163318, 'num_layers': 2, 'initialization_multiplier': 0.5903639272403488}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 970 final loss: -0.00035470
Trial 971:
  Learning Rate: 0.009007040480783476
  Sigma Multiplier: 0.9858401262236977
  Initialization Multiplier: 0.6368639029493617
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.14it/s, loss=-0.000359, elapsed time=0.06, total time=9.58]
[I 2025-06-07 15:43:23,560] Trial 971 finished with value: -0.0003587925452527878 and parameters: {'learning_rate': 0.009007040480783476, 'sigma_multiplier': 0.9858401262236977, 'num_layers': 2, 'initialization_multiplier': 0.6368639029493617}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 971 final loss: -0.00035879
Trial 972:
  Learning Rate: 0.005600401249061966
  Sigma Multiplier: 0.8117284586606787
  Initialization Multiplier: 0.5792802967024884
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.48it/s, loss=-0.000297, elapsed time=0.06, total time=10]  
[I 2025-06-07 15:43:33,634] Trial 972 finished with value: -0.0002973446536005685 and parameters: {'learning_rate': 0.005600401249061966, 'sigma_multiplier': 0.8117284586606787, 'num_layers': 2, 'initialization_multiplier': 0.5792802967024884}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 972 final loss: -0.00029734
Trial 973:
  Learning Rate: 0.007393278582469441
  Sigma Multiplier: 0.9260875126651358
  Initialization Multiplier: 0.6933715068516902
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.66it/s, loss=-0.000257, elapsed time=0.05, total time=9.85]
[I 2025-06-07 15:43:43,539] Trial 973 finished with value: -0.00025734809984033426 and parameters: {'learning_rate': 0.007393278582469441, 'sigma_multiplier': 0.9260875126651358, 'num_layers': 2, 'initialization_multiplier': 0.6933715068516902}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 973 final loss: -0.00025735
Trial 974:
  Learning Rate: 0.00982336299736804
  Sigma Multiplier: 1.0230224726307975
  Initialization Multiplier: 0.6247303892792383
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.22it/s, loss=-0.000371, elapsed time=0.07, total time=9.49]
[I 2025-06-07 15:43:53,090] Trial 974 finished with value: -0.0003710526272834612 and parameters: {'learning_rate': 0.00982336299736804, 'sigma_multiplier': 1.0230224726307975, 'num_layers': 2, 'initialization_multiplier': 0.6247303892792383}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 974 final loss: -0.00037105
Trial 975:
  Learning Rate: 0.008374151393007245
  Sigma Multiplier: 0.9664992710786987
  Initialization Multiplier: 0.5704909150980324
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.02it/s, loss=-0.000308, elapsed time=0.05, total time=9.66]
[I 2025-06-07 15:44:02,811] Trial 975 finished with value: -0.0003083430137396861 and parameters: {'learning_rate': 0.008374151393007245, 'sigma_multiplier': 0.9664992710786987, 'num_layers': 2, 'initialization_multiplier': 0.5704909150980324}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 975 final loss: -0.00030834
Trial 976:
  Learning Rate: 0.006615148036069185
  Sigma Multiplier: 1.0049041459620354
  Initialization Multiplier: 0.5976948099414148
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.68it/s, loss=-0.000463, elapsed time=0.06, total time=9.91]
[I 2025-06-07 15:44:12,792] Trial 976 finished with value: -0.0004632974191307725 and parameters: {'learning_rate': 0.006615148036069185, 'sigma_multiplier': 1.0049041459620354, 'num_layers': 2, 'initialization_multiplier': 0.5976948099414148}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 976 final loss: -0.00046330
Trial 977:
  Learning Rate: 0.004905889748881154
  Sigma Multiplier: 0.9372448668085999
  Initialization Multiplier: 0.66562842314024
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.83it/s, loss=-0.000346, elapsed time=0.05, total time=9.75]
[I 2025-06-07 15:44:22,603] Trial 977 finished with value: -0.00034625457329405697 and parameters: {'learning_rate': 0.004905889748881154, 'sigma_multiplier': 0.9372448668085999, 'num_layers': 2, 'initialization_multiplier': 0.66562842314024}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 977 final loss: -0.00034625
Trial 978:
  Learning Rate: 0.005402026130885621
  Sigma Multiplier: 0.9929862892631617
  Initialization Multiplier: 0.6169266607304988
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.72it/s, loss=-0.000431, elapsed time=0.07, total time=9.81]
[I 2025-06-07 15:44:32,471] Trial 978 finished with value: -0.0004312095456862956 and parameters: {'learning_rate': 0.005402026130885621, 'sigma_multiplier': 0.9929862892631617, 'num_layers': 2, 'initialization_multiplier': 0.6169266607304988}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 978 final loss: -0.00043121
Trial 979:
  Learning Rate: 0.00612681727760583
  Sigma Multiplier: 0.9677789456812217
  Initialization Multiplier: 0.5585829700908691
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.97it/s, loss=-0.000346, elapsed time=0.05, total time=9.65]
[I 2025-06-07 15:44:42,169] Trial 979 finished with value: -0.0003461382622802457 and parameters: {'learning_rate': 0.00612681727760583, 'sigma_multiplier': 0.9677789456812217, 'num_layers': 2, 'initialization_multiplier': 0.5585829700908691}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 979 final loss: -0.00034614
Trial 980:
  Learning Rate: 0.0059880079563173145
  Sigma Multiplier: 0.9257089347235798
  Initialization Multiplier: 0.637474868309978
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:13<00:00, 10.96it/s, loss=-0.000243, elapsed time=0.08, total time=14]  
[I 2025-06-07 15:44:56,263] Trial 980 finished with value: -0.00024345381618111263 and parameters: {'learning_rate': 0.0059880079563173145, 'sigma_multiplier': 0.9257089347235798, 'num_layers': 2, 'initialization_multiplier': 0.637474868309978}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 980 final loss: -0.00024345
Trial 981:
  Learning Rate: 0.004295028034625137
  Sigma Multiplier: 0.9984686555219818
  Initialization Multiplier: 0.5883155579965132
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.16it/s, loss=-0.000386, elapsed time=0.04, total time=9.57]
[I 2025-06-07 15:45:05,910] Trial 981 finished with value: -0.0003857876241830675 and parameters: {'learning_rate': 0.004295028034625137, 'sigma_multiplier': 0.9984686555219818, 'num_layers': 2, 'initialization_multiplier': 0.5883155579965132}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 981 final loss: -0.00038579
Trial 982:
  Learning Rate: 0.006678590836042641
  Sigma Multiplier: 0.8683355266676767
  Initialization Multiplier: 0.6937049033019124
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.24it/s, loss=-0.000222, elapsed time=0.05, total time=10.1]
[I 2025-06-07 15:45:16,111] Trial 982 finished with value: -0.00022240997948880315 and parameters: {'learning_rate': 0.006678590836042641, 'sigma_multiplier': 0.8683355266676767, 'num_layers': 2, 'initialization_multiplier': 0.6937049033019124}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 982 final loss: -0.00022241
Trial 983:
  Learning Rate: 0.005306362858751222
  Sigma Multiplier: 0.9586590934979102
  Initialization Multiplier: 0.5340397710237585
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.69it/s, loss=-0.000309, elapsed time=0.05, total time=9.82]
[I 2025-06-07 15:45:25,991] Trial 983 finished with value: -0.00030912752068786367 and parameters: {'learning_rate': 0.005306362858751222, 'sigma_multiplier': 0.9586590934979102, 'num_layers': 2, 'initialization_multiplier': 0.5340397710237585}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 983 final loss: -0.00030913
Trial 984:
  Learning Rate: 0.007368721717467708
  Sigma Multiplier: 1.027807810411579
  Initialization Multiplier: 0.6090648266352094
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.10it/s, loss=-0.000474, elapsed time=0.05, total time=9]   
[I 2025-06-07 15:45:35,045] Trial 984 finished with value: -0.0004736571262072813 and parameters: {'learning_rate': 0.007368721717467708, 'sigma_multiplier': 1.027807810411579, 'num_layers': 2, 'initialization_multiplier': 0.6090648266352094}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 984 final loss: -0.00047366
Trial 985:
  Learning Rate: 0.006030849509432131
  Sigma Multiplier: 1.0333976070981423
  Initialization Multiplier: 0.6643601243305577
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.24it/s, loss=-0.000302, elapsed time=0.06, total time=8.97]
[I 2025-06-07 15:45:44,082] Trial 985 finished with value: -0.0003024229838217218 and parameters: {'learning_rate': 0.006030849509432131, 'sigma_multiplier': 1.0333976070981423, 'num_layers': 2, 'initialization_multiplier': 0.6643601243305577}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 985 final loss: -0.00030242
Trial 986:
  Learning Rate: 0.006836542785672479
  Sigma Multiplier: 1.0398492489134923
  Initialization Multiplier: 0.7414882934138918
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.06it/s, loss=-0.000111, elapsed time=0.06, total time=9.63]
[I 2025-06-07 15:45:53,768] Trial 986 finished with value: -0.0001113720052846334 and parameters: {'learning_rate': 0.006836542785672479, 'sigma_multiplier': 1.0398492489134923, 'num_layers': 2, 'initialization_multiplier': 0.7414882934138918}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 986 final loss: -0.00011137
Trial 987:
  Learning Rate: 0.0049149483539041245
  Sigma Multiplier: 1.0095321074421495
  Initialization Multiplier: 0.632813878128251
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.17it/s, loss=-0.000384, elapsed time=0.07, total time=9.54]
[I 2025-06-07 15:46:03,385] Trial 987 finished with value: -0.00038406111521000565 and parameters: {'learning_rate': 0.0049149483539041245, 'sigma_multiplier': 1.0095321074421495, 'num_layers': 2, 'initialization_multiplier': 0.632813878128251}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 987 final loss: -0.00038406
Trial 988:
  Learning Rate: 0.006317600864127386
  Sigma Multiplier: 1.0473107050096218
  Initialization Multiplier: 0.686961500358222
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.98it/s, loss=-0.000417, elapsed time=0.07, total time=9.66]
[I 2025-06-07 15:46:13,134] Trial 988 finished with value: -0.0004170177566190985 and parameters: {'learning_rate': 0.006317600864127386, 'sigma_multiplier': 1.0473107050096218, 'num_layers': 2, 'initialization_multiplier': 0.686961500358222}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 988 final loss: -0.00041702
Trial 989:
  Learning Rate: 0.00720715090403372
  Sigma Multiplier: 1.0066833323088196
  Initialization Multiplier: 0.723162873222938
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.03it/s, loss=-0.000394, elapsed time=0.05, total time=9.65]
[I 2025-06-07 15:46:22,847] Trial 989 finished with value: -0.0003937622152473024 and parameters: {'learning_rate': 0.00720715090403372, 'sigma_multiplier': 1.0066833323088196, 'num_layers': 2, 'initialization_multiplier': 0.723162873222938}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 989 final loss: -0.00039376
Trial 990:
  Learning Rate: 0.004322611828992662
  Sigma Multiplier: 1.0498607010789582
  Initialization Multiplier: 0.6404768141396155
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.92it/s, loss=-0.000293, elapsed time=0.05, total time=9.7] 
[I 2025-06-07 15:46:32,603] Trial 990 finished with value: -0.0002930881307424643 and parameters: {'learning_rate': 0.004322611828992662, 'sigma_multiplier': 1.0498607010789582, 'num_layers': 2, 'initialization_multiplier': 0.6404768141396155}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 990 final loss: -0.00029309
Trial 991:
  Learning Rate: 0.003138696537243557
  Sigma Multiplier: 0.9990997804502083
  Initialization Multiplier: 0.602711231133788
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.11it/s, loss=-0.000325, elapsed time=0.04, total time=8.58]
[I 2025-06-07 15:46:41,248] Trial 991 finished with value: -0.0003246842442475048 and parameters: {'learning_rate': 0.003138696537243557, 'sigma_multiplier': 0.9990997804502083, 'num_layers': 2, 'initialization_multiplier': 0.602711231133788}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 991 final loss: -0.00032468
Trial 992:
  Learning Rate: 0.005583274089707817
  Sigma Multiplier: 0.9543569941129825
  Initialization Multiplier: 1.035056100469835
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.73it/s, loss=0.000601, elapsed time=0.04, total time=8.27]
[I 2025-06-07 15:46:49,587] Trial 992 finished with value: 0.0006013461737401668 and parameters: {'learning_rate': 0.005583274089707817, 'sigma_multiplier': 0.9543569941129825, 'num_layers': 2, 'initialization_multiplier': 1.035056100469835}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 992 final loss: 0.00060135
Trial 993:
  Learning Rate: 0.007404304781391532
  Sigma Multiplier: 1.0272130155380497
  Initialization Multiplier: 0.5968185599110258
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.12it/s, loss=-0.000366, elapsed time=0.04, total time=8.25]
[I 2025-06-07 15:46:57,945] Trial 993 finished with value: -0.0003664970706170483 and parameters: {'learning_rate': 0.007404304781391532, 'sigma_multiplier': 1.0272130155380497, 'num_layers': 2, 'initialization_multiplier': 0.5968185599110258}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 993 final loss: -0.00036650
Trial 994:
  Learning Rate: 0.006468855759732768
  Sigma Multiplier: 1.0654710628988562
  Initialization Multiplier: 0.658159387836087
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.28it/s, loss=-0.000379, elapsed time=0.05, total time=8.64]
[I 2025-06-07 15:47:06,824] Trial 994 finished with value: -0.0003785112473632533 and parameters: {'learning_rate': 0.006468855759732768, 'sigma_multiplier': 1.0654710628988562, 'num_layers': 2, 'initialization_multiplier': 0.658159387836087}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 994 final loss: -0.00037851
Trial 995:
  Learning Rate: 0.005398190925630912
  Sigma Multiplier: 0.9016917897387358
  Initialization Multiplier: 0.6166130228038761
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.46it/s, loss=-0.000269, elapsed time=0.05, total time=10.1]
[I 2025-06-07 15:47:17,122] Trial 995 finished with value: -0.0002694635615636314 and parameters: {'learning_rate': 0.005398190925630912, 'sigma_multiplier': 0.9016917897387358, 'num_layers': 2, 'initialization_multiplier': 0.6166130228038761}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 995 final loss: -0.00026946
Trial 996:
  Learning Rate: 0.007732462385378712
  Sigma Multiplier: 0.9816834432590008
  Initialization Multiplier: 0.5703732296822861
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.41it/s, loss=-0.000278, elapsed time=0.05, total time=9.4] 
[I 2025-06-07 15:47:26,579] Trial 996 finished with value: -0.0002782105569957163 and parameters: {'learning_rate': 0.007732462385378712, 'sigma_multiplier': 0.9816834432590008, 'num_layers': 2, 'initialization_multiplier': 0.5703732296822861}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 996 final loss: -0.00027821
Trial 997:
  Learning Rate: 0.006829369172147538
  Sigma Multiplier: 1.0252400901128047
  Initialization Multiplier: 0.6126278526509031
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.04it/s, loss=-0.000382, elapsed time=0.05, total time=9.03]
[I 2025-06-07 15:47:35,668] Trial 997 finished with value: -0.0003821121324590795 and parameters: {'learning_rate': 0.006829369172147538, 'sigma_multiplier': 1.0252400901128047, 'num_layers': 2, 'initialization_multiplier': 0.6126278526509031}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 997 final loss: -0.00038211
Trial 998:
  Learning Rate: 0.0037553398241162773
  Sigma Multiplier: 0.9667225575577277
  Initialization Multiplier: 0.6707339856105733
  Number of Layers: 5


Training Progress: 100%|██████████| 150/150 [00:14<00:00, 10.27it/s, loss=-0.000012, elapsed time=0.1, total time=14.9] 
[I 2025-06-07 15:47:50,607] Trial 998 finished with value: -1.225031571757472e-05 and parameters: {'learning_rate': 0.0037553398241162773, 'sigma_multiplier': 0.9667225575577277, 'num_layers': 5, 'initialization_multiplier': 0.6707339856105733}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 998 final loss: -0.00001225
Trial 999:
  Learning Rate: 0.008082341164931207
  Sigma Multiplier: 1.0567413158765422
  Initialization Multiplier: 0.5627801212154002
  Number of Layers: 2


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 16.54it/s, loss=-0.000476, elapsed time=0.05, total time=9.37]
[I 2025-06-07 15:48:00,041] Trial 999 finished with value: -0.00047559306693846395 and parameters: {'learning_rate': 0.008082341164931207, 'sigma_multiplier': 1.0567413158765422, 'num_layers': 2, 'initialization_multiplier': 0.5627801212154002}. Best is trial 248 with value: -0.00048807768206494604.


Training has not converged after 150 steps
Trial 999 final loss: -0.00047559


In [8]:
# Pull the winning trial's configuration and its objective value out of the study.
best_hyperparams, best_loss_value = study.best_params, study.best_value

# Emit the summary in a single call; each item lands on its own line.
print(
    "\nOptimization Finished!",
    f"Best hyperparameters found: {best_hyperparams}",
    f"Best loss value: {best_loss_value}",
    sep="\n",
)


Optimization Finished!
Best hyperparameters found: {'learning_rate': 0.006115521401940773, 'sigma_multiplier': 1.0372657813112909, 'num_layers': 2, 'initialization_multiplier': 0.6303388864180629}
Best loss value: -0.00048807768206494604


In [9]:
# Unpack the best trial's hyperparameters into the notebook-level constants
# used by the final training run. Note that NUM_LAYERS is rebound here.
LR, SIGMA_M, NUM_LAYERS, INIT_M = (
    best_hyperparams[hp_name]
    for hp_name in (
        "learning_rate",
        "sigma_multiplier",
        "num_layers",
        "initialization_multiplier",
    )
)

In [10]:
def train_on_dataset(dataset=train_ds, *, n_iters=2000, n_ops=2000, n_samples=2000, seed=42):
    """Train an IQP circuit on ``dataset`` and return the trained parameters.

    The circuit layout and optimiser settings are read at call time from the
    notebook-level globals ``NODES``, ``NUM_LAYERS``, ``LR``, ``SIGMA_M`` and
    ``INIT_M``, so the cells defining them must have been executed first.

    Args:
        dataset: Training samples. Used to initialise the gate parameters, to
            compute the kernel bandwidth, and as ground truth for the MMD loss.
            Defaults to the ``train_ds`` object that existed when this cell ran.
        n_iters: Number of optimisation steps passed to ``Trainer.train``.
        n_ops: Forwarded to the loss function as ``n_ops``.
        n_samples: Forwarded to the loss function as ``n_samples``.
        seed: Integer seed for the JAX PRNG key handed to the loss function.

    Returns:
        ``trainer.final_params`` after ``n_iters`` training steps.
    """
    grid_conn = aachen_connectivity()
    # NODES choose 2 -- presumably one qubit per possible edge of the graph.
    num_qubits = NODES * (NODES - 1) // 2
    gates = efficient_connectivity_gates(grid_conn, num_qubits, NUM_LAYERS)

    circuit = iqp.IqpSimulator(num_qubits, gates, device="lightning.qubit")

    # Data-driven initialisation, rescaled by the tuned multiplier.
    initial_params = initialize_from_data(gates, dataset) * INIT_M
    # Median-heuristic bandwidth, rescaled by the tuned multiplier.
    sigma = median_heuristic(dataset) * SIGMA_M

    loss_kwargs = {
        "params": initial_params,
        "iqp_circuit": circuit,
        "ground_truth": dataset,
        "sigma": [sigma],  # a single bandwidth, wrapped in a list
        "n_ops": n_ops,
        "n_samples": n_samples,
        "key": jax.random.PRNGKey(seed),
    }

    trainer = iqp.Trainer("Adam", iqp.gen_qml.mmd_loss_iqp, stepsize=LR)
    trainer.train(n_iters=n_iters, loss_kwargs=loss_kwargs, turbo=1)

    return trainer.final_params

In [11]:
# Final training run on the full training set using the tuned hyperparameters.
params = train_on_dataset(dataset=train_ds)

Training Progress: 100%|██████████| 2000/2000 [02:15<00:00, 14.73it/s, loss=-0.000344, elapsed time=0.04, total time=136] 

Training has not converged after 2000 steps





In [12]:
import numpy as np
from pathlib import Path

# The file name encodes the run configuration so each hyperparameter set gets its own file.
params_path = Path(
    f'./results/params/params_{NODES}N_{TYPE}_{CONN}_LR{LR}_SIGMA{SIGMA_M}_INIT{INIT_M}_NUMLAYERS{NUM_LAYERS}.npy'
)
# np.save does not create missing directories; make sure the target folder
# exists so the trained parameters are not lost to a FileNotFoundError.
params_path.parent.mkdir(parents=True, exist_ok=True)
np.save(params_path, params)