# HPO

In [1]:
import iqpopt as iqp
from iqpopt.utils import initialize_from_data, local_gates
import iqpopt.gen_qml as genq
from iqpopt.gen_qml.utils import median_heuristic
import optuna
import pennylane as qml
import jax
from jax import numpy as jnp
from utils.nisq import aachen_connectivity, efficient_connectivity_gates
from datasets.bipartites import BipartiteGraphDataset
from datasets.er import ErdosRenyiGraphDataset
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
NODES = 8
TYPE = "Bipartite"
CONN = "Sparse"
NUM_LAYERS = 1
QUBITS = NODES * (NODES - 1) //2

In [3]:
ds_path = f'./datasets/raw_data/{NODES}N_{TYPE}_{CONN}.pkl'
train_ds = jnp.array(BipartiteGraphDataset(nodes = 1, edge_prob=0.1).from_file(ds_path).vectors.copy())

[Dataset] Loaded 133 samples from ./datasets/raw_data/8N_Bipartite_Sparse.pkl
  Created: 2025-05-30T13:15:11.229435
  Unique graphs: 133
  Version: 1.0


In [4]:
grid_conn = aachen_connectivity()
gates = efficient_connectivity_gates(grid_conn, QUBITS, 1) 
circ = iqp.IqpSimulator(QUBITS, gates, device='lightning.qubit')

base_key = jax.random.PRNGKey(42)

In [5]:
base_sigma = median_heuristic(train_ds)

In [6]:
from utils.hpo import run_hpo

In [7]:
study = run_hpo(
    grid_conn,
    QUBITS,
    base_sigma,
    train_ds = train_ds,
    n_trials = 1000,
    n_iters_hpo = 150,
    n_ops = 2000,
    n_samples = 2000,
)

[I 2025-06-07 22:56:52,028] A new study created in memory with name: no-name-8ec56977-c3c7-4eb1-8011-c117a8581f19


Trial 0:
  Learning Rate: 0.00016447782401758112
  Sigma Multiplier: 1.5576749343064962
  Initialization Multiplier: 0.8999379301424444


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 32.41it/s, loss=0.044775, elapsed time=0.03, total time=5.31]
[I 2025-06-07 22:56:57,626] Trial 0 finished with value: 0.044774692897688824 and parameters: {'learning_rate': 0.00016447782401758112, 'sigma_multiplier': 1.5576749343064962, 'initialization_multiplier': 0.8999379301424444}. Best is trial 0 with value: 0.044774692897688824.


Training has not converged after 150 steps
Trial 0 final loss: 0.04477469
Trial 1:
  Learning Rate: 0.00042400540010504586
  Sigma Multiplier: 0.761427310862141
  Initialization Multiplier: 1.902673145232935


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.63it/s, loss=0.105482, elapsed time=0.03, total time=5.56]
[I 2025-06-07 22:57:03,212] Trial 1 finished with value: 0.10548245757799263 and parameters: {'learning_rate': 0.00042400540010504586, 'sigma_multiplier': 0.761427310862141, 'initialization_multiplier': 1.902673145232935}. Best is trial 0 with value: 0.044774692897688824.


Training has not converged after 150 steps
Trial 1 final loss: 0.10548246
Trial 2:
  Learning Rate: 0.00023607694637308308
  Sigma Multiplier: 1.0471033140324388
  Initialization Multiplier: 1.3022147349215836


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.24it/s, loss=0.077093, elapsed time=0.03, total time=4.93]
[I 2025-06-07 22:57:08,171] Trial 2 finished with value: 0.07709345353970115 and parameters: {'learning_rate': 0.00023607694637308308, 'sigma_multiplier': 1.0471033140324388, 'initialization_multiplier': 1.3022147349215836}. Best is trial 0 with value: 0.044774692897688824.


Training has not converged after 150 steps
Trial 2 final loss: 0.07709345
Trial 3:
  Learning Rate: 0.0009722997059148208
  Sigma Multiplier: 0.2586856192449258
  Initialization Multiplier: 1.0597542699428621


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.39it/s, loss=0.002984, elapsed time=0.04, total time=6.55]
[I 2025-06-07 22:57:14,742] Trial 3 finished with value: 0.0029837589219832146 and parameters: {'learning_rate': 0.0009722997059148208, 'sigma_multiplier': 0.2586856192449258, 'initialization_multiplier': 1.0597542699428621}. Best is trial 3 with value: 0.0029837589219832146.


Training has not converged after 150 steps
Trial 3 final loss: 0.00298376
Trial 4:
  Learning Rate: 0.08921599069570095
  Sigma Multiplier: 1.8818448711637292
  Initialization Multiplier: 1.4245231951489774


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 35.48it/s, loss=0.025651, elapsed time=0.03, total time=4.36]
[I 2025-06-07 22:57:19,125] Trial 4 finished with value: 0.025650530345498503 and parameters: {'learning_rate': 0.08921599069570095, 'sigma_multiplier': 1.8818448711637292, 'initialization_multiplier': 1.4245231951489774}. Best is trial 3 with value: 0.0029837589219832146.


Training has not converged after 150 steps
Trial 4 final loss: 0.02565053
Trial 5:
  Learning Rate: 0.03638864046780868
  Sigma Multiplier: 1.977750215654497
  Initialization Multiplier: 0.8666017898414853


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 35.99it/s, loss=-0.000827, elapsed time=0.02, total time=4.3] 
[I 2025-06-07 22:57:23,455] Trial 5 finished with value: -0.0008271825917512904 and parameters: {'learning_rate': 0.03638864046780868, 'sigma_multiplier': 1.977750215654497, 'initialization_multiplier': 0.8666017898414853}. Best is trial 5 with value: -0.0008271825917512904.


Training has not converged after 150 steps
Trial 5 final loss: -0.00082718
Trial 6:
  Learning Rate: 0.0007397867648565587
  Sigma Multiplier: 0.7745960259230081
  Initialization Multiplier: 0.24034963677657611


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.27it/s, loss=0.010109, elapsed time=0.03, total time=5.44]
[I 2025-06-07 22:57:28,924] Trial 6 finished with value: 0.010109222745816396 and parameters: {'learning_rate': 0.0007397867648565587, 'sigma_multiplier': 0.7745960259230081, 'initialization_multiplier': 0.24034963677657611}. Best is trial 5 with value: -0.0008271825917512904.


Training has not converged after 150 steps
Trial 6 final loss: 0.01010922
Trial 7:
  Learning Rate: 0.00023069114478282236
  Sigma Multiplier: 0.17506738897808966
  Initialization Multiplier: 0.7101795079224068


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.81it/s, loss=0.000193, elapsed time=0.04, total time=6.43] 
[I 2025-06-07 22:57:35,384] Trial 7 finished with value: 0.0001926930011797248 and parameters: {'learning_rate': 0.00023069114478282236, 'sigma_multiplier': 0.17506738897808966, 'initialization_multiplier': 0.7101795079224068}. Best is trial 5 with value: -0.0008271825917512904.


Training has not converged after 150 steps
Trial 7 final loss: 0.00019269
Trial 8:
  Learning Rate: 5.8126976364149976e-05
  Sigma Multiplier: 1.73992662203569
  Initialization Multiplier: 1.4204423054262951


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 35.16it/s, loss=0.037151, elapsed time=0.02, total time=4.41]
[I 2025-06-07 22:57:39,820] Trial 8 finished with value: 0.03715055071087456 and parameters: {'learning_rate': 5.8126976364149976e-05, 'sigma_multiplier': 1.73992662203569, 'initialization_multiplier': 1.4204423054262951}. Best is trial 5 with value: -0.0008271825917512904.


Training has not converged after 150 steps
Trial 8 final loss: 0.03715055
Trial 9:
  Learning Rate: 0.00018130166448989158
  Sigma Multiplier: 0.5699404755599715
  Initialization Multiplier: 1.1510539378793674


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.38it/s, loss=0.083517, elapsed time=0.04, total time=6.04]
[I 2025-06-07 22:57:45,887] Trial 9 finished with value: 0.0835169831918569 and parameters: {'learning_rate': 0.00018130166448989158, 'sigma_multiplier': 0.5699404755599715, 'initialization_multiplier': 1.1510539378793674}. Best is trial 5 with value: -0.0008271825917512904.


Training has not converged after 150 steps
Trial 9 final loss: 0.08351698
Trial 10:
  Learning Rate: 0.022732881169456935
  Sigma Multiplier: 1.3878857027773481
  Initialization Multiplier: 0.4257301411867176


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 33.53it/s, loss=-0.001502, elapsed time=0.03, total time=4.61]
[I 2025-06-07 22:57:50,532] Trial 10 finished with value: -0.0015017211564621782 and parameters: {'learning_rate': 0.022732881169456935, 'sigma_multiplier': 1.3878857027773481, 'initialization_multiplier': 0.4257301411867176}. Best is trial 10 with value: -0.0015017211564621782.


Training has not converged after 150 steps
Trial 10 final loss: -0.00150172
Trial 11:
  Learning Rate: 0.023683500048486662
  Sigma Multiplier: 1.3894006830972794
  Initialization Multiplier: 0.37782344627627396


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 34.03it/s, loss=-0.001426, elapsed time=0.03, total time=4.54]
[I 2025-06-07 22:57:55,104] Trial 11 finished with value: -0.0014264853174868908 and parameters: {'learning_rate': 0.023683500048486662, 'sigma_multiplier': 1.3894006830972794, 'initialization_multiplier': 0.37782344627627396}. Best is trial 10 with value: -0.0015017211564621782.


Training has not converged after 150 steps
Trial 11 final loss: -0.00142649
Trial 12:
  Learning Rate: 0.0076247150075657005
  Sigma Multiplier: 1.4051069064946968
  Initialization Multiplier: 0.07331105943151323


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 33.76it/s, loss=-0.001377, elapsed time=0.03, total time=4.58]
[I 2025-06-07 22:57:59,713] Trial 12 finished with value: -0.0013772923845470268 and parameters: {'learning_rate': 0.0076247150075657005, 'sigma_multiplier': 1.4051069064946968, 'initialization_multiplier': 0.07331105943151323}. Best is trial 10 with value: -0.0015017211564621782.


Training has not converged after 150 steps
Trial 12 final loss: -0.00137729
Trial 13:
  Learning Rate: 0.006103884006960817
  Sigma Multiplier: 1.268537980079308
  Initialization Multiplier: 0.4208185534344365


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 33.01it/s, loss=-0.001684, elapsed time=0.03, total time=4.67]
[I 2025-06-07 22:58:04,419] Trial 13 finished with value: -0.0016842281188082628 and parameters: {'learning_rate': 0.006103884006960817, 'sigma_multiplier': 1.268537980079308, 'initialization_multiplier': 0.4208185534344365}. Best is trial 13 with value: -0.0016842281188082628.


Training has not converged after 150 steps
Trial 13 final loss: -0.00168423
Trial 14:
  Learning Rate: 0.004156387031652671
  Sigma Multiplier: 1.1582087201733615
  Initialization Multiplier: 0.6056438339024942


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.88it/s, loss=-0.001787, elapsed time=0.03, total time=4.85]
[I 2025-06-07 22:58:09,299] Trial 14 finished with value: -0.0017868881312023772 and parameters: {'learning_rate': 0.004156387031652671, 'sigma_multiplier': 1.1582087201733615, 'initialization_multiplier': 0.6056438339024942}. Best is trial 14 with value: -0.0017868881312023772.


Training has not converged after 150 steps
Trial 14 final loss: -0.00178689
Trial 15:
  Learning Rate: 0.003529188213072762
  Sigma Multiplier: 1.1177322272451675
  Initialization Multiplier: 0.580404244090612


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.20it/s, loss=-0.001887, elapsed time=0.03, total time=4.93]
[I 2025-06-07 22:58:14,265] Trial 15 finished with value: -0.0018873816475501349 and parameters: {'learning_rate': 0.003529188213072762, 'sigma_multiplier': 1.1177322272451675, 'initialization_multiplier': 0.580404244090612}. Best is trial 15 with value: -0.0018873816475501349.


Training has not converged after 150 steps
Trial 15 final loss: -0.00188738
Trial 16:
  Learning Rate: 0.002887897649444538
  Sigma Multiplier: 0.9805091629953959
  Initialization Multiplier: 0.6539216838383932


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.13it/s, loss=-0.002131, elapsed time=0.03, total time=5.12]
[I 2025-06-07 22:58:19,414] Trial 16 finished with value: -0.0021314049893293575 and parameters: {'learning_rate': 0.002887897649444538, 'sigma_multiplier': 0.9805091629953959, 'initialization_multiplier': 0.6539216838383932}. Best is trial 16 with value: -0.0021314049893293575.


Training has not converged after 150 steps
Trial 16 final loss: -0.00213140
Trial 17:
  Learning Rate: 0.0027380796154217023
  Sigma Multiplier: 0.9509445020904321
  Initialization Multiplier: 0.5995189401840573


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.79it/s, loss=-0.002207, elapsed time=0.03, total time=5.18]
[I 2025-06-07 22:58:24,622] Trial 17 finished with value: -0.0022070685180452356 and parameters: {'learning_rate': 0.0027380796154217023, 'sigma_multiplier': 0.9509445020904321, 'initialization_multiplier': 0.5995189401840573}. Best is trial 17 with value: -0.0022070685180452356.


Training has not converged after 150 steps
Trial 17 final loss: -0.00220707
Trial 18:
  Learning Rate: 0.0020836937613985616
  Sigma Multiplier: 0.8658155781170134
  Initialization Multiplier: 0.016366931223795178


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.72it/s, loss=0.003160, elapsed time=0.03, total time=5.36]
[I 2025-06-07 22:58:30,017] Trial 18 finished with value: 0.003159675031603939 and parameters: {'learning_rate': 0.0020836937613985616, 'sigma_multiplier': 0.8658155781170134, 'initialization_multiplier': 0.016366931223795178}. Best is trial 17 with value: -0.0022070685180452356.


Training has not converged after 150 steps
Trial 18 final loss: 0.00315968
Trial 19:
  Learning Rate: 0.0015031987926721717
  Sigma Multiplier: 0.549468950385583
  Initialization Multiplier: 1.738335415774951


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.49it/s, loss=0.077069, elapsed time=0.04, total time=6.26]
[I 2025-06-07 22:58:36,309] Trial 19 finished with value: 0.07706871720471986 and parameters: {'learning_rate': 0.0015031987926721717, 'sigma_multiplier': 0.549468950385583, 'initialization_multiplier': 1.738335415774951}. Best is trial 17 with value: -0.0022070685180452356.


Training has not converged after 150 steps
Trial 19 final loss: 0.07706872
Trial 20:
  Learning Rate: 0.013401961205832624
  Sigma Multiplier: 0.9533626168844295
  Initialization Multiplier: 0.7354233706040657


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.73it/s, loss=-0.002432, elapsed time=0.03, total time=5.17]
[I 2025-06-07 22:58:41,512] Trial 20 finished with value: -0.00243243894219657 and parameters: {'learning_rate': 0.013401961205832624, 'sigma_multiplier': 0.9533626168844295, 'initialization_multiplier': 0.7354233706040657}. Best is trial 20 with value: -0.00243243894219657.


Training has not converged after 150 steps
Trial 20 final loss: -0.00243244
Trial 21:
  Learning Rate: 0.011270912875653417
  Sigma Multiplier: 0.9474401345890219
  Initialization Multiplier: 0.7410799823684752


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.85it/s, loss=-0.002291, elapsed time=0.03, total time=5.16]
[I 2025-06-07 22:58:46,700] Trial 21 finished with value: -0.002291455872759753 and parameters: {'learning_rate': 0.011270912875653417, 'sigma_multiplier': 0.9474401345890219, 'initialization_multiplier': 0.7410799823684752}. Best is trial 20 with value: -0.00243243894219657.


Training has not converged after 150 steps
Trial 21 final loss: -0.00229146
Trial 22:
  Learning Rate: 0.011829347155081368
  Sigma Multiplier: 0.5897561197937436
  Initialization Multiplier: 0.7777106886593712


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.84it/s, loss=-0.003214, elapsed time=0.04, total time=5.94]
[I 2025-06-07 22:58:52,666] Trial 22 finished with value: -0.003214287404462813 and parameters: {'learning_rate': 0.011829347155081368, 'sigma_multiplier': 0.5897561197937436, 'initialization_multiplier': 0.7777106886593712}. Best is trial 22 with value: -0.003214287404462813.


Training has not converged after 150 steps
Trial 22 final loss: -0.00321429
Trial 23:
  Learning Rate: 0.016601005718047807
  Sigma Multiplier: 0.5493944730256446
  Initialization Multiplier: 0.8480728451825669


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.03it/s, loss=-0.003276, elapsed time=0.04, total time=6.13]
[I 2025-06-07 22:58:58,826] Trial 23 finished with value: -0.0032758510827150637 and parameters: {'learning_rate': 0.016601005718047807, 'sigma_multiplier': 0.5493944730256446, 'initialization_multiplier': 0.8480728451825669}. Best is trial 23 with value: -0.0032758510827150637.


Training has not converged after 150 steps
Trial 23 final loss: -0.00327585
Trial 24:
  Learning Rate: 0.0521665954630128
  Sigma Multiplier: 0.42783730911476847
  Initialization Multiplier: 0.9566170227196759


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.21it/s, loss=-0.002329, elapsed time=0.04, total time=6.32]
[I 2025-06-07 22:59:05,184] Trial 24 finished with value: -0.002328715606917984 and parameters: {'learning_rate': 0.0521665954630128, 'sigma_multiplier': 0.42783730911476847, 'initialization_multiplier': 0.9566170227196759}. Best is trial 23 with value: -0.0032758510827150637.


Training has not converged after 150 steps
Trial 24 final loss: -0.00232872
Trial 25:
  Learning Rate: 0.01382301375599847
  Sigma Multiplier: 0.6315492004289958
  Initialization Multiplier: 1.1096115508774766


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.02it/s, loss=0.012049, elapsed time=0.03, total time=5.9] 
[I 2025-06-07 22:59:11,116] Trial 25 finished with value: 0.01204869024052426 and parameters: {'learning_rate': 0.01382301375599847, 'sigma_multiplier': 0.6315492004289958, 'initialization_multiplier': 1.1096115508774766}. Best is trial 23 with value: -0.0032758510827150637.


Training has not converged after 150 steps
Trial 25 final loss: 0.01204869
Trial 26:
  Learning Rate: 0.016381995511402007
  Sigma Multiplier: 0.3806667144711243
  Initialization Multiplier: 0.7969196123811959


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.28it/s, loss=-0.002727, elapsed time=0.04, total time=6.32]
[I 2025-06-07 22:59:17,465] Trial 26 finished with value: -0.002726991439603182 and parameters: {'learning_rate': 0.016381995511402007, 'sigma_multiplier': 0.3806667144711243, 'initialization_multiplier': 0.7969196123811959}. Best is trial 23 with value: -0.0032758510827150637.


Training has not converged after 150 steps
Trial 26 final loss: -0.00272699
Trial 27:
  Learning Rate: 0.08910168085852176
  Sigma Multiplier: 0.3863521790211073
  Initialization Multiplier: 1.2556192271295372


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.76it/s, loss=0.024947, elapsed time=0.04, total time=6.45]
[I 2025-06-07 22:59:23,952] Trial 27 finished with value: 0.0249469051178082 and parameters: {'learning_rate': 0.08910168085852176, 'sigma_multiplier': 0.3863521790211073, 'initialization_multiplier': 1.2556192271295372}. Best is trial 23 with value: -0.0032758510827150637.


Training has not converged after 150 steps
Trial 27 final loss: 0.02494691
Trial 28:
  Learning Rate: 0.035720060557837996
  Sigma Multiplier: 0.3255322393722565
  Initialization Multiplier: 0.8523347571866557


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.37it/s, loss=-0.001059, elapsed time=0.04, total time=6.3] 
[I 2025-06-07 22:59:30,280] Trial 28 finished with value: -0.0010586940529320426 and parameters: {'learning_rate': 0.035720060557837996, 'sigma_multiplier': 0.3255322393722565, 'initialization_multiplier': 0.8523347571866557}. Best is trial 23 with value: -0.0032758510827150637.


Training has not converged after 150 steps
Trial 28 final loss: -0.00105869
Trial 29:
  Learning Rate: 0.007108352266999596
  Sigma Multiplier: 0.16619116379805565
  Initialization Multiplier: 0.950167987380318


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.60it/s, loss=0.000061, elapsed time=0.04, total time=6.49] 
[I 2025-06-07 22:59:36,800] Trial 29 finished with value: 6.131846742925252e-05 and parameters: {'learning_rate': 0.007108352266999596, 'sigma_multiplier': 0.16619116379805565, 'initialization_multiplier': 0.950167987380318}. Best is trial 23 with value: -0.0032758510827150637.


Training has not converged after 150 steps
Trial 29 final loss: 0.00006132
Trial 30:
  Learning Rate: 0.024701292758443724
  Sigma Multiplier: 0.46960624590214906
  Initialization Multiplier: 0.8538688298962374


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.10it/s, loss=-0.003419, elapsed time=0.04, total time=6.11]
[I 2025-06-07 22:59:42,945] Trial 30 finished with value: -0.0034194822797494412 and parameters: {'learning_rate': 0.024701292758443724, 'sigma_multiplier': 0.46960624590214906, 'initialization_multiplier': 0.8538688298962374}. Best is trial 30 with value: -0.0034194822797494412.


Training has not converged after 150 steps
Trial 30 final loss: -0.00341948
Trial 31:
  Learning Rate: 0.021757536033547194
  Sigma Multiplier: 0.4562297664839305
  Initialization Multiplier: 0.8032434888852021


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.08it/s, loss=-0.003193, elapsed time=0.04, total time=6.11]
[I 2025-06-07 22:59:49,089] Trial 31 finished with value: -0.0031928183548810517 and parameters: {'learning_rate': 0.021757536033547194, 'sigma_multiplier': 0.4562297664839305, 'initialization_multiplier': 0.8032434888852021}. Best is trial 30 with value: -0.0034194822797494412.


Training has not converged after 150 steps
Trial 31 final loss: -0.00319282
Trial 32:
  Learning Rate: 0.050239186926498605
  Sigma Multiplier: 0.6697162144063
  Initialization Multiplier: 0.5080818768094586


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.36it/s, loss=-0.003414, elapsed time=0.03, total time=5.61]
[I 2025-06-07 22:59:54,732] Trial 32 finished with value: -0.003413936039472234 and parameters: {'learning_rate': 0.050239186926498605, 'sigma_multiplier': 0.6697162144063, 'initialization_multiplier': 0.5080818768094586}. Best is trial 30 with value: -0.0034194822797494412.


Training has not converged after 150 steps
Trial 32 final loss: -0.00341394
Trial 33:
  Learning Rate: 0.05133050288930026
  Sigma Multiplier: 0.7005154112463948
  Initialization Multiplier: 0.50942786666203


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.83it/s, loss=-0.003657, elapsed time=0.04, total time=5.51]
[I 2025-06-07 23:00:00,277] Trial 33 finished with value: -0.003657383810985977 and parameters: {'learning_rate': 0.05133050288930026, 'sigma_multiplier': 0.7005154112463948, 'initialization_multiplier': 0.50942786666203}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 33 final loss: -0.00365738
Trial 34:
  Learning Rate: 0.056824875514037525
  Sigma Multiplier: 0.7148341262977851
  Initialization Multiplier: 0.2649679080004061


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.95it/s, loss=-0.003155, elapsed time=0.03, total time=5.5] 
[I 2025-06-07 23:00:05,813] Trial 34 finished with value: -0.003155065070149876 and parameters: {'learning_rate': 0.056824875514037525, 'sigma_multiplier': 0.7148341262977851, 'initialization_multiplier': 0.2649679080004061}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 34 final loss: -0.00315507
Trial 35:
  Learning Rate: 0.04521046897605833
  Sigma Multiplier: 0.49767491000157066
  Initialization Multiplier: 0.49093167204684834


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.45it/s, loss=-0.002926, elapsed time=0.04, total time=6.02]
[I 2025-06-07 23:00:11,867] Trial 35 finished with value: -0.002926265810915008 and parameters: {'learning_rate': 0.04521046897605833, 'sigma_multiplier': 0.49767491000157066, 'initialization_multiplier': 0.49093167204684834}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 35 final loss: -0.00292627
Trial 36:
  Learning Rate: 0.09159143429352432
  Sigma Multiplier: 0.6830814036893667
  Initialization Multiplier: 0.25803593190356755


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.37it/s, loss=-0.001905, elapsed time=0.03, total time=5.62]
[I 2025-06-07 23:00:17,516] Trial 36 finished with value: -0.0019048052113152113 and parameters: {'learning_rate': 0.09159143429352432, 'sigma_multiplier': 0.6830814036893667, 'initialization_multiplier': 0.25803593190356755}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 36 final loss: -0.00190481
Trial 37:
  Learning Rate: 0.05715749116987739
  Sigma Multiplier: 0.8240666768402893
  Initialization Multiplier: 0.5187449010420151


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.51it/s, loss=-0.002900, elapsed time=0.03, total time=5.22]
[I 2025-06-07 23:00:22,766] Trial 37 finished with value: -0.0028997916711621255 and parameters: {'learning_rate': 0.05715749116987739, 'sigma_multiplier': 0.8240666768402893, 'initialization_multiplier': 0.5187449010420151}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 37 final loss: -0.00289979
Trial 38:
  Learning Rate: 0.030251414623859264
  Sigma Multiplier: 0.29502101334066166
  Initialization Multiplier: 1.0282857002297066


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.90it/s, loss=0.000185, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:00:29,207] Trial 38 finished with value: 0.00018457633913528343 and parameters: {'learning_rate': 0.030251414623859264, 'sigma_multiplier': 0.29502101334066166, 'initialization_multiplier': 1.0282857002297066}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 38 final loss: 0.00018458
Trial 39:
  Learning Rate: 0.0005992937015852355
  Sigma Multiplier: 0.7327478457526697
  Initialization Multiplier: 1.2234851832652989


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.90it/s, loss=0.099174, elapsed time=0.03, total time=5.5] 
[I 2025-06-07 23:00:34,739] Trial 39 finished with value: 0.09917388366984539 and parameters: {'learning_rate': 0.0005992937015852355, 'sigma_multiplier': 0.7327478457526697, 'initialization_multiplier': 1.2234851832652989}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 39 final loss: 0.09917388
Trial 40:
  Learning Rate: 0.07165996422208029
  Sigma Multiplier: 0.8346339800568795
  Initialization Multiplier: 0.162757911520035


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.57it/s, loss=-0.002488, elapsed time=0.03, total time=5.21]
[I 2025-06-07 23:00:39,977] Trial 40 finished with value: -0.002488392002127948 and parameters: {'learning_rate': 0.07165996422208029, 'sigma_multiplier': 0.8346339800568795, 'initialization_multiplier': 0.162757911520035}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 40 final loss: -0.00248839
Trial 41:
  Learning Rate: 0.030727814682606092
  Sigma Multiplier: 0.5938506938616938
  Initialization Multiplier: 0.8950949686846763


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.26it/s, loss=-0.003605, elapsed time=0.04, total time=5.84]
[I 2025-06-07 23:00:45,853] Trial 41 finished with value: -0.003605005996796896 and parameters: {'learning_rate': 0.030727814682606092, 'sigma_multiplier': 0.5938506938616938, 'initialization_multiplier': 0.8950949686846763}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 41 final loss: -0.00360501
Trial 42:
  Learning Rate: 0.032188093699605944
  Sigma Multiplier: 0.6406873450165506
  Initialization Multiplier: 0.8993878970797656


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.08it/s, loss=-0.003488, elapsed time=0.03, total time=5.67]
[I 2025-06-07 23:00:51,550] Trial 42 finished with value: -0.0034876623090694223 and parameters: {'learning_rate': 0.032188093699605944, 'sigma_multiplier': 0.6406873450165506, 'initialization_multiplier': 0.8993878970797656}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 42 final loss: -0.00348766
Trial 43:
  Learning Rate: 0.034862430814124414
  Sigma Multiplier: 0.6587405247053385
  Initialization Multiplier: 1.4485173267757936


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.43it/s, loss=0.102290, elapsed time=0.04, total time=5.91]
[I 2025-06-07 23:00:57,489] Trial 43 finished with value: 0.10228981871593967 and parameters: {'learning_rate': 0.034862430814124414, 'sigma_multiplier': 0.6587405247053385, 'initialization_multiplier': 1.4485173267757936}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 43 final loss: 0.10228982
Trial 44:
  Learning Rate: 0.023077441889567448
  Sigma Multiplier: 0.2273520155867288
  Initialization Multiplier: 0.34178291988455267


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.84it/s, loss=-0.000498, elapsed time=0.04, total time=6.43]
[I 2025-06-07 23:01:03,947] Trial 44 finished with value: -0.0004976951838938528 and parameters: {'learning_rate': 0.023077441889567448, 'sigma_multiplier': 0.2273520155867288, 'initialization_multiplier': 0.34178291988455267}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 44 final loss: -0.00049770
Trial 45:
  Learning Rate: 0.042162321003310375
  Sigma Multiplier: 0.771070204114995
  Initialization Multiplier: 1.0475842142190208


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.11it/s, loss=-0.001739, elapsed time=0.03, total time=5.47]
[I 2025-06-07 23:01:09,447] Trial 45 finished with value: -0.0017389425422096507 and parameters: {'learning_rate': 0.042162321003310375, 'sigma_multiplier': 0.771070204114995, 'initialization_multiplier': 1.0475842142190208}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 45 final loss: -0.00173894
Trial 46:
  Learning Rate: 4.457774168099214e-05
  Sigma Multiplier: 0.4946704906061757
  Initialization Multiplier: 0.6713297803139343


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.99it/s, loss=0.045243, elapsed time=0.04, total time=6.13]
[I 2025-06-07 23:01:15,606] Trial 46 finished with value: 0.045242630610076835 and parameters: {'learning_rate': 4.457774168099214e-05, 'sigma_multiplier': 0.4946704906061757, 'initialization_multiplier': 0.6713297803139343}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 46 final loss: 0.04524263
Trial 47:
  Learning Rate: 8.944386871583892e-05
  Sigma Multiplier: 0.635325665813526
  Initialization Multiplier: 0.9242604871524723


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.45it/s, loss=0.090300, elapsed time=0.04, total time=5.81]
[I 2025-06-07 23:01:21,449] Trial 47 finished with value: 0.09030029206731044 and parameters: {'learning_rate': 8.944386871583892e-05, 'sigma_multiplier': 0.635325665813526, 'initialization_multiplier': 0.9242604871524723}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 47 final loss: 0.09030029
Trial 48:
  Learning Rate: 0.028831671364710078
  Sigma Multiplier: 0.8416537528554399
  Initialization Multiplier: 1.3825129949871278


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.78it/s, loss=0.096056, elapsed time=0.03, total time=5.34]
[I 2025-06-07 23:01:26,818] Trial 48 finished with value: 0.096055653807267 and parameters: {'learning_rate': 0.028831671364710078, 'sigma_multiplier': 0.8416537528554399, 'initialization_multiplier': 1.3825129949871278}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 48 final loss: 0.09605565
Trial 49:
  Learning Rate: 0.008823460030637026
  Sigma Multiplier: 1.6491883110204077
  Initialization Multiplier: 1.1674481860817711


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 32.75it/s, loss=0.029274, elapsed time=0.03, total time=4.72]
[I 2025-06-07 23:01:31,571] Trial 49 finished with value: 0.029273516202962736 and parameters: {'learning_rate': 0.008823460030637026, 'sigma_multiplier': 1.6491883110204077, 'initialization_multiplier': 1.1674481860817711}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 49 final loss: 0.02927352
Trial 50:
  Learning Rate: 0.005185559219485216
  Sigma Multiplier: 0.10421394469444328
  Initialization Multiplier: 0.4881724927981342


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.02it/s, loss=-0.000144, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:01:37,974] Trial 50 finished with value: -0.00014441492160442964 and parameters: {'learning_rate': 0.005185559219485216, 'sigma_multiplier': 0.10421394469444328, 'initialization_multiplier': 0.4881724927981342}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 50 final loss: -0.00014441
Trial 51:
  Learning Rate: 0.01729105914836037
  Sigma Multiplier: 0.520465919359461
  Initialization Multiplier: 0.8801105258556899


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.54it/s, loss=-0.003298, elapsed time=0.04, total time=6]   
[I 2025-06-07 23:01:44,010] Trial 51 finished with value: -0.0032981765868275357 and parameters: {'learning_rate': 0.01729105914836037, 'sigma_multiplier': 0.520465919359461, 'initialization_multiplier': 0.8801105258556899}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 51 final loss: -0.00329818
Trial 52:
  Learning Rate: 0.01860118869218048
  Sigma Multiplier: 0.5181406482295235
  Initialization Multiplier: 0.9939024651673487


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.25it/s, loss=-0.002640, elapsed time=0.04, total time=6.06]
[I 2025-06-07 23:01:50,106] Trial 52 finished with value: -0.0026400950879231534 and parameters: {'learning_rate': 0.01860118869218048, 'sigma_multiplier': 0.5181406482295235, 'initialization_multiplier': 0.9939024651673487}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 52 final loss: -0.00264010
Trial 53:
  Learning Rate: 0.06168376092442265
  Sigma Multiplier: 0.4098346545038747
  Initialization Multiplier: 0.8896439355788709


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.54it/s, loss=-0.001941, elapsed time=0.04, total time=6.24]
[I 2025-06-07 23:01:56,375] Trial 53 finished with value: -0.0019413430297038207 and parameters: {'learning_rate': 0.06168376092442265, 'sigma_multiplier': 0.4098346545038747, 'initialization_multiplier': 0.8896439355788709}. Best is trial 33 with value: -0.003657383810985977.


Training has not converged after 150 steps
Trial 53 final loss: -0.00194134
Trial 54:
  Learning Rate: 0.03268836600574447
  Sigma Multiplier: 0.5893341884255775
  Initialization Multiplier: 0.5522392476378724


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.37it/s, loss=-0.003765, elapsed time=0.04, total time=5.82]
[I 2025-06-07 23:02:02,229] Trial 54 finished with value: -0.0037650862695321648 and parameters: {'learning_rate': 0.03268836600574447, 'sigma_multiplier': 0.5893341884255775, 'initialization_multiplier': 0.5522392476378724}. Best is trial 54 with value: -0.0037650862695321648.


Training has not converged after 150 steps
Trial 54 final loss: -0.00376509
Trial 55:
  Learning Rate: 0.02810172812118618
  Sigma Multiplier: 0.6128734934683772
  Initialization Multiplier: 0.33913434235488504


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.38it/s, loss=-0.003374, elapsed time=0.04, total time=5.82]
[I 2025-06-07 23:02:08,076] Trial 55 finished with value: -0.003374331996905899 and parameters: {'learning_rate': 0.02810172812118618, 'sigma_multiplier': 0.6128734934683772, 'initialization_multiplier': 0.33913434235488504}. Best is trial 54 with value: -0.0037650862695321648.


Training has not converged after 150 steps
Trial 55 final loss: -0.00337433
Trial 56:
  Learning Rate: 0.07785590598075481
  Sigma Multiplier: 0.6985041465898045
  Initialization Multiplier: 0.6196993324337036


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.34it/s, loss=-0.003406, elapsed time=0.03, total time=5.62]
[I 2025-06-07 23:02:13,730] Trial 56 finished with value: -0.0034062091711049183 and parameters: {'learning_rate': 0.07785590598075481, 'sigma_multiplier': 0.6985041465898045, 'initialization_multiplier': 0.6196993324337036}. Best is trial 54 with value: -0.0037650862695321648.


Training has not converged after 150 steps
Trial 56 final loss: -0.00340621
Trial 57:
  Learning Rate: 0.04242786786990854
  Sigma Multiplier: 1.0819935153262763
  Initialization Multiplier: 0.5456517561881706


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.14it/s, loss=-0.002219, elapsed time=0.03, total time=4.95]
[I 2025-06-07 23:02:18,711] Trial 57 finished with value: -0.002219333112064074 and parameters: {'learning_rate': 0.04242786786990854, 'sigma_multiplier': 1.0819935153262763, 'initialization_multiplier': 0.5456517561881706}. Best is trial 54 with value: -0.0037650862695321648.


Training has not converged after 150 steps
Trial 57 final loss: -0.00221933
Trial 58:
  Learning Rate: 0.0992103177538078
  Sigma Multiplier: 1.2234948388184566
  Initialization Multiplier: 0.4148955688973924


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.86it/s, loss=-0.001655, elapsed time=0.03, total time=4.84]
[I 2025-06-07 23:02:23,582] Trial 58 finished with value: -0.001655371978582289 and parameters: {'learning_rate': 0.0992103177538078, 'sigma_multiplier': 1.2234948388184566, 'initialization_multiplier': 0.4148955688973924}. Best is trial 54 with value: -0.0037650862695321648.


Training has not converged after 150 steps
Trial 58 final loss: -0.00165537
Trial 59:
  Learning Rate: 0.010056845321010534
  Sigma Multiplier: 0.7758517949348809
  Initialization Multiplier: 0.6996673019128877


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.97it/s, loss=-0.002660, elapsed time=0.03, total time=5.49]
[I 2025-06-07 23:02:29,103] Trial 59 finished with value: -0.0026600881483821626 and parameters: {'learning_rate': 0.010056845321010534, 'sigma_multiplier': 0.7758517949348809, 'initialization_multiplier': 0.6996673019128877}. Best is trial 54 with value: -0.0037650862695321648.


Training has not converged after 150 steps
Trial 59 final loss: -0.00266009
Trial 60:
  Learning Rate: 0.025709753512714657
  Sigma Multiplier: 0.34525277057308623
  Initialization Multiplier: 1.9890735593826572


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.44it/s, loss=0.018344, elapsed time=0.04, total time=6.53]
[I 2025-06-07 23:02:35,667] Trial 60 finished with value: 0.01834426836932701 and parameters: {'learning_rate': 0.025709753512714657, 'sigma_multiplier': 0.34525277057308623, 'initialization_multiplier': 1.9890735593826572}. Best is trial 54 with value: -0.0037650862695321648.


Training has not converged after 150 steps
Trial 60 final loss: 0.01834427
Trial 61:
  Learning Rate: 0.0763450590429375
  Sigma Multiplier: 0.9065013218562584
  Initialization Multiplier: 0.6267148662889825


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.19it/s, loss=-0.002326, elapsed time=0.03, total time=5.28]
[I 2025-06-07 23:02:40,974] Trial 61 finished with value: -0.0023258252012949444 and parameters: {'learning_rate': 0.0763450590429375, 'sigma_multiplier': 0.9065013218562584, 'initialization_multiplier': 0.6267148662889825}. Best is trial 54 with value: -0.0037650862695321648.


Training has not converged after 150 steps
Trial 61 final loss: -0.00232583
Trial 62:
  Learning Rate: 0.0658112987663904
  Sigma Multiplier: 0.686017348541401
  Initialization Multiplier: 0.5720073834988959


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.64it/s, loss=-0.003554, elapsed time=0.04, total time=5.76]
[I 2025-06-07 23:02:46,768] Trial 62 finished with value: -0.0035540380356356006 and parameters: {'learning_rate': 0.0658112987663904, 'sigma_multiplier': 0.686017348541401, 'initialization_multiplier': 0.5720073834988959}. Best is trial 54 with value: -0.0037650862695321648.


Training has not converged after 150 steps
Trial 62 final loss: -0.00355404
Trial 63:
  Learning Rate: 0.04596714129124107
  Sigma Multiplier: 0.5942331745317574
  Initialization Multiplier: 0.4511630781170609


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.65it/s, loss=-0.003878, elapsed time=0.04, total time=5.98]
[I 2025-06-07 23:02:52,781] Trial 63 finished with value: -0.0038784944302675314 and parameters: {'learning_rate': 0.04596714129124107, 'sigma_multiplier': 0.5942331745317574, 'initialization_multiplier': 0.4511630781170609}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 63 final loss: -0.00387849
Trial 64:
  Learning Rate: 0.034530819741638394
  Sigma Multiplier: 1.023949146445557
  Initialization Multiplier: 0.17798691164188118


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.21it/s, loss=-0.002138, elapsed time=0.03, total time=5.1] 
[I 2025-06-07 23:02:58,034] Trial 64 finished with value: -0.002137543769102134 and parameters: {'learning_rate': 0.034530819741638394, 'sigma_multiplier': 1.023949146445557, 'initialization_multiplier': 0.17798691164188118}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 64 final loss: -0.00213754
Trial 65:
  Learning Rate: 0.06405575769079479
  Sigma Multiplier: 0.5540420206368001
  Initialization Multiplier: 0.4398989088431567


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.93it/s, loss=-0.003015, elapsed time=0.04, total time=6.15]
[I 2025-06-07 23:03:04,218] Trial 65 finished with value: -0.0030145974579513287 and parameters: {'learning_rate': 0.06405575769079479, 'sigma_multiplier': 0.5540420206368001, 'initialization_multiplier': 0.4398989088431567}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 65 final loss: -0.00301460
Trial 66:
  Learning Rate: 0.04264940118860188
  Sigma Multiplier: 0.6014806899690123
  Initialization Multiplier: 0.7435359522908314


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.35it/s, loss=-0.003424, elapsed time=0.04, total time=6.04]
[I 2025-06-07 23:03:10,296] Trial 66 finished with value: -0.0034242937251089244 and parameters: {'learning_rate': 0.04264940118860188, 'sigma_multiplier': 0.6014806899690123, 'initialization_multiplier': 0.7435359522908314}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 66 final loss: -0.00342429
Trial 67:
  Learning Rate: 0.050206616914200954
  Sigma Multiplier: 0.5953214317170712
  Initialization Multiplier: 0.757618527295095


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.13it/s, loss=-0.003798, elapsed time=0.04, total time=6.1] 
[I 2025-06-07 23:03:16,426] Trial 67 finished with value: -0.0037977153382970385 and parameters: {'learning_rate': 0.050206616914200954, 'sigma_multiplier': 0.5953214317170712, 'initialization_multiplier': 0.757618527295095}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 67 final loss: -0.00379772
Trial 68:
  Learning Rate: 0.001174653525771172
  Sigma Multiplier: 0.7767207245050052
  Initialization Multiplier: 0.5536314207461406


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.36it/s, loss=-0.001134, elapsed time=0.04, total time=5.82]
[I 2025-06-07 23:03:22,274] Trial 68 finished with value: -0.0011341975083602799 and parameters: {'learning_rate': 0.001174653525771172, 'sigma_multiplier': 0.7767207245050052, 'initialization_multiplier': 0.5536314207461406}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 68 final loss: -0.00113420
Trial 69:
  Learning Rate: 0.013770082373472261
  Sigma Multiplier: 0.5887581902488964
  Initialization Multiplier: 0.6603349220429153


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.08it/s, loss=-0.003134, elapsed time=0.04, total time=6.11]
[I 2025-06-07 23:03:28,421] Trial 69 finished with value: -0.0031342834005782727 and parameters: {'learning_rate': 0.013770082373472261, 'sigma_multiplier': 0.5887581902488964, 'initialization_multiplier': 0.6603349220429153}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 69 final loss: -0.00313428
Trial 70:
  Learning Rate: 0.049081967178305735
  Sigma Multiplier: 0.44176907961444556
  Initialization Multiplier: 0.3181008139839861


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.63it/s, loss=-0.003090, elapsed time=0.04, total time=6.47]
[I 2025-06-07 23:03:34,926] Trial 70 finished with value: -0.0030902821906297817 and parameters: {'learning_rate': 0.049081967178305735, 'sigma_multiplier': 0.44176907961444556, 'initialization_multiplier': 0.3181008139839861}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 70 final loss: -0.00309028
Trial 71:
  Learning Rate: 0.04092509999357284
  Sigma Multiplier: 0.5955615777785078
  Initialization Multiplier: 0.7929536825912727


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 25.00it/s, loss=-0.003831, elapsed time=0.04, total time=6.13]
[I 2025-06-07 23:03:41,086] Trial 71 finished with value: -0.0038310282938575535 and parameters: {'learning_rate': 0.04092509999357284, 'sigma_multiplier': 0.5955615777785078, 'initialization_multiplier': 0.7929536825912727}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 71 final loss: -0.00383103
Trial 72:
  Learning Rate: 0.03637575322931222
  Sigma Multiplier: 0.7257448573814484
  Initialization Multiplier: 0.7433891234347682


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.23it/s, loss=-0.003236, elapsed time=0.04, total time=5.86]
[I 2025-06-07 23:03:46,978] Trial 72 finished with value: -0.0032360038864768566 and parameters: {'learning_rate': 0.03637575322931222, 'sigma_multiplier': 0.7257448573814484, 'initialization_multiplier': 0.7433891234347682}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 72 final loss: -0.00323600
Trial 73:
  Learning Rate: 0.06928920923633866
  Sigma Multiplier: 0.6498471999183019
  Initialization Multiplier: 0.8102314404137297


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.31it/s, loss=-0.002906, elapsed time=0.04, total time=6.06]
[I 2025-06-07 23:03:53,068] Trial 73 finished with value: -0.002905548689756263 and parameters: {'learning_rate': 0.06928920923633866, 'sigma_multiplier': 0.6498471999183019, 'initialization_multiplier': 0.8102314404137297}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 73 final loss: -0.00290555
Trial 74:
  Learning Rate: 0.0002950555434658183
  Sigma Multiplier: 1.887464799306128
  Initialization Multiplier: 0.45108098731673346


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 32.01it/s, loss=0.003469, elapsed time=0.03, total time=4.82]
[I 2025-06-07 23:03:57,917] Trial 74 finished with value: 0.0034687740711433573 and parameters: {'learning_rate': 0.0002950555434658183, 'sigma_multiplier': 1.887464799306128, 'initialization_multiplier': 0.45108098731673346}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 74 final loss: 0.00346877
Trial 75:
  Learning Rate: 0.020810403833820493
  Sigma Multiplier: 0.5557021176823386
  Initialization Multiplier: 0.577844009351795


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.47it/s, loss=-0.003564, elapsed time=0.04, total time=6.52]
[I 2025-06-07 23:04:04,475] Trial 75 finished with value: -0.0035641404004902173 and parameters: {'learning_rate': 0.020810403833820493, 'sigma_multiplier': 0.5557021176823386, 'initialization_multiplier': 0.577844009351795}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 75 final loss: -0.00356414
Trial 76:
  Learning Rate: 0.01991377752273208
  Sigma Multiplier: 0.8891399223672038
  Initialization Multiplier: 0.5756605380566711


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.54it/s, loss=-0.002599, elapsed time=0.03, total time=5.79]
[I 2025-06-07 23:04:10,298] Trial 76 finished with value: -0.0025985367957324615 and parameters: {'learning_rate': 0.01991377752273208, 'sigma_multiplier': 0.8891399223672038, 'initialization_multiplier': 0.5756605380566711}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 76 final loss: -0.00259854
Trial 77:
  Learning Rate: 0.05374265302280529
  Sigma Multiplier: 0.36588936847339104
  Initialization Multiplier: 0.3767125694968326


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.73it/s, loss=-0.002308, elapsed time=0.04, total time=6.73]
[I 2025-06-07 23:04:17,061] Trial 77 finished with value: -0.0023079516288031687 and parameters: {'learning_rate': 0.05374265302280529, 'sigma_multiplier': 0.36588936847339104, 'initialization_multiplier': 0.3767125694968326}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 77 final loss: -0.00230795
Trial 78:
  Learning Rate: 0.07896706427258822
  Sigma Multiplier: 0.5551156321268117
  Initialization Multiplier: 0.7027272000313883


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.90it/s, loss=-0.003710, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:04:23,505] Trial 78 finished with value: -0.0037101899368906914 and parameters: {'learning_rate': 0.07896706427258822, 'sigma_multiplier': 0.5551156321268117, 'initialization_multiplier': 0.7027272000313883}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 78 final loss: -0.00371019
Trial 79:
  Learning Rate: 0.041149686367663176
  Sigma Multiplier: 0.27945841007369043
  Initialization Multiplier: 0.6865753595368219


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.04it/s, loss=-0.000883, elapsed time=0.04, total time=6.94]
[I 2025-06-07 23:04:30,479] Trial 79 finished with value: -0.0008830049392639807 and parameters: {'learning_rate': 0.041149686367663176, 'sigma_multiplier': 0.27945841007369043, 'initialization_multiplier': 0.6865753595368219}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 79 final loss: -0.00088300
Trial 80:
  Learning Rate: 0.07899632849827953
  Sigma Multiplier: 0.4698225946500604
  Initialization Multiplier: 0.7608001756182632


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.73it/s, loss=-0.002845, elapsed time=0.04, total time=6.74]
[I 2025-06-07 23:04:37,253] Trial 80 finished with value: -0.002844528609214634 and parameters: {'learning_rate': 0.07899632849827953, 'sigma_multiplier': 0.4698225946500604, 'initialization_multiplier': 0.7608001756182632}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 80 final loss: -0.00284453
Trial 81:
  Learning Rate: 0.05951741420542154
  Sigma Multiplier: 0.5701784969506788
  Initialization Multiplier: 0.6233148634569474


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.72it/s, loss=-0.003448, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:04:43,747] Trial 81 finished with value: -0.0034477416212832825 and parameters: {'learning_rate': 0.05951741420542154, 'sigma_multiplier': 0.5701784969506788, 'initialization_multiplier': 0.6233148634569474}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 81 final loss: -0.00344774
Trial 82:
  Learning Rate: 0.0898002459107441
  Sigma Multiplier: 0.5534015707330691
  Initialization Multiplier: 0.8215132986652495


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.11it/s, loss=-0.002724, elapsed time=0.04, total time=6.63]
[I 2025-06-07 23:04:50,414] Trial 82 finished with value: -0.0027240308335834246 and parameters: {'learning_rate': 0.0898002459107441, 'sigma_multiplier': 0.5534015707330691, 'initialization_multiplier': 0.8215132986652495}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 82 final loss: -0.00272403
Trial 83:
  Learning Rate: 0.04948643199641188
  Sigma Multiplier: 0.694513773121592
  Initialization Multiplier: 0.48625107727015826


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.67it/s, loss=-0.003346, elapsed time=0.04, total time=6.21]
[I 2025-06-07 23:04:56,660] Trial 83 finished with value: -0.003346035264354552 and parameters: {'learning_rate': 0.04948643199641188, 'sigma_multiplier': 0.694513773121592, 'initialization_multiplier': 0.48625107727015826}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 83 final loss: -0.00334604
Trial 84:
  Learning Rate: 0.028289611433082573
  Sigma Multiplier: 0.8002632893538126
  Initialization Multiplier: 0.5745230510599029


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.17it/s, loss=-0.003229, elapsed time=0.04, total time=5.86]
[I 2025-06-07 23:05:02,561] Trial 84 finished with value: -0.003228943193328414 and parameters: {'learning_rate': 0.028289611433082573, 'sigma_multiplier': 0.8002632893538126, 'initialization_multiplier': 0.5745230510599029}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 84 final loss: -0.00322894
Trial 85:
  Learning Rate: 0.09910982915835069
  Sigma Multiplier: 0.4168824509149305
  Initialization Multiplier: 0.7066747396689781


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.58it/s, loss=-0.002603, elapsed time=0.04, total time=6.78]
[I 2025-06-07 23:05:09,373] Trial 85 finished with value: -0.00260285865651806 and parameters: {'learning_rate': 0.09910982915835069, 'sigma_multiplier': 0.4168824509149305, 'initialization_multiplier': 0.7066747396689781}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 85 final loss: -0.00260286
Trial 86:
  Learning Rate: 0.022045911938345774
  Sigma Multiplier: 0.5165573303079648
  Initialization Multiplier: 0.9840783393860091


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.99it/s, loss=-0.003182, elapsed time=0.04, total time=6.66]
[I 2025-06-07 23:05:16,066] Trial 86 finished with value: -0.0031822331808165415 and parameters: {'learning_rate': 0.022045911938345774, 'sigma_multiplier': 0.5165573303079648, 'initialization_multiplier': 0.9840783393860091}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 86 final loss: -0.00318223
Trial 87:
  Learning Rate: 0.039614111326291626
  Sigma Multiplier: 0.6213332649536962
  Initialization Multiplier: 0.5444683390540401


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.69it/s, loss=-0.003707, elapsed time=0.04, total time=6.47]
[I 2025-06-07 23:05:22,567] Trial 87 finished with value: -0.0037067733318676484 and parameters: {'learning_rate': 0.039614111326291626, 'sigma_multiplier': 0.6213332649536962, 'initialization_multiplier': 0.5444683390540401}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 87 final loss: -0.00370677
Trial 88:
  Learning Rate: 0.015592144596838218
  Sigma Multiplier: 0.7356434045343639
  Initialization Multiplier: 0.40195076391382334


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.49it/s, loss=-0.003007, elapsed time=0.04, total time=6.01]
[I 2025-06-07 23:05:28,616] Trial 88 finished with value: -0.0030067485541420175 and parameters: {'learning_rate': 0.015592144596838218, 'sigma_multiplier': 0.7356434045343639, 'initialization_multiplier': 0.40195076391382334}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 88 final loss: -0.00300675
Trial 89:
  Learning Rate: 0.037807207347157853
  Sigma Multiplier: 0.6183541786339892
  Initialization Multiplier: 1.7331542967537175


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.11it/s, loss=0.085129, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:05:35,273] Trial 89 finished with value: 0.08512930014783834 and parameters: {'learning_rate': 0.037807207347157853, 'sigma_multiplier': 0.6183541786339892, 'initialization_multiplier': 1.7331542967537175}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 89 final loss: 0.08512930
Trial 90:
  Learning Rate: 0.031897459869597314
  Sigma Multiplier: 0.4678630852072841
  Initialization Multiplier: 0.6423799865551244


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.64it/s, loss=-0.003491, elapsed time=0.04, total time=6.75]
[I 2025-06-07 23:05:42,060] Trial 90 finished with value: -0.0034909910588565582 and parameters: {'learning_rate': 0.031897459869597314, 'sigma_multiplier': 0.4678630852072841, 'initialization_multiplier': 0.6423799865551244}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 90 final loss: -0.00349099
Trial 91:
  Learning Rate: 0.06574079117937924
  Sigma Multiplier: 0.6706002003774082
  Initialization Multiplier: 0.5507313333753413


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.31it/s, loss=-0.003584, elapsed time=0.04, total time=6.3] 
[I 2025-06-07 23:05:48,392] Trial 91 finished with value: -0.0035837362816553008 and parameters: {'learning_rate': 0.06574079117937924, 'sigma_multiplier': 0.6706002003774082, 'initialization_multiplier': 0.5507313333753413}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 91 final loss: -0.00358374
Trial 92:
  Learning Rate: 0.049245975829987326
  Sigma Multiplier: 0.574018072615166
  Initialization Multiplier: 0.5233227895908653


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.45it/s, loss=-0.003756, elapsed time=0.04, total time=6.52]
[I 2025-06-07 23:05:54,952] Trial 92 finished with value: -0.003756469801181959 and parameters: {'learning_rate': 0.049245975829987326, 'sigma_multiplier': 0.574018072615166, 'initialization_multiplier': 0.5233227895908653}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 92 final loss: -0.00375647
Trial 93:
  Learning Rate: 0.048290728030034764
  Sigma Multiplier: 0.6563188776247567
  Initialization Multiplier: 0.527853571651873


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.06it/s, loss=-0.003528, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:06:01,360] Trial 93 finished with value: -0.0035277807695157574 and parameters: {'learning_rate': 0.048290728030034764, 'sigma_multiplier': 0.6563188776247567, 'initialization_multiplier': 0.527853571651873}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 93 final loss: -0.00352778
Trial 94:
  Learning Rate: 0.05671548285060752
  Sigma Multiplier: 0.5990681347024525
  Initialization Multiplier: 0.46023199674921655


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.61it/s, loss=-0.003374, elapsed time=0.04, total time=6.49]
[I 2025-06-07 23:06:07,880] Trial 94 finished with value: -0.003374231424177392 and parameters: {'learning_rate': 0.05671548285060752, 'sigma_multiplier': 0.5990681347024525, 'initialization_multiplier': 0.46023199674921655}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 94 final loss: -0.00337423
Trial 95:
  Learning Rate: 0.08371907745876604
  Sigma Multiplier: 0.511564878647681
  Initialization Multiplier: 0.78434335194909


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.67it/s, loss=-0.002762, elapsed time=0.04, total time=6.75]
[I 2025-06-07 23:06:14,666] Trial 95 finished with value: -0.002761518321081811 and parameters: {'learning_rate': 0.08371907745876604, 'sigma_multiplier': 0.511564878647681, 'initialization_multiplier': 0.78434335194909}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 95 final loss: -0.00276152
Trial 96:
  Learning Rate: 0.06621446401008463
  Sigma Multiplier: 0.734508555490336
  Initialization Multiplier: 0.30868888453415816


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.94it/s, loss=-0.003166, elapsed time=0.04, total time=6.15]
[I 2025-06-07 23:06:20,848] Trial 96 finished with value: -0.003165932305190566 and parameters: {'learning_rate': 0.06621446401008463, 'sigma_multiplier': 0.734508555490336, 'initialization_multiplier': 0.30868888453415816}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 96 final loss: -0.00316593
Trial 97:
  Learning Rate: 0.03931563351160205
  Sigma Multiplier: 1.5152924248401693
  Initialization Multiplier: 0.38335139392522494


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.38it/s, loss=-0.001215, elapsed time=0.03, total time=5.24]
[I 2025-06-07 23:06:26,127] Trial 97 finished with value: -0.0012152594400927591 and parameters: {'learning_rate': 0.03931563351160205, 'sigma_multiplier': 1.5152924248401693, 'initialization_multiplier': 0.38335139392522494}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 97 final loss: -0.00121526
Trial 98:
  Learning Rate: 0.024150322660918974
  Sigma Multiplier: 0.40838659914505526
  Initialization Multiplier: 0.5229202507371796


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.14it/s, loss=-0.002361, elapsed time=0.04, total time=6.92]
[I 2025-06-07 23:06:33,084] Trial 98 finished with value: -0.002361410799080644 and parameters: {'learning_rate': 0.024150322660918974, 'sigma_multiplier': 0.40838659914505526, 'initialization_multiplier': 0.5229202507371796}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 98 final loss: -0.00236141
Trial 99:
  Learning Rate: 0.030087624524035243
  Sigma Multiplier: 0.48818499754225164
  Initialization Multiplier: 0.7097463579645493


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.46it/s, loss=-0.003186, elapsed time=0.04, total time=6.82]
[I 2025-06-07 23:06:39,937] Trial 99 finished with value: -0.0031858278067672776 and parameters: {'learning_rate': 0.030087624524035243, 'sigma_multiplier': 0.48818499754225164, 'initialization_multiplier': 0.7097463579645493}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 99 final loss: -0.00318583
Trial 100:
  Learning Rate: 0.046722333933044605
  Sigma Multiplier: 0.6225461353228776
  Initialization Multiplier: 0.6609384062894047


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.14it/s, loss=-0.003699, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:06:46,593] Trial 100 finished with value: -0.003698965885222954 and parameters: {'learning_rate': 0.046722333933044605, 'sigma_multiplier': 0.6225461353228776, 'initialization_multiplier': 0.6609384062894047}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 100 final loss: -0.00369897
Trial 101:
  Learning Rate: 0.049859047659522704
  Sigma Multiplier: 0.6653684882359315
  Initialization Multiplier: 0.6624442279183023


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.24it/s, loss=-0.003749, elapsed time=0.04, total time=6.6] 
[I 2025-06-07 23:06:53,237] Trial 101 finished with value: -0.003748688302838703 and parameters: {'learning_rate': 0.049859047659522704, 'sigma_multiplier': 0.6653684882359315, 'initialization_multiplier': 0.6624442279183023}. Best is trial 63 with value: -0.0038784944302675314.


Training has not converged after 150 steps
Trial 101 final loss: -0.00374869
Trial 102:
  Learning Rate: 0.04525218789975379
  Sigma Multiplier: 0.6261488416336246
  Initialization Multiplier: 0.6570443469097785


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.55it/s, loss=-0.003891, elapsed time=0.04, total time=6.5] 
[I 2025-06-07 23:06:59,772] Trial 102 finished with value: -0.003891269792786481 and parameters: {'learning_rate': 0.04525218789975379, 'sigma_multiplier': 0.6261488416336246, 'initialization_multiplier': 0.6570443469097785}. Best is trial 102 with value: -0.003891269792786481.


Training has not converged after 150 steps
Trial 102 final loss: -0.00389127
Trial 103:
  Learning Rate: 0.046822632872554316
  Sigma Multiplier: 0.6251006487701033
  Initialization Multiplier: 0.6674456366090311


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.42it/s, loss=-0.004040, elapsed time=0.04, total time=6.54]
[I 2025-06-07 23:07:06,351] Trial 103 finished with value: -0.004039796935804129 and parameters: {'learning_rate': 0.046822632872554316, 'sigma_multiplier': 0.6251006487701033, 'initialization_multiplier': 0.6674456366090311}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 103 final loss: -0.00403980
Trial 104:
  Learning Rate: 0.042799207022512595
  Sigma Multiplier: 0.5638431553929178
  Initialization Multiplier: 0.6486062710594187


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.85it/s, loss=-0.003739, elapsed time=0.04, total time=6.69]
[I 2025-06-07 23:07:13,077] Trial 104 finished with value: -0.0037391202322748767 and parameters: {'learning_rate': 0.042799207022512595, 'sigma_multiplier': 0.5638431553929178, 'initialization_multiplier': 0.6486062710594187}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 104 final loss: -0.00373912
Trial 105:
  Learning Rate: 0.038192447657798195
  Sigma Multiplier: 0.5320013088842838
  Initialization Multiplier: 0.6140241513761188


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.62it/s, loss=-0.003499, elapsed time=0.04, total time=6.76]
[I 2025-06-07 23:07:19,877] Trial 105 finished with value: -0.0034987957796718725 and parameters: {'learning_rate': 0.038192447657798195, 'sigma_multiplier': 0.5320013088842838, 'initialization_multiplier': 0.6140241513761188}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 105 final loss: -0.00349880
Trial 106:
  Learning Rate: 0.07484914538747371
  Sigma Multiplier: 0.3223855201264307
  Initialization Multiplier: 0.721661416358045


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.62it/s, loss=-0.001213, elapsed time=0.05, total time=7.07]
[I 2025-06-07 23:07:26,976] Trial 106 finished with value: -0.0012126902270229083 and parameters: {'learning_rate': 0.07484914538747371, 'sigma_multiplier': 0.3223855201264307, 'initialization_multiplier': 0.721661416358045}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 106 final loss: -0.00121269
Trial 107:
  Learning Rate: 0.02560509198637813
  Sigma Multiplier: 0.57811609947753
  Initialization Multiplier: 0.8302939467322711


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.02it/s, loss=-0.003787, elapsed time=0.04, total time=6.64]
[I 2025-06-07 23:07:33,657] Trial 107 finished with value: -0.003786504277709724 and parameters: {'learning_rate': 0.02560509198637813, 'sigma_multiplier': 0.57811609947753, 'initialization_multiplier': 0.8302939467322711}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 107 final loss: -0.00378650
Trial 108:
  Learning Rate: 0.05620143239871242
  Sigma Multiplier: 0.44312891643814123
  Initialization Multiplier: 0.8485917666044503


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.16it/s, loss=-0.003155, elapsed time=0.04, total time=6.9] 
[I 2025-06-07 23:07:40,593] Trial 108 finished with value: -0.0031547532706673416 and parameters: {'learning_rate': 0.05620143239871242, 'sigma_multiplier': 0.44312891643814123, 'initialization_multiplier': 0.8485917666044503}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 108 final loss: -0.00315475
Trial 109:
  Learning Rate: 0.02544115566598887
  Sigma Multiplier: 0.5613842738749789
  Initialization Multiplier: 0.775267004523362


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.02it/s, loss=-0.003627, elapsed time=0.04, total time=6.65]
[I 2025-06-07 23:07:47,277] Trial 109 finished with value: -0.0036272376551157305 and parameters: {'learning_rate': 0.02544115566598887, 'sigma_multiplier': 0.5613842738749789, 'initialization_multiplier': 0.775267004523362}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 109 final loss: -0.00362724
Trial 110:
  Learning Rate: 0.04534277007212712
  Sigma Multiplier: 0.5862825452968553
  Initialization Multiplier: 0.9362039602943923


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.14it/s, loss=-0.003266, elapsed time=0.04, total time=6.61]
[I 2025-06-07 23:07:53,920] Trial 110 finished with value: -0.0032661021021328216 and parameters: {'learning_rate': 0.04534277007212712, 'sigma_multiplier': 0.5862825452968553, 'initialization_multiplier': 0.9362039602943923}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 110 final loss: -0.00326610
Trial 111:
  Learning Rate: 0.034030873925469954
  Sigma Multiplier: 0.638888809956718
  Initialization Multiplier: 0.6692710498909146


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.62it/s, loss=-0.003629, elapsed time=0.04, total time=6.49]
[I 2025-06-07 23:08:00,442] Trial 111 finished with value: -0.0036285001146142744 and parameters: {'learning_rate': 0.034030873925469954, 'sigma_multiplier': 0.638888809956718, 'initialization_multiplier': 0.6692710498909146}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 111 final loss: -0.00362850
Trial 112:
  Learning Rate: 0.03889187053730404
  Sigma Multiplier: 0.7457432724862425
  Initialization Multiplier: 0.8083417207764257


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.06it/s, loss=-0.003382, elapsed time=0.04, total time=6.13]
[I 2025-06-07 23:08:06,606] Trial 112 finished with value: -0.003382125424676492 and parameters: {'learning_rate': 0.03889187053730404, 'sigma_multiplier': 0.7457432724862425, 'initialization_multiplier': 0.8083417207764257}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 112 final loss: -0.00338213
Trial 113:
  Learning Rate: 0.053768061350672934
  Sigma Multiplier: 0.8034752857997536
  Initialization Multiplier: 0.6006776048281087


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.57it/s, loss=-0.002839, elapsed time=0.04, total time=6]   
[I 2025-06-07 23:08:12,640] Trial 113 finished with value: -0.0028389455009062373 and parameters: {'learning_rate': 0.053768061350672934, 'sigma_multiplier': 0.8034752857997536, 'initialization_multiplier': 0.6006776048281087}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 113 final loss: -0.00283895
Trial 114:
  Learning Rate: 0.043614827875223895
  Sigma Multiplier: 0.49411379654053295
  Initialization Multiplier: 0.48243623005411107


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.53it/s, loss=-0.003178, elapsed time=0.04, total time=6.8] 
[I 2025-06-07 23:08:19,481] Trial 114 finished with value: -0.0031782249940132757 and parameters: {'learning_rate': 0.043614827875223895, 'sigma_multiplier': 0.49411379654053295, 'initialization_multiplier': 0.48243623005411107}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 114 final loss: -0.00317822
Trial 115:
  Learning Rate: 0.027342366963177017
  Sigma Multiplier: 0.6906657514336999
  Initialization Multiplier: 0.7320873563667757


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.72it/s, loss=-0.003719, elapsed time=0.04, total time=6.2] 
[I 2025-06-07 23:08:25,713] Trial 115 finished with value: -0.003719234593598112 and parameters: {'learning_rate': 0.027342366963177017, 'sigma_multiplier': 0.6906657514336999, 'initialization_multiplier': 0.7320873563667757}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 115 final loss: -0.00371923
Trial 116:
  Learning Rate: 0.026817527829114257
  Sigma Multiplier: 0.6975489178105826
  Initialization Multiplier: 0.7558928533860373


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.06it/s, loss=-0.003236, elapsed time=0.04, total time=6.36]
[I 2025-06-07 23:08:32,109] Trial 116 finished with value: -0.003235613699599891 and parameters: {'learning_rate': 0.026817527829114257, 'sigma_multiplier': 0.6975489178105826, 'initialization_multiplier': 0.7558928533860373}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 116 final loss: -0.00323561
Trial 117:
  Learning Rate: 0.01936266471180746
  Sigma Multiplier: 0.5436159499989449
  Initialization Multiplier: 0.836232951680477


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.28it/s, loss=-0.003377, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:08:38,721] Trial 117 finished with value: -0.0033769106856259947 and parameters: {'learning_rate': 0.01936266471180746, 'sigma_multiplier': 0.5436159499989449, 'initialization_multiplier': 0.836232951680477}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 117 final loss: -0.00337691
Trial 118:
  Learning Rate: 0.00010528471045043918
  Sigma Multiplier: 0.659211435052965
  Initialization Multiplier: 0.7095957731513046


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.99it/s, loss=0.071227, elapsed time=0.04, total time=6.38]
[I 2025-06-07 23:08:45,140] Trial 118 finished with value: 0.07122650734725551 and parameters: {'learning_rate': 0.00010528471045043918, 'sigma_multiplier': 0.659211435052965, 'initialization_multiplier': 0.7095957731513046}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 118 final loss: 0.07122651
Trial 119:
  Learning Rate: 0.08423667995899424
  Sigma Multiplier: 0.5712657295131236
  Initialization Multiplier: 1.087315110228929


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.50it/s, loss=0.013862, elapsed time=0.04, total time=6.79]
[I 2025-06-07 23:08:51,969] Trial 119 finished with value: 0.013862364253547482 and parameters: {'learning_rate': 0.08423667995899424, 'sigma_multiplier': 0.5712657295131236, 'initialization_multiplier': 1.087315110228929}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 119 final loss: 0.01386236
Trial 120:
  Learning Rate: 0.0333770069743541
  Sigma Multiplier: 0.4577829845512603
  Initialization Multiplier: 0.6470555692496568


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.62it/s, loss=-0.003282, elapsed time=0.04, total time=6.77]
[I 2025-06-07 23:08:58,782] Trial 120 finished with value: -0.003282408244732297 and parameters: {'learning_rate': 0.0333770069743541, 'sigma_multiplier': 0.4577829845512603, 'initialization_multiplier': 0.6470555692496568}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 120 final loss: -0.00328241
Trial 121:
  Learning Rate: 0.06748244826856245
  Sigma Multiplier: 0.6182945552845659
  Initialization Multiplier: 0.6809333986413666


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.78it/s, loss=-0.003737, elapsed time=0.04, total time=6.44]
[I 2025-06-07 23:09:05,261] Trial 121 finished with value: -0.00373663124044369 and parameters: {'learning_rate': 0.06748244826856245, 'sigma_multiplier': 0.6182945552845659, 'initialization_multiplier': 0.6809333986413666}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 121 final loss: -0.00373663
Trial 122:
  Learning Rate: 0.07124721974680498
  Sigma Multiplier: 0.6047047412157915
  Initialization Multiplier: 0.6840286639404344


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.21it/s, loss=-0.003680, elapsed time=0.04, total time=6.6] 
[I 2025-06-07 23:09:11,897] Trial 122 finished with value: -0.003680076416783305 and parameters: {'learning_rate': 0.07124721974680498, 'sigma_multiplier': 0.6047047412157915, 'initialization_multiplier': 0.6840286639404344}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 122 final loss: -0.00368008
Trial 123:
  Learning Rate: 0.05737523401262752
  Sigma Multiplier: 0.5300836428914837
  Initialization Multiplier: 0.6048240187320573


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.06it/s, loss=-0.003639, elapsed time=0.04, total time=6.66]
[I 2025-06-07 23:09:18,588] Trial 123 finished with value: -0.0036387766936566007 and parameters: {'learning_rate': 0.05737523401262752, 'sigma_multiplier': 0.5300836428914837, 'initialization_multiplier': 0.6048240187320573}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 123 final loss: -0.00363878
Trial 124:
  Learning Rate: 0.046545331572135765
  Sigma Multiplier: 0.6464885854561591
  Initialization Multiplier: 0.7407216326258416


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.33it/s, loss=-0.003742, elapsed time=0.04, total time=6.3] 
[I 2025-06-07 23:09:24,920] Trial 124 finished with value: -0.0037418809527223597 and parameters: {'learning_rate': 0.046545331572135765, 'sigma_multiplier': 0.6464885854561591, 'initialization_multiplier': 0.7407216326258416}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 124 final loss: -0.00374188
Trial 125:
  Learning Rate: 0.04873587164354045
  Sigma Multiplier: 0.6749219488979568
  Initialization Multiplier: 0.7516209820215283


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.51it/s, loss=-0.003450, elapsed time=0.04, total time=6.26]
[I 2025-06-07 23:09:31,211] Trial 125 finished with value: -0.003450356610634296 and parameters: {'learning_rate': 0.04873587164354045, 'sigma_multiplier': 0.6749219488979568, 'initialization_multiplier': 0.7516209820215283}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 125 final loss: -0.00345036
Trial 126:
  Learning Rate: 0.002314266235708288
  Sigma Multiplier: 0.7172167723576124
  Initialization Multiplier: 0.8854437069594987


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.48it/s, loss=-0.001077, elapsed time=0.04, total time=6.27]
[I 2025-06-07 23:09:37,511] Trial 126 finished with value: -0.0010774914356514722 and parameters: {'learning_rate': 0.002314266235708288, 'sigma_multiplier': 0.7172167723576124, 'initialization_multiplier': 0.8854437069594987}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 126 final loss: -0.00107749
Trial 127:
  Learning Rate: 0.03265327990214034
  Sigma Multiplier: 0.7503068220824495
  Initialization Multiplier: 0.7758084165256741


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.37it/s, loss=-0.003241, elapsed time=0.04, total time=6.3] 
[I 2025-06-07 23:09:43,844] Trial 127 finished with value: -0.003241181356270138 and parameters: {'learning_rate': 0.03265327990214034, 'sigma_multiplier': 0.7503068220824495, 'initialization_multiplier': 0.7758084165256741}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 127 final loss: -0.00324118
Trial 128:
  Learning Rate: 0.0005909031063722801
  Sigma Multiplier: 0.6467651144345088
  Initialization Multiplier: 0.6423412506264982


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.26it/s, loss=0.026000, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:09:50,464] Trial 128 finished with value: 0.02599993102405888 and parameters: {'learning_rate': 0.0005909031063722801, 'sigma_multiplier': 0.6467651144345088, 'initialization_multiplier': 0.6423412506264982}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 128 final loss: 0.02599993
Trial 129:
  Learning Rate: 0.015747410348470783
  Sigma Multiplier: 0.8338513129185624
  Initialization Multiplier: 0.8482495264863193


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.14it/s, loss=-0.002797, elapsed time=0.04, total time=5.88]
[I 2025-06-07 23:09:56,381] Trial 129 finished with value: -0.002796848904140161 and parameters: {'learning_rate': 0.015747410348470783, 'sigma_multiplier': 0.8338513129185624, 'initialization_multiplier': 0.8482495264863193}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 129 final loss: -0.00279685
Trial 130:
  Learning Rate: 0.027445835320780067
  Sigma Multiplier: 0.779069396871531
  Initialization Multiplier: 0.7237240602748874


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.40it/s, loss=-0.003125, elapsed time=0.04, total time=6.03]
[I 2025-06-07 23:10:02,451] Trial 130 finished with value: -0.0031250238248218478 and parameters: {'learning_rate': 0.027445835320780067, 'sigma_multiplier': 0.779069396871531, 'initialization_multiplier': 0.7237240602748874}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 130 final loss: -0.00312502
Trial 131:
  Learning Rate: 0.061287494955991575
  Sigma Multiplier: 0.590349660235381
  Initialization Multiplier: 0.6838391294827544


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.28it/s, loss=-0.003857, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:10:09,067] Trial 131 finished with value: -0.0038569870908222795 and parameters: {'learning_rate': 0.061287494955991575, 'sigma_multiplier': 0.590349660235381, 'initialization_multiplier': 0.6838391294827544}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 131 final loss: -0.00385699
Trial 132:
  Learning Rate: 0.06416844109304153
  Sigma Multiplier: 0.5902877155029712
  Initialization Multiplier: 0.6799271730200996


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.12it/s, loss=-0.003639, elapsed time=0.04, total time=6.63]
[I 2025-06-07 23:10:15,734] Trial 132 finished with value: -0.0036387859566012707 and parameters: {'learning_rate': 0.06416844109304153, 'sigma_multiplier': 0.5902877155029712, 'initialization_multiplier': 0.6799271730200996}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 132 final loss: -0.00363879
Trial 133:
  Learning Rate: 0.047860892680506285
  Sigma Multiplier: 0.695394334153758
  Initialization Multiplier: 0.5957298720945472


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.03it/s, loss=-0.003561, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:10:22,140] Trial 133 finished with value: -0.0035605788948073545 and parameters: {'learning_rate': 0.047860892680506285, 'sigma_multiplier': 0.695394334153758, 'initialization_multiplier': 0.5957298720945472}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 133 final loss: -0.00356058
Trial 134:
  Learning Rate: 0.058679288078119334
  Sigma Multiplier: 0.6299969228652186
  Initialization Multiplier: 0.7917026898713697


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.61it/s, loss=-0.002788, elapsed time=0.04, total time=6.5] 
[I 2025-06-07 23:10:28,673] Trial 134 finished with value: -0.002787585343182718 and parameters: {'learning_rate': 0.058679288078119334, 'sigma_multiplier': 0.6299969228652186, 'initialization_multiplier': 0.7917026898713697}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 134 final loss: -0.00278759
Trial 135:
  Learning Rate: 0.04090054515199455
  Sigma Multiplier: 0.4964978951922462
  Initialization Multiplier: 0.6353921793821384


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.48it/s, loss=-0.003771, elapsed time=0.04, total time=6.81]
[I 2025-06-07 23:10:35,522] Trial 135 finished with value: -0.003770604161264152 and parameters: {'learning_rate': 0.04090054515199455, 'sigma_multiplier': 0.4964978951922462, 'initialization_multiplier': 0.6353921793821384}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 135 final loss: -0.00377060
Trial 136:
  Learning Rate: 0.04080902979564625
  Sigma Multiplier: 0.39186731637783606
  Initialization Multiplier: 0.5225742793919705


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.04it/s, loss=-0.002325, elapsed time=0.04, total time=6.94]
[I 2025-06-07 23:10:42,497] Trial 136 finished with value: -0.0023245599903183997 and parameters: {'learning_rate': 0.04080902979564625, 'sigma_multiplier': 0.39186731637783606, 'initialization_multiplier': 0.5225742793919705}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 136 final loss: -0.00232456
Trial 137:
  Learning Rate: 0.06946088005811721
  Sigma Multiplier: 0.5025216228280962
  Initialization Multiplier: 0.4491108082270821


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.66it/s, loss=-0.003193, elapsed time=0.04, total time=6.76]
[I 2025-06-07 23:10:49,290] Trial 137 finished with value: -0.003193046461054494 and parameters: {'learning_rate': 0.06946088005811721, 'sigma_multiplier': 0.5025216228280962, 'initialization_multiplier': 0.4491108082270821}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 137 final loss: -0.00319305
Trial 138:
  Learning Rate: 0.05182503017937822
  Sigma Multiplier: 0.5846170553150075
  Initialization Multiplier: 0.6446032112377553


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.36it/s, loss=-0.003550, elapsed time=0.04, total time=6.55]
[I 2025-06-07 23:10:55,877] Trial 138 finished with value: -0.0035495265081711275 and parameters: {'learning_rate': 0.05182503017937822, 'sigma_multiplier': 0.5846170553150075, 'initialization_multiplier': 0.6446032112377553}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 138 final loss: -0.00354953
Trial 139:
  Learning Rate: 0.03470095435365702
  Sigma Multiplier: 0.48071032297224414
  Initialization Multiplier: 0.5572225480317687


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.49it/s, loss=-0.003399, elapsed time=0.04, total time=6.81]
[I 2025-06-07 23:11:02,725] Trial 139 finished with value: -0.0033988772963992045 and parameters: {'learning_rate': 0.03470095435365702, 'sigma_multiplier': 0.48071032297224414, 'initialization_multiplier': 0.5572225480317687}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 139 final loss: -0.00339888
Trial 140:
  Learning Rate: 0.003562170443980114
  Sigma Multiplier: 0.5284772098306731
  Initialization Multiplier: 0.6107579213757608


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.06it/s, loss=-0.002459, elapsed time=0.04, total time=6.63]
[I 2025-06-07 23:11:09,393] Trial 140 finished with value: -0.0024586815697764932 and parameters: {'learning_rate': 0.003562170443980114, 'sigma_multiplier': 0.5284772098306731, 'initialization_multiplier': 0.6107579213757608}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 140 final loss: -0.00245868
Trial 141:
  Learning Rate: 0.022764022808562705
  Sigma Multiplier: 0.621344405730485
  Initialization Multiplier: 0.7377192043830196


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.99it/s, loss=-0.003612, elapsed time=0.04, total time=6.38]
[I 2025-06-07 23:11:15,807] Trial 141 finished with value: -0.0036119856653405976 and parameters: {'learning_rate': 0.022764022808562705, 'sigma_multiplier': 0.621344405730485, 'initialization_multiplier': 0.7377192043830196}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 141 final loss: -0.00361199
Trial 142:
  Learning Rate: 0.04423067880309946
  Sigma Multiplier: 0.5768211108836576
  Initialization Multiplier: 0.6772522210312604


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.55it/s, loss=-0.003656, elapsed time=0.04, total time=6.51]
[I 2025-06-07 23:11:22,347] Trial 142 finished with value: -0.0036557031035029655 and parameters: {'learning_rate': 0.04423067880309946, 'sigma_multiplier': 0.5768211108836576, 'initialization_multiplier': 0.6772522210312604}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 142 final loss: -0.00365570
Trial 143:
  Learning Rate: 0.02855250575224251
  Sigma Multiplier: 0.6528051087954593
  Initialization Multiplier: 0.8159618615047289


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.30it/s, loss=-0.003551, elapsed time=0.04, total time=6.31]
[I 2025-06-07 23:11:28,693] Trial 143 finished with value: -0.003550972368446168 and parameters: {'learning_rate': 0.02855250575224251, 'sigma_multiplier': 0.6528051087954593, 'initialization_multiplier': 0.8159618615047289}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 143 final loss: -0.00355097
Trial 144:
  Learning Rate: 0.037296982126231386
  Sigma Multiplier: 0.6929352955350139
  Initialization Multiplier: 0.7271699016567232


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.78it/s, loss=-0.003449, elapsed time=0.04, total time=6.18]
[I 2025-06-07 23:11:34,905] Trial 144 finished with value: -0.0034485517923821445 and parameters: {'learning_rate': 0.037296982126231386, 'sigma_multiplier': 0.6929352955350139, 'initialization_multiplier': 0.7271699016567232}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 144 final loss: -0.00344855
Trial 145:
  Learning Rate: 0.06063439366149134
  Sigma Multiplier: 1.3408313211297562
  Initialization Multiplier: 0.6353758211084248


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.28it/s, loss=-0.001508, elapsed time=0.03, total time=5.26]
[I 2025-06-07 23:11:40,197] Trial 145 finished with value: -0.0015082887868443878 and parameters: {'learning_rate': 0.06063439366149134, 'sigma_multiplier': 1.3408313211297562, 'initialization_multiplier': 0.6353758211084248}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 145 final loss: -0.00150829
Trial 146:
  Learning Rate: 0.04982015209031563
  Sigma Multiplier: 0.5421707718447856
  Initialization Multiplier: 0.5764737398471824


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.16it/s, loss=-0.003579, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:11:46,850] Trial 146 finished with value: -0.003579246840554639 and parameters: {'learning_rate': 0.04982015209031563, 'sigma_multiplier': 0.5421707718447856, 'initialization_multiplier': 0.5764737398471824}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 146 final loss: -0.00357925
Trial 147:
  Learning Rate: 0.03118769058167163
  Sigma Multiplier: 0.44793146820040464
  Initialization Multiplier: 0.7639076283133015


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.27it/s, loss=-0.003246, elapsed time=0.04, total time=6.87]
[I 2025-06-07 23:11:53,755] Trial 147 finished with value: -0.0032455556689858544 and parameters: {'learning_rate': 0.03118769058167163, 'sigma_multiplier': 0.44793146820040464, 'initialization_multiplier': 0.7639076283133015}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 147 final loss: -0.00324556
Trial 148:
  Learning Rate: 0.09079760068677993
  Sigma Multiplier: 0.6044464303221082
  Initialization Multiplier: 0.6744018836918506


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.51it/s, loss=-0.003535, elapsed time=0.05, total time=6.52]
[I 2025-06-07 23:12:00,308] Trial 148 finished with value: -0.0035348457416133332 and parameters: {'learning_rate': 0.09079760068677993, 'sigma_multiplier': 0.6044464303221082, 'initialization_multiplier': 0.6744018836918506}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 148 final loss: -0.00353485
Trial 149:
  Learning Rate: 0.04140921957973263
  Sigma Multiplier: 0.5062993604613751
  Initialization Multiplier: 0.9129123804726156


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.68it/s, loss=-0.003661, elapsed time=0.04, total time=6.75]
[I 2025-06-07 23:12:07,091] Trial 149 finished with value: -0.003661266423316798 and parameters: {'learning_rate': 0.04140921957973263, 'sigma_multiplier': 0.5062993604613751, 'initialization_multiplier': 0.9129123804726156}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 149 final loss: -0.00366127
Trial 150:
  Learning Rate: 0.07127980624755823
  Sigma Multiplier: 0.6676056721383737
  Initialization Multiplier: 0.4783570904662195


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.80it/s, loss=-0.003636, elapsed time=0.04, total time=6.19]
[I 2025-06-07 23:12:13,576] Trial 150 finished with value: -0.0036356777053930195 and parameters: {'learning_rate': 0.07127980624755823, 'sigma_multiplier': 0.6676056721383737, 'initialization_multiplier': 0.4783570904662195}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 150 final loss: -0.00363568
Trial 151:
  Learning Rate: 0.055039020243816524
  Sigma Multiplier: 0.5528064994720242
  Initialization Multiplier: 0.7026617767881351


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.19it/s, loss=-0.003476, elapsed time=0.04, total time=6.6] 
[I 2025-06-07 23:12:20,213] Trial 151 finished with value: -0.0034762156620271392 and parameters: {'learning_rate': 0.055039020243816524, 'sigma_multiplier': 0.5528064994720242, 'initialization_multiplier': 0.7026617767881351}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 151 final loss: -0.00347622
Trial 152:
  Learning Rate: 0.08153747174960077
  Sigma Multiplier: 0.5765922270476292
  Initialization Multiplier: 0.7958512701117761


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.47it/s, loss=-0.003542, elapsed time=0.04, total time=6.53]
[I 2025-06-07 23:12:26,773] Trial 152 finished with value: -0.0035418541027388544 and parameters: {'learning_rate': 0.08153747174960077, 'sigma_multiplier': 0.5765922270476292, 'initialization_multiplier': 0.7958512701117761}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 152 final loss: -0.00354185
Trial 153:
  Learning Rate: 3.187172407573367e-05
  Sigma Multiplier: 0.6286640970554241
  Initialization Multiplier: 0.7012810339110255


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.84it/s, loss=0.072946, elapsed time=0.04, total time=6.42]
[I 2025-06-07 23:12:33,231] Trial 153 finished with value: 0.07294636593179843 and parameters: {'learning_rate': 3.187172407573367e-05, 'sigma_multiplier': 0.6286640970554241, 'initialization_multiplier': 0.7012810339110255}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 153 final loss: 0.07294637
Trial 154:
  Learning Rate: 0.07625028154869301
  Sigma Multiplier: 0.5358427549546654
  Initialization Multiplier: 0.6232490990954614


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.11it/s, loss=-0.003333, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:12:39,886] Trial 154 finished with value: -0.0033327433213429596 and parameters: {'learning_rate': 0.07625028154869301, 'sigma_multiplier': 0.5358427549546654, 'initialization_multiplier': 0.6232490990954614}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 154 final loss: -0.00333274
Trial 155:
  Learning Rate: 0.04519529740855186
  Sigma Multiplier: 0.6074922184140314
  Initialization Multiplier: 0.5200183801925878


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.74it/s, loss=-0.003390, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:12:46,385] Trial 155 finished with value: -0.003390223854596733 and parameters: {'learning_rate': 0.04519529740855186, 'sigma_multiplier': 0.6074922184140314, 'initialization_multiplier': 0.5200183801925878}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 155 final loss: -0.00339022
Trial 156:
  Learning Rate: 0.06216152039345463
  Sigma Multiplier: 0.4741720835533154
  Initialization Multiplier: 0.8600764035386046


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.63it/s, loss=-0.003280, elapsed time=0.04, total time=6.76]
[I 2025-06-07 23:12:53,183] Trial 156 finished with value: -0.00327970330005893 and parameters: {'learning_rate': 0.06216152039345463, 'sigma_multiplier': 0.4741720835533154, 'initialization_multiplier': 0.8600764035386046}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 156 final loss: -0.00327970
Trial 157:
  Learning Rate: 0.034927815101987295
  Sigma Multiplier: 0.7138291327380628
  Initialization Multiplier: 0.7381419361572374


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.85it/s, loss=-0.003031, elapsed time=0.04, total time=6.18]
[I 2025-06-07 23:12:59,392] Trial 157 finished with value: -0.003030889802130787 and parameters: {'learning_rate': 0.034927815101987295, 'sigma_multiplier': 0.7138291327380628, 'initialization_multiplier': 0.7381419361572374}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 157 final loss: -0.00303089
Trial 158:
  Learning Rate: 0.05039927150644832
  Sigma Multiplier: 0.5593456083380464
  Initialization Multiplier: 0.5807912774954517


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.18it/s, loss=-0.003669, elapsed time=0.04, total time=6.61]
[I 2025-06-07 23:13:06,035] Trial 158 finished with value: -0.003668880497647592 and parameters: {'learning_rate': 0.05039927150644832, 'sigma_multiplier': 0.5593456083380464, 'initialization_multiplier': 0.5807912774954517}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 158 final loss: -0.00366888
Trial 159:
  Learning Rate: 0.04219953774517216
  Sigma Multiplier: 0.42284293544906093
  Initialization Multiplier: 0.6493559429356714


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.27it/s, loss=-0.002786, elapsed time=0.04, total time=6.86]
[I 2025-06-07 23:13:12,930] Trial 159 finished with value: -0.0027864472205736436 and parameters: {'learning_rate': 0.04219953774517216, 'sigma_multiplier': 0.42284293544906093, 'initialization_multiplier': 0.6493559429356714}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 159 final loss: -0.00278645
Trial 160:
  Learning Rate: 0.024774140522739335
  Sigma Multiplier: 0.6488606567714692
  Initialization Multiplier: 0.694940458240674


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.04it/s, loss=-0.003746, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:13:19,335] Trial 160 finished with value: -0.003745634780479938 and parameters: {'learning_rate': 0.024774140522739335, 'sigma_multiplier': 0.6488606567714692, 'initialization_multiplier': 0.694940458240674}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 160 final loss: -0.00374563
Trial 161:
  Learning Rate: 0.023438729637058446
  Sigma Multiplier: 0.6644587483290453
  Initialization Multiplier: 0.6813890067213889


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.22it/s, loss=-0.003606, elapsed time=0.04, total time=6.32]
[I 2025-06-07 23:13:25,691] Trial 161 finished with value: -0.0036058472696915145 and parameters: {'learning_rate': 0.023438729637058446, 'sigma_multiplier': 0.6644587483290453, 'initialization_multiplier': 0.6813890067213889}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 161 final loss: -0.00360585
Trial 162:
  Learning Rate: 0.029074990054698657
  Sigma Multiplier: 0.6344235484011722
  Initialization Multiplier: 0.7627891300241866


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.80it/s, loss=-0.003731, elapsed time=0.04, total time=6.44]
[I 2025-06-07 23:13:32,169] Trial 162 finished with value: -0.0037314795080705854 and parameters: {'learning_rate': 0.029074990054698657, 'sigma_multiplier': 0.6344235484011722, 'initialization_multiplier': 0.7627891300241866}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 162 final loss: -0.00373148
Trial 163:
  Learning Rate: 0.018182704129200932
  Sigma Multiplier: 0.633609324321989
  Initialization Multiplier: 0.8175412359986822


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.83it/s, loss=-0.003234, elapsed time=0.04, total time=6.43]
[I 2025-06-07 23:13:38,635] Trial 163 finished with value: -0.003233982663841925 and parameters: {'learning_rate': 0.018182704129200932, 'sigma_multiplier': 0.633609324321989, 'initialization_multiplier': 0.8175412359986822}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 163 final loss: -0.00323398
Trial 164:
  Learning Rate: 0.026514440117969496
  Sigma Multiplier: 0.7570576717040628
  Initialization Multiplier: 0.7666829861384588


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.24it/s, loss=-0.003360, elapsed time=0.04, total time=6.08]
[I 2025-06-07 23:13:44,753] Trial 164 finished with value: -0.0033598966216268266 and parameters: {'learning_rate': 0.026514440117969496, 'sigma_multiplier': 0.7570576717040628, 'initialization_multiplier': 0.7666829861384588}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 164 final loss: -0.00335990
Trial 165:
  Learning Rate: 0.01144148115379827
  Sigma Multiplier: 0.6939597610629543
  Initialization Multiplier: 0.7325842898598707


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.45it/s, loss=-0.002827, elapsed time=0.04, total time=6.27]
[I 2025-06-07 23:13:51,058] Trial 165 finished with value: -0.0028268573772960385 and parameters: {'learning_rate': 0.01144148115379827, 'sigma_multiplier': 0.6939597610629543, 'initialization_multiplier': 0.7325842898598707}. Best is trial 103 with value: -0.004039796935804129.


Training has not converged after 150 steps
Trial 165 final loss: -0.00282686
Trial 166:
  Learning Rate: 0.03297501232362791
  Sigma Multiplier: 0.5931140851110797
  Initialization Multiplier: 0.6042046367376791


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.33it/s, loss=-0.004106, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:13:57,657] Trial 166 finished with value: -0.00410615678882426 and parameters: {'learning_rate': 0.03297501232362791, 'sigma_multiplier': 0.5931140851110797, 'initialization_multiplier': 0.6042046367376791}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 166 final loss: -0.00410616
Trial 167:
  Learning Rate: 0.03209155909005825
  Sigma Multiplier: 0.5909452984828143
  Initialization Multiplier: 0.5989323026838708


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.37it/s, loss=-0.003867, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:14:04,253] Trial 167 finished with value: -0.003866947780741347 and parameters: {'learning_rate': 0.03209155909005825, 'sigma_multiplier': 0.5909452984828143, 'initialization_multiplier': 0.5989323026838708}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 167 final loss: -0.00386695
Trial 168:
  Learning Rate: 0.03525591124972983
  Sigma Multiplier: 0.5914141192027372
  Initialization Multiplier: 0.5523129937083183


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.27it/s, loss=-0.003904, elapsed time=0.04, total time=6.59]
[I 2025-06-07 23:14:10,875] Trial 168 finished with value: -0.0039039379915667685 and parameters: {'learning_rate': 0.03525591124972983, 'sigma_multiplier': 0.5914141192027372, 'initialization_multiplier': 0.5523129937083183}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 168 final loss: -0.00390394
Trial 169:
  Learning Rate: 0.036020011827379544
  Sigma Multiplier: 0.5076138318703936
  Initialization Multiplier: 0.5443291373483438


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.67it/s, loss=-0.003438, elapsed time=0.04, total time=6.75]
[I 2025-06-07 23:14:17,664] Trial 169 finished with value: -0.0034375245799667083 and parameters: {'learning_rate': 0.036020011827379544, 'sigma_multiplier': 0.5076138318703936, 'initialization_multiplier': 0.5443291373483438}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 169 final loss: -0.00343752
Trial 170:
  Learning Rate: 0.032048561523832085
  Sigma Multiplier: 0.5904941332848931
  Initialization Multiplier: 0.5052224572110039


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.38it/s, loss=-0.003568, elapsed time=0.04, total time=6.55]
[I 2025-06-07 23:14:24,246] Trial 170 finished with value: -0.003567691518536008 and parameters: {'learning_rate': 0.032048561523832085, 'sigma_multiplier': 0.5904941332848931, 'initialization_multiplier': 0.5052224572110039}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 170 final loss: -0.00356769
Trial 171:
  Learning Rate: 0.03776897383677352
  Sigma Multiplier: 0.5828058910365321
  Initialization Multiplier: 0.5922637060856768


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.30it/s, loss=-0.003771, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:14:30,864] Trial 171 finished with value: -0.0037707186142379046 and parameters: {'learning_rate': 0.03776897383677352, 'sigma_multiplier': 0.5828058910365321, 'initialization_multiplier': 0.5922637060856768}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 171 final loss: -0.00377072
Trial 172:
  Learning Rate: 0.038747974365370114
  Sigma Multiplier: 0.5767149989671557
  Initialization Multiplier: 0.5885548190585014


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.19it/s, loss=-0.003730, elapsed time=0.04, total time=6.6] 
[I 2025-06-07 23:14:37,495] Trial 172 finished with value: -0.0037299050647106135 and parameters: {'learning_rate': 0.038747974365370114, 'sigma_multiplier': 0.5767149989671557, 'initialization_multiplier': 0.5885548190585014}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 172 final loss: -0.00372991
Trial 173:
  Learning Rate: 0.021269265742009092
  Sigma Multiplier: 0.5180017754859706
  Initialization Multiplier: 0.612073247520911


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.88it/s, loss=-0.003864, elapsed time=0.04, total time=6.7] 
[I 2025-06-07 23:14:44,226] Trial 173 finished with value: -0.003863506202270552 and parameters: {'learning_rate': 0.021269265742009092, 'sigma_multiplier': 0.5180017754859706, 'initialization_multiplier': 0.612073247520911}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 173 final loss: -0.00386351
Trial 174:
  Learning Rate: 0.02419627565481726
  Sigma Multiplier: 0.524451971897815
  Initialization Multiplier: 0.4221070657131082


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.76it/s, loss=-0.003186, elapsed time=0.04, total time=6.73]
[I 2025-06-07 23:14:50,989] Trial 174 finished with value: -0.0031855728579974815 and parameters: {'learning_rate': 0.02419627565481726, 'sigma_multiplier': 0.524451971897815, 'initialization_multiplier': 0.4221070657131082}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 174 final loss: -0.00318557
Trial 175:
  Learning Rate: 0.021057300804482128
  Sigma Multiplier: 0.4703686081295309
  Initialization Multiplier: 0.5417466807244407


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.46it/s, loss=-0.003599, elapsed time=0.04, total time=6.82]
[I 2025-06-07 23:14:57,842] Trial 175 finished with value: -0.0035989278461530903 and parameters: {'learning_rate': 0.021057300804482128, 'sigma_multiplier': 0.4703686081295309, 'initialization_multiplier': 0.5417466807244407}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 175 final loss: -0.00359893
Trial 176:
  Learning Rate: 0.0358145593669602
  Sigma Multiplier: 0.6037686904390429
  Initialization Multiplier: 0.6075812767057595


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.52it/s, loss=-0.003865, elapsed time=0.04, total time=6.51]
[I 2025-06-07 23:15:04,390] Trial 176 finished with value: -0.003865295306250588 and parameters: {'learning_rate': 0.0358145593669602, 'sigma_multiplier': 0.6037686904390429, 'initialization_multiplier': 0.6075812767057595}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 176 final loss: -0.00386530
Trial 177:
  Learning Rate: 0.031056782204887026
  Sigma Multiplier: 0.5436133685225306
  Initialization Multiplier: 0.5997093075448775


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.98it/s, loss=-0.003638, elapsed time=0.04, total time=6.67]
[I 2025-06-07 23:15:11,093] Trial 177 finished with value: -0.0036378134674315277 and parameters: {'learning_rate': 0.031056782204887026, 'sigma_multiplier': 0.5436133685225306, 'initialization_multiplier': 0.5997093075448775}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 177 final loss: -0.00363781
Trial 178:
  Learning Rate: 0.020834308051843192
  Sigma Multiplier: 0.5865822218678298
  Initialization Multiplier: 0.49774451806023123


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.34it/s, loss=-0.003382, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:15:17,687] Trial 178 finished with value: -0.003381910139869302 and parameters: {'learning_rate': 0.020834308051843192, 'sigma_multiplier': 0.5865822218678298, 'initialization_multiplier': 0.49774451806023123}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 178 final loss: -0.00338191
Trial 179:
  Learning Rate: 0.03538208809901813
  Sigma Multiplier: 0.4960695500021133
  Initialization Multiplier: 0.5663136599228644


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.71it/s, loss=-0.003859, elapsed time=0.04, total time=6.73]
[I 2025-06-07 23:15:24,457] Trial 179 finished with value: -0.003858593256368353 and parameters: {'learning_rate': 0.03538208809901813, 'sigma_multiplier': 0.4960695500021133, 'initialization_multiplier': 0.5663136599228644}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 179 final loss: -0.00385859
Trial 180:
  Learning Rate: 0.0015409096226432842
  Sigma Multiplier: 0.36970380422754834
  Initialization Multiplier: 0.450702001625621


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.67it/s, loss=-0.000771, elapsed time=0.04, total time=7.06]
[I 2025-06-07 23:15:31,553] Trial 180 finished with value: -0.0007706052703205718 and parameters: {'learning_rate': 0.0015409096226432842, 'sigma_multiplier': 0.36970380422754834, 'initialization_multiplier': 0.450702001625621}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 180 final loss: -0.00077061
Trial 181:
  Learning Rate: 0.035629198253623554
  Sigma Multiplier: 0.5991114979054556
  Initialization Multiplier: 0.5625278592529921


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.22it/s, loss=-0.003749, elapsed time=0.04, total time=6.59]
[I 2025-06-07 23:15:38,179] Trial 181 finished with value: -0.0037486509994157802 and parameters: {'learning_rate': 0.035629198253623554, 'sigma_multiplier': 0.5991114979054556, 'initialization_multiplier': 0.5625278592529921}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 181 final loss: -0.00374865
Trial 182:
  Learning Rate: 0.03572591450993134
  Sigma Multiplier: 0.5016282657477144
  Initialization Multiplier: 0.5736655265324859


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.19it/s, loss=-0.003515, elapsed time=0.04, total time=6.89]
[I 2025-06-07 23:15:45,108] Trial 182 finished with value: -0.0035151428406081414 and parameters: {'learning_rate': 0.03572591450993134, 'sigma_multiplier': 0.5016282657477144, 'initialization_multiplier': 0.5736655265324859}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 182 final loss: -0.00351514
Trial 183:
  Learning Rate: 0.038500167603491633
  Sigma Multiplier: 0.43810660754888264
  Initialization Multiplier: 0.5451466873367379


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.98it/s, loss=-0.002963, elapsed time=0.04, total time=6.95]
[I 2025-06-07 23:15:52,095] Trial 183 finished with value: -0.002963289515808719 and parameters: {'learning_rate': 0.038500167603491633, 'sigma_multiplier': 0.43810660754888264, 'initialization_multiplier': 0.5451466873367379}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 183 final loss: -0.00296329
Trial 184:
  Learning Rate: 0.03111442109251926
  Sigma Multiplier: 0.5242630955891483
  Initialization Multiplier: 0.6177479378437207


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.76it/s, loss=-0.003668, elapsed time=0.04, total time=6.73]
[I 2025-06-07 23:15:58,863] Trial 184 finished with value: -0.003667904487233279 and parameters: {'learning_rate': 0.03111442109251926, 'sigma_multiplier': 0.5242630955891483, 'initialization_multiplier': 0.6177479378437207}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 184 final loss: -0.00366790
Trial 185:
  Learning Rate: 0.053071776991199006
  Sigma Multiplier: 0.6006487169885852
  Initialization Multiplier: 0.49258672385477986


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.35it/s, loss=-0.003477, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:16:05,456] Trial 185 finished with value: -0.0034769474475513148 and parameters: {'learning_rate': 0.053071776991199006, 'sigma_multiplier': 0.6006487169885852, 'initialization_multiplier': 0.49258672385477986}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 185 final loss: -0.00347695
Trial 186:
  Learning Rate: 0.04573874164194788
  Sigma Multiplier: 0.5672648051721529
  Initialization Multiplier: 0.566563443761534


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.14it/s, loss=-0.003525, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:16:12,110] Trial 186 finished with value: -0.0035250264509515523 and parameters: {'learning_rate': 0.04573874164194788, 'sigma_multiplier': 0.5672648051721529, 'initialization_multiplier': 0.566563443761534}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 186 final loss: -0.00352503
Trial 187:
  Learning Rate: 0.03402540659961842
  Sigma Multiplier: 0.4817074955873053
  Initialization Multiplier: 0.6133302743946605


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.36it/s, loss=-0.003635, elapsed time=0.04, total time=6.83]
[I 2025-06-07 23:16:18,979] Trial 187 finished with value: -0.0036345742352883246 and parameters: {'learning_rate': 0.03402540659961842, 'sigma_multiplier': 0.4817074955873053, 'initialization_multiplier': 0.6133302743946605}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 187 final loss: -0.00363457
Trial 188:
  Learning Rate: 0.041147694522087344
  Sigma Multiplier: 0.5481485081420782
  Initialization Multiplier: 0.520414340845162


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.29it/s, loss=-0.003897, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:16:25,863] Trial 188 finished with value: -0.003896680071892787 and parameters: {'learning_rate': 0.041147694522087344, 'sigma_multiplier': 0.5481485081420782, 'initialization_multiplier': 0.520414340845162}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 188 final loss: -0.00389668
Trial 189:
  Learning Rate: 0.04270789475748173
  Sigma Multiplier: 0.5586583508195156
  Initialization Multiplier: 0.4721833849455486


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.78it/s, loss=-0.003473, elapsed time=0.04, total time=6.74]
[I 2025-06-07 23:16:32,634] Trial 189 finished with value: -0.0034729142220178916 and parameters: {'learning_rate': 0.04270789475748173, 'sigma_multiplier': 0.5586583508195156, 'initialization_multiplier': 0.4721833849455486}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 189 final loss: -0.00347291
Trial 190:
  Learning Rate: 0.057306551022441754
  Sigma Multiplier: 0.5207717636528267
  Initialization Multiplier: 0.39838547164768773


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.53it/s, loss=-0.003165, elapsed time=0.04, total time=6.79]
[I 2025-06-07 23:16:39,457] Trial 190 finished with value: -0.003165086084322252 and parameters: {'learning_rate': 0.057306551022441754, 'sigma_multiplier': 0.5207717636528267, 'initialization_multiplier': 0.39838547164768773}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 190 final loss: -0.00316509
Trial 191:
  Learning Rate: 0.038494393725077486
  Sigma Multiplier: 0.6150816001488218
  Initialization Multiplier: 0.5397114698680939


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.58it/s, loss=-0.003688, elapsed time=0.04, total time=6.5] 
[I 2025-06-07 23:16:45,993] Trial 191 finished with value: -0.0036881036990408216 and parameters: {'learning_rate': 0.038494393725077486, 'sigma_multiplier': 0.6150816001488218, 'initialization_multiplier': 0.5397114698680939}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 191 final loss: -0.00368810
Trial 192:
  Learning Rate: 0.028761230917113374
  Sigma Multiplier: 0.6069548759336296
  Initialization Multiplier: 0.6190721935107785


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.52it/s, loss=-0.003554, elapsed time=0.04, total time=6.52]
[I 2025-06-07 23:16:52,543] Trial 192 finished with value: -0.003554106564481285 and parameters: {'learning_rate': 0.028761230917113374, 'sigma_multiplier': 0.6069548759336296, 'initialization_multiplier': 0.6190721935107785}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 192 final loss: -0.00355411
Trial 193:
  Learning Rate: 0.04783076768099698
  Sigma Multiplier: 0.5435069071888925
  Initialization Multiplier: 0.5182292331343907


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.81it/s, loss=-0.003205, elapsed time=0.04, total time=6.71]
[I 2025-06-07 23:16:59,288] Trial 193 finished with value: -0.0032049803118878303 and parameters: {'learning_rate': 0.04783076768099698, 'sigma_multiplier': 0.5435069071888925, 'initialization_multiplier': 0.5182292331343907}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 193 final loss: -0.00320498
Trial 194:
  Learning Rate: 0.0342119598510611
  Sigma Multiplier: 0.4946541343124171
  Initialization Multiplier: 0.5797460273845555


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.26it/s, loss=-0.003778, elapsed time=0.04, total time=6.87]
[I 2025-06-07 23:17:06,195] Trial 194 finished with value: -0.0037779271423145815 and parameters: {'learning_rate': 0.0342119598510611, 'sigma_multiplier': 0.4946541343124171, 'initialization_multiplier': 0.5797460273845555}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 194 final loss: -0.00377793
Trial 195:
  Learning Rate: 0.029939708035467274
  Sigma Multiplier: 0.48465892612352435
  Initialization Multiplier: 0.6382331097209497


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.25it/s, loss=-0.003618, elapsed time=0.04, total time=6.88]
[I 2025-06-07 23:17:13,108] Trial 195 finished with value: -0.003617663830347336 and parameters: {'learning_rate': 0.029939708035467274, 'sigma_multiplier': 0.48465892612352435, 'initialization_multiplier': 0.6382331097209497}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 195 final loss: -0.00361766
Trial 196:
  Learning Rate: 0.04183580418629313
  Sigma Multiplier: 0.44760089353007854
  Initialization Multiplier: 0.5855964993127377


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.93it/s, loss=-0.003567, elapsed time=0.04, total time=6.98]
[I 2025-06-07 23:17:20,123] Trial 196 finished with value: -0.0035665110209818895 and parameters: {'learning_rate': 0.04183580418629313, 'sigma_multiplier': 0.44760089353007854, 'initialization_multiplier': 0.5855964993127377}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 196 final loss: -0.00356651
Trial 197:
  Learning Rate: 0.0048884355759013995
  Sigma Multiplier: 0.40340197414126455
  Initialization Multiplier: 0.44360507248226055


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.89it/s, loss=-0.001892, elapsed time=0.04, total time=6.98]
[I 2025-06-07 23:17:27,141] Trial 197 finished with value: -0.0018922728853230447 and parameters: {'learning_rate': 0.0048884355759013995, 'sigma_multiplier': 0.40340197414126455, 'initialization_multiplier': 0.44360507248226055}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 197 final loss: -0.00189227
Trial 198:
  Learning Rate: 0.05419659750513617
  Sigma Multiplier: 0.5191690736469976
  Initialization Multiplier: 0.6565721659120629


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.78it/s, loss=-0.003197, elapsed time=0.04, total time=6.72]
[I 2025-06-07 23:17:33,893] Trial 198 finished with value: -0.003196774774751007 and parameters: {'learning_rate': 0.05419659750513617, 'sigma_multiplier': 0.5191690736469976, 'initialization_multiplier': 0.6565721659120629}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 198 final loss: -0.00319677
Trial 199:
  Learning Rate: 0.026389935919107318
  Sigma Multiplier: 0.5673192089220598
  Initialization Multiplier: 0.5091631622618519


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.97it/s, loss=-0.003747, elapsed time=0.04, total time=6.67]
[I 2025-06-07 23:17:40,594] Trial 199 finished with value: -0.0037473332447791655 and parameters: {'learning_rate': 0.026389935919107318, 'sigma_multiplier': 0.5673192089220598, 'initialization_multiplier': 0.5091631622618519}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 199 final loss: -0.00374733
Trial 200:
  Learning Rate: 0.0338755779419271
  Sigma Multiplier: 0.6525366425970767
  Initialization Multiplier: 0.5938600293099467


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.92it/s, loss=-0.003648, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:17:47,041] Trial 200 finished with value: -0.003648484476164283 and parameters: {'learning_rate': 0.0338755779419271, 'sigma_multiplier': 0.6525366425970767, 'initialization_multiplier': 0.5938600293099467}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 200 final loss: -0.00364848
Trial 201:
  Learning Rate: 0.03989348716323849
  Sigma Multiplier: 0.5984334749872681
  Initialization Multiplier: 0.5580735235684774


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.27it/s, loss=-0.004003, elapsed time=0.04, total time=6.57]
[I 2025-06-07 23:17:53,652] Trial 201 finished with value: -0.004003349945012125 and parameters: {'learning_rate': 0.03989348716323849, 'sigma_multiplier': 0.5984334749872681, 'initialization_multiplier': 0.5580735235684774}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 201 final loss: -0.00400335
Trial 202:
  Learning Rate: 0.04669764142522583
  Sigma Multiplier: 0.576762381125029
  Initialization Multiplier: 0.6385710159517463


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.83it/s, loss=-0.003692, elapsed time=0.04, total time=6.7] 
[I 2025-06-07 23:18:00,395] Trial 202 finished with value: -0.0036918542904247342 and parameters: {'learning_rate': 0.04669764142522583, 'sigma_multiplier': 0.576762381125029, 'initialization_multiplier': 0.6385710159517463}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 202 final loss: -0.00369185
Trial 203:
  Learning Rate: 0.03954778442822028
  Sigma Multiplier: 0.49188738874354565
  Initialization Multiplier: 0.5452619593319178


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.27it/s, loss=-0.002925, elapsed time=0.04, total time=6.88]
[I 2025-06-07 23:18:07,310] Trial 203 finished with value: -0.002925056976045614 and parameters: {'learning_rate': 0.03954778442822028, 'sigma_multiplier': 0.49188738874354565, 'initialization_multiplier': 0.5452619593319178}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 203 final loss: -0.00292506
Trial 204:
  Learning Rate: 0.05309983166274704
  Sigma Multiplier: 0.6145418366853437
  Initialization Multiplier: 0.5953664504034449


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.44it/s, loss=-0.003460, elapsed time=0.04, total time=6.54]
[I 2025-06-07 23:18:13,881] Trial 204 finished with value: -0.0034603668639064866 and parameters: {'learning_rate': 0.05309983166274704, 'sigma_multiplier': 0.6145418366853437, 'initialization_multiplier': 0.5953664504034449}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 204 final loss: -0.00346037
Trial 205:
  Learning Rate: 0.032504074714003864
  Sigma Multiplier: 0.5417484737056757
  Initialization Multiplier: 0.47453999624300425


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.74it/s, loss=-0.003361, elapsed time=0.04, total time=6.74]
[I 2025-06-07 23:18:20,652] Trial 205 finished with value: -0.003360546991960659 and parameters: {'learning_rate': 0.032504074714003864, 'sigma_multiplier': 0.5417484737056757, 'initialization_multiplier': 0.47453999624300425}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 205 final loss: -0.00336055
Trial 206:
  Learning Rate: 0.04283284952915127
  Sigma Multiplier: 0.6641037765297363
  Initialization Multiplier: 0.6589409795997672


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.91it/s, loss=-0.003552, elapsed time=0.04, total time=6.4] 
[I 2025-06-07 23:18:27,088] Trial 206 finished with value: -0.003551778771727765 and parameters: {'learning_rate': 0.04283284952915127, 'sigma_multiplier': 0.6641037765297363, 'initialization_multiplier': 0.6589409795997672}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 206 final loss: -0.00355178
Trial 207:
  Learning Rate: 0.061441729196718975
  Sigma Multiplier: 0.5859999873551922
  Initialization Multiplier: 0.5164653429911307


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.02it/s, loss=-0.003407, elapsed time=0.04, total time=6.65]
[I 2025-06-07 23:18:33,778] Trial 207 finished with value: -0.0034065881748099767 and parameters: {'learning_rate': 0.061441729196718975, 'sigma_multiplier': 0.5859999873551922, 'initialization_multiplier': 0.5164653429911307}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 207 final loss: -0.00340659
Trial 208:
  Learning Rate: 0.03683562095391466
  Sigma Multiplier: 0.5425126188691239
  Initialization Multiplier: 0.5645608501536648


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.77it/s, loss=-0.003575, elapsed time=0.04, total time=6.72]
[I 2025-06-07 23:18:40,531] Trial 208 finished with value: -0.003575279147167618 and parameters: {'learning_rate': 0.03683562095391466, 'sigma_multiplier': 0.5425126188691239, 'initialization_multiplier': 0.5645608501536648}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 208 final loss: -0.00357528
Trial 209:
  Learning Rate: 0.0265310337912923
  Sigma Multiplier: 0.6440642798463817
  Initialization Multiplier: 0.6041629177412752


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.73it/s, loss=-0.003627, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:18:47,027] Trial 209 finished with value: -0.0036271405728532076 and parameters: {'learning_rate': 0.0265310337912923, 'sigma_multiplier': 0.6440642798463817, 'initialization_multiplier': 0.6041629177412752}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 209 final loss: -0.00362714
Trial 210:
  Learning Rate: 0.048435042155363504
  Sigma Multiplier: 0.49840978482898063
  Initialization Multiplier: 1.776196005846502


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.55it/s, loss=0.071943, elapsed time=0.04, total time=7.09]
[I 2025-06-07 23:18:54,159] Trial 210 finished with value: 0.07194307667858521 and parameters: {'learning_rate': 0.048435042155363504, 'sigma_multiplier': 0.49840978482898063, 'initialization_multiplier': 1.776196005846502}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 210 final loss: 0.07194308
Trial 211:
  Learning Rate: 0.0342277286912362
  Sigma Multiplier: 0.5876334667883638
  Initialization Multiplier: 0.563355531380625


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.16it/s, loss=-0.003832, elapsed time=0.04, total time=6.61]
[I 2025-06-07 23:19:00,801] Trial 211 finished with value: -0.0038324931933301208 and parameters: {'learning_rate': 0.0342277286912362, 'sigma_multiplier': 0.5876334667883638, 'initialization_multiplier': 0.563355531380625}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 211 final loss: -0.00383249
Trial 212:
  Learning Rate: 0.030939114137246638
  Sigma Multiplier: 0.6165727007667219
  Initialization Multiplier: 0.5495146152201222


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.36it/s, loss=-0.003413, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:19:07,398] Trial 212 finished with value: -0.0034125298339519245 and parameters: {'learning_rate': 0.030939114137246638, 'sigma_multiplier': 0.6165727007667219, 'initialization_multiplier': 0.5495146152201222}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 212 final loss: -0.00341253
Trial 213:
  Learning Rate: 0.0401430240170018
  Sigma Multiplier: 0.5712847209304717
  Initialization Multiplier: 0.6333432186223228


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.79it/s, loss=-0.003634, elapsed time=0.04, total time=6.73]
[I 2025-06-07 23:19:14,163] Trial 213 finished with value: -0.003633744903252471 and parameters: {'learning_rate': 0.0401430240170018, 'sigma_multiplier': 0.5712847209304717, 'initialization_multiplier': 0.6333432186223228}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 213 final loss: -0.00363374
Trial 214:
  Learning Rate: 0.0365627208478869
  Sigma Multiplier: 1.1749988495313453
  Initialization Multiplier: 0.5872173909294496


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.50it/s, loss=-0.001904, elapsed time=0.03, total time=5.59]
[I 2025-06-07 23:19:19,793] Trial 214 finished with value: -0.0019042499000146945 and parameters: {'learning_rate': 0.0365627208478869, 'sigma_multiplier': 1.1749988495313453, 'initialization_multiplier': 0.5872173909294496}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 214 final loss: -0.00190425
Trial 215:
  Learning Rate: 0.022800937526532918
  Sigma Multiplier: 0.6817652278971693
  Initialization Multiplier: 0.6663734103266452


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.09it/s, loss=-0.003591, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:19:26,201] Trial 215 finished with value: -0.0035909442935357434 and parameters: {'learning_rate': 0.022800937526532918, 'sigma_multiplier': 0.6817652278971693, 'initialization_multiplier': 0.6663734103266452}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 215 final loss: -0.00359094
Trial 216:
  Learning Rate: 0.045696803171957026
  Sigma Multiplier: 0.5319124236076304
  Initialization Multiplier: 0.4991131049286027


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.74it/s, loss=-0.003299, elapsed time=0.04, total time=6.73]
[I 2025-06-07 23:19:32,969] Trial 216 finished with value: -0.0032991645957472977 and parameters: {'learning_rate': 0.045696803171957026, 'sigma_multiplier': 0.5319124236076304, 'initialization_multiplier': 0.4991131049286027}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 216 final loss: -0.00329916
Trial 217:
  Learning Rate: 0.029179977040732562
  Sigma Multiplier: 0.6058580514021092
  Initialization Multiplier: 0.5347250644038497


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.28it/s, loss=-0.003800, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:19:39,587] Trial 217 finished with value: -0.0037999897666795113 and parameters: {'learning_rate': 0.029179977040732562, 'sigma_multiplier': 0.6058580514021092, 'initialization_multiplier': 0.5347250644038497}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 217 final loss: -0.00379999
Trial 218:
  Learning Rate: 0.026495674860947938
  Sigma Multiplier: 0.6025790704309076
  Initialization Multiplier: 0.5318084911808786


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.47it/s, loss=-0.003840, elapsed time=0.04, total time=6.52]
[I 2025-06-07 23:19:46,147] Trial 218 finished with value: -0.0038402307209832805 and parameters: {'learning_rate': 0.026495674860947938, 'sigma_multiplier': 0.6025790704309076, 'initialization_multiplier': 0.5318084911808786}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 218 final loss: -0.00384023
Trial 219:
  Learning Rate: 0.018282185075922122
  Sigma Multiplier: 0.6154698698295693
  Initialization Multiplier: 0.45511675967125087


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.48it/s, loss=-0.003324, elapsed time=0.04, total time=6.53]
[I 2025-06-07 23:19:52,721] Trial 219 finished with value: -0.003324007849722354 and parameters: {'learning_rate': 0.018282185075922122, 'sigma_multiplier': 0.6154698698295693, 'initialization_multiplier': 0.45511675967125087}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 219 final loss: -0.00332401
Trial 220:
  Learning Rate: 0.025381315248555525
  Sigma Multiplier: 0.5878010882914605
  Initialization Multiplier: 0.5474433236597714


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.33it/s, loss=-0.003805, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:19:59,318] Trial 220 finished with value: -0.0038048775070965626 and parameters: {'learning_rate': 0.025381315248555525, 'sigma_multiplier': 0.5878010882914605, 'initialization_multiplier': 0.5474433236597714}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 220 final loss: -0.00380488
Trial 221:
  Learning Rate: 0.025451468979941112
  Sigma Multiplier: 0.5849606372097721
  Initialization Multiplier: 0.5561165100374322


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.14it/s, loss=-0.003496, elapsed time=0.04, total time=6.63]
[I 2025-06-07 23:20:05,978] Trial 221 finished with value: -0.0034963544562178796 and parameters: {'learning_rate': 0.025451468979941112, 'sigma_multiplier': 0.5849606372097721, 'initialization_multiplier': 0.5561165100374322}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 221 final loss: -0.00349635
Trial 222:
  Learning Rate: 0.02966470627991471
  Sigma Multiplier: 0.6288151935201756
  Initialization Multiplier: 0.5282322318187017


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.86it/s, loss=-0.003654, elapsed time=0.04, total time=6.43]
[I 2025-06-07 23:20:12,446] Trial 222 finished with value: -0.003653781776606315 and parameters: {'learning_rate': 0.02966470627991471, 'sigma_multiplier': 0.6288151935201756, 'initialization_multiplier': 0.5282322318187017}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 222 final loss: -0.00365378
Trial 223:
  Learning Rate: 0.020784159004930285
  Sigma Multiplier: 0.5500300854959983
  Initialization Multiplier: 0.5792316586740925


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.84it/s, loss=-0.003218, elapsed time=0.04, total time=6.7] 
[I 2025-06-07 23:20:19,183] Trial 223 finished with value: -0.003217717087095734 and parameters: {'learning_rate': 0.020784159004930285, 'sigma_multiplier': 0.5500300854959983, 'initialization_multiplier': 0.5792316586740925}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 223 final loss: -0.00321772
Trial 224:
  Learning Rate: 0.02605574691013637
  Sigma Multiplier: 0.5969625188013541
  Initialization Multiplier: 0.48920862689550726


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.49it/s, loss=-0.003603, elapsed time=0.04, total time=6.52]
[I 2025-06-07 23:20:25,740] Trial 224 finished with value: -0.003603381619566426 and parameters: {'learning_rate': 0.02605574691013637, 'sigma_multiplier': 0.5969625188013541, 'initialization_multiplier': 0.48920862689550726}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 224 final loss: -0.00360338
Trial 225:
  Learning Rate: 0.03253644195909557
  Sigma Multiplier: 0.5054890595827386
  Initialization Multiplier: 0.36321027134606176


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.73it/s, loss=-0.003503, elapsed time=0.04, total time=6.73]
[I 2025-06-07 23:20:32,510] Trial 225 finished with value: -0.0035032368205265324 and parameters: {'learning_rate': 0.03253644195909557, 'sigma_multiplier': 0.5054890595827386, 'initialization_multiplier': 0.36321027134606176}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 225 final loss: -0.00350324
Trial 226:
  Learning Rate: 0.022412295987873445
  Sigma Multiplier: 0.559368084251784
  Initialization Multiplier: 0.4222407993101245


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.03it/s, loss=-0.003309, elapsed time=0.04, total time=6.65]
[I 2025-06-07 23:20:39,195] Trial 226 finished with value: -0.003308976659313009 and parameters: {'learning_rate': 0.022412295987873445, 'sigma_multiplier': 0.559368084251784, 'initialization_multiplier': 0.4222407993101245}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 226 final loss: -0.00330898
Trial 227:
  Learning Rate: 0.030303137253086098
  Sigma Multiplier: 0.4641446648934232
  Initialization Multiplier: 0.6254361345931241


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.46it/s, loss=-0.003335, elapsed time=0.04, total time=6.8] 
[I 2025-06-07 23:20:46,034] Trial 227 finished with value: -0.0033346077964675883 and parameters: {'learning_rate': 0.030303137253086098, 'sigma_multiplier': 0.4641446648934232, 'initialization_multiplier': 0.6254361345931241}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 227 final loss: -0.00333461
Trial 228:
  Learning Rate: 0.03493184742240498
  Sigma Multiplier: 0.6378806237737108
  Initialization Multiplier: 0.5349617107752673


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.95it/s, loss=-0.003582, elapsed time=0.04, total time=6.39]
[I 2025-06-07 23:20:52,461] Trial 228 finished with value: -0.003581677922627343 and parameters: {'learning_rate': 0.03493184742240498, 'sigma_multiplier': 0.6378806237737108, 'initialization_multiplier': 0.5349617107752673}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 228 final loss: -0.00358168
Trial 229:
  Learning Rate: 0.026571120031694716
  Sigma Multiplier: 0.5961905676248292
  Initialization Multiplier: 0.6060155371980638


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.62it/s, loss=-0.003831, elapsed time=0.04, total time=6.48]
[I 2025-06-07 23:20:58,982] Trial 229 finished with value: -0.00383073061275614 and parameters: {'learning_rate': 0.026571120031694716, 'sigma_multiplier': 0.5961905676248292, 'initialization_multiplier': 0.6060155371980638}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 229 final loss: -0.00383073
Trial 230:
  Learning Rate: 0.017680845056815823
  Sigma Multiplier: 0.6023539907835683
  Initialization Multiplier: 0.6123284852532187


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.21it/s, loss=-0.003379, elapsed time=0.04, total time=6.6] 
[I 2025-06-07 23:21:05,617] Trial 230 finished with value: -0.0033786310651028953 and parameters: {'learning_rate': 0.017680845056815823, 'sigma_multiplier': 0.6023539907835683, 'initialization_multiplier': 0.6123284852532187}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 230 final loss: -0.00337863
Trial 231:
  Learning Rate: 0.026169361832588188
  Sigma Multiplier: 0.567497044792214
  Initialization Multiplier: 0.5767513296997479


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.31it/s, loss=-0.003441, elapsed time=0.04, total time=6.57]
[I 2025-06-07 23:21:12,226] Trial 231 finished with value: -0.0034411773830266214 and parameters: {'learning_rate': 0.026169361832588188, 'sigma_multiplier': 0.567497044792214, 'initialization_multiplier': 0.5767513296997479}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 231 final loss: -0.00344118
Trial 232:
  Learning Rate: 0.02885474720386712
  Sigma Multiplier: 0.5326736719832698
  Initialization Multiplier: 0.5594439525350853


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.22it/s, loss=-0.003735, elapsed time=0.04, total time=6.95]
[I 2025-06-07 23:21:19,215] Trial 232 finished with value: -0.003734700092715157 and parameters: {'learning_rate': 0.02885474720386712, 'sigma_multiplier': 0.5326736719832698, 'initialization_multiplier': 0.5594439525350853}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 232 final loss: -0.00373470
Trial 233:
  Learning Rate: 0.03652257321826646
  Sigma Multiplier: 0.6639912245918478
  Initialization Multiplier: 0.6125469636173467


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.25it/s, loss=-0.003552, elapsed time=0.04, total time=6.33]
[I 2025-06-07 23:21:25,578] Trial 233 finished with value: -0.0035520282603114157 and parameters: {'learning_rate': 0.03652257321826646, 'sigma_multiplier': 0.6639912245918478, 'initialization_multiplier': 0.6125469636173467}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 233 final loss: -0.00355203
Trial 234:
  Learning Rate: 0.023743203438636406
  Sigma Multiplier: 0.6091150927246557
  Initialization Multiplier: 0.5054345269460851


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.61it/s, loss=-0.003422, elapsed time=0.04, total time=6.48]
[I 2025-06-07 23:21:32,095] Trial 234 finished with value: -0.0034224941428799135 and parameters: {'learning_rate': 0.023743203438636406, 'sigma_multiplier': 0.6091150927246557, 'initialization_multiplier': 0.5054345269460851}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 234 final loss: -0.00342249
Trial 235:
  Learning Rate: 0.03850593859234034
  Sigma Multiplier: 0.5789599671609102
  Initialization Multiplier: 0.683120106190594


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.43it/s, loss=-0.003716, elapsed time=0.04, total time=6.54]
[I 2025-06-07 23:21:38,668] Trial 235 finished with value: -0.003716460259416106 and parameters: {'learning_rate': 0.03850593859234034, 'sigma_multiplier': 0.5789599671609102, 'initialization_multiplier': 0.683120106190594}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 235 final loss: -0.00371646
Trial 236:
  Learning Rate: 0.001028579347855685
  Sigma Multiplier: 0.5099293453963374
  Initialization Multiplier: 0.6430443758280282


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.65it/s, loss=0.002117, elapsed time=0.04, total time=6.75]
[I 2025-06-07 23:21:45,451] Trial 236 finished with value: 0.0021169037725252207 and parameters: {'learning_rate': 0.001028579347855685, 'sigma_multiplier': 0.5099293453963374, 'initialization_multiplier': 0.6430443758280282}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 236 final loss: 0.00211690
Trial 237:
  Learning Rate: 0.029589687317318605
  Sigma Multiplier: 0.6431047641670196
  Initialization Multiplier: 0.5723730434280613


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.13it/s, loss=-0.003763, elapsed time=0.04, total time=6.36]
[I 2025-06-07 23:21:51,850] Trial 237 finished with value: -0.0037630845705127723 and parameters: {'learning_rate': 0.029589687317318605, 'sigma_multiplier': 0.6431047641670196, 'initialization_multiplier': 0.5723730434280613}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 237 final loss: -0.00376308
Trial 238:
  Learning Rate: 0.04156389988342649
  Sigma Multiplier: 0.5466197911304036
  Initialization Multiplier: 0.5333242971737566


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.05it/s, loss=-0.003764, elapsed time=0.04, total time=6.64]
[I 2025-06-07 23:21:58,523] Trial 238 finished with value: -0.003763798479295174 and parameters: {'learning_rate': 0.04156389988342649, 'sigma_multiplier': 0.5466197911304036, 'initialization_multiplier': 0.5333242971737566}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 238 final loss: -0.00376380
Trial 239:
  Learning Rate: 0.015207303298599368
  Sigma Multiplier: 0.5999353505971574
  Initialization Multiplier: 0.482138573500529


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.67it/s, loss=-0.003119, elapsed time=0.04, total time=6.47]
[I 2025-06-07 23:22:05,026] Trial 239 finished with value: -0.003119093622824919 and parameters: {'learning_rate': 0.015207303298599368, 'sigma_multiplier': 0.5999353505971574, 'initialization_multiplier': 0.482138573500529}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 239 final loss: -0.00311909
Trial 240:
  Learning Rate: 0.03249771232143619
  Sigma Multiplier: 0.6931987855113018
  Initialization Multiplier: 0.6087203446748048


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.65it/s, loss=-0.003665, elapsed time=0.04, total time=6.22]
[I 2025-06-07 23:22:11,280] Trial 240 finished with value: -0.0036651782948015394 and parameters: {'learning_rate': 0.03249771232143619, 'sigma_multiplier': 0.6931987855113018, 'initialization_multiplier': 0.6087203446748048}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 240 final loss: -0.00366518
Trial 241:
  Learning Rate: 0.04220238406608773
  Sigma Multiplier: 0.5420899057783483
  Initialization Multiplier: 0.5546487482701025


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.06it/s, loss=-0.003621, elapsed time=0.04, total time=6.63]
[I 2025-06-07 23:22:17,948] Trial 241 finished with value: -0.0036211587887331072 and parameters: {'learning_rate': 0.04220238406608773, 'sigma_multiplier': 0.5420899057783483, 'initialization_multiplier': 0.5546487482701025}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 241 final loss: -0.00362116
Trial 242:
  Learning Rate: 0.04043230238017344
  Sigma Multiplier: 1.9870520226221218
  Initialization Multiplier: 0.5241535746123643


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.82it/s, loss=-0.000811, elapsed time=0.03, total time=5]   
[I 2025-06-07 23:22:22,979] Trial 242 finished with value: -0.000811347390311465 and parameters: {'learning_rate': 0.04043230238017344, 'sigma_multiplier': 1.9870520226221218, 'initialization_multiplier': 0.5241535746123643}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 242 final loss: -0.00081135
Trial 243:
  Learning Rate: 0.0348991464037996
  Sigma Multiplier: 0.18396410366302635
  Initialization Multiplier: 0.588410336047375


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.46it/s, loss=0.000300, elapsed time=0.04, total time=7.13] 
[I 2025-06-07 23:22:30,150] Trial 243 finished with value: 0.000299957936087866 and parameters: {'learning_rate': 0.0348991464037996, 'sigma_multiplier': 0.18396410366302635, 'initialization_multiplier': 0.588410336047375}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 243 final loss: 0.00029996
Trial 244:
  Learning Rate: 0.02787360702460498
  Sigma Multiplier: 1.7303966455198572
  Initialization Multiplier: 0.5209537535359052


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.33it/s, loss=-0.001006, elapsed time=0.03, total time=5.08]
[I 2025-06-07 23:22:35,261] Trial 244 finished with value: -0.0010057878899631658 and parameters: {'learning_rate': 0.02787360702460498, 'sigma_multiplier': 1.7303966455198572, 'initialization_multiplier': 0.5209537535359052}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 244 final loss: -0.00100579
Trial 245:
  Learning Rate: 0.0002977125123170815
  Sigma Multiplier: 0.5681907037856261
  Initialization Multiplier: 0.468625868562428


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.40it/s, loss=0.010915, elapsed time=0.04, total time=6.54]
[I 2025-06-07 23:22:41,840] Trial 245 finished with value: 0.010914819860864832 and parameters: {'learning_rate': 0.0002977125123170815, 'sigma_multiplier': 0.5681907037856261, 'initialization_multiplier': 0.468625868562428}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 245 final loss: 0.01091482
Trial 246:
  Learning Rate: 0.04337842160107148
  Sigma Multiplier: 0.5224822830980133
  Initialization Multiplier: 0.6350980666528104


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.98it/s, loss=-0.003679, elapsed time=0.04, total time=6.67]
[I 2025-06-07 23:22:48,543] Trial 246 finished with value: -0.003679087661556408 and parameters: {'learning_rate': 0.04337842160107148, 'sigma_multiplier': 0.5224822830980133, 'initialization_multiplier': 0.6350980666528104}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 246 final loss: -0.00367909
Trial 247:
  Learning Rate: 0.03478268525646006
  Sigma Multiplier: 0.6206312223609145
  Initialization Multiplier: 0.5477941050341333


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.92it/s, loss=-0.003623, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:22:54,992] Trial 247 finished with value: -0.003623451854138735 and parameters: {'learning_rate': 0.03478268525646006, 'sigma_multiplier': 0.6206312223609145, 'initialization_multiplier': 0.5477941050341333}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 247 final loss: -0.00362345
Trial 248:
  Learning Rate: 0.051862675021876914
  Sigma Multiplier: 0.4899575323499341
  Initialization Multiplier: 0.70303968136398


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.62it/s, loss=-0.003421, elapsed time=0.04, total time=6.77]
[I 2025-06-07 23:23:01,797] Trial 248 finished with value: -0.003421113212140946 and parameters: {'learning_rate': 0.051862675021876914, 'sigma_multiplier': 0.4899575323499341, 'initialization_multiplier': 0.70303968136398}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 248 final loss: -0.00342111
Trial 249:
  Learning Rate: 0.02369022675794201
  Sigma Multiplier: 0.5799188628056617
  Initialization Multiplier: 0.6595448432052066


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.57it/s, loss=-0.003349, elapsed time=0.04, total time=6.51]
[I 2025-06-07 23:23:08,338] Trial 249 finished with value: -0.0033486326072037476 and parameters: {'learning_rate': 0.02369022675794201, 'sigma_multiplier': 0.5799188628056617, 'initialization_multiplier': 0.6595448432052066}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 249 final loss: -0.00334863
Trial 250:
  Learning Rate: 0.04055196547448963
  Sigma Multiplier: 0.5537574721334881
  Initialization Multiplier: 0.5902173049956476


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.31it/s, loss=-0.003545, elapsed time=0.04, total time=6.57]
[I 2025-06-07 23:23:14,943] Trial 250 finished with value: -0.003545421342416037 and parameters: {'learning_rate': 0.04055196547448963, 'sigma_multiplier': 0.5537574721334881, 'initialization_multiplier': 0.5902173049956476}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 250 final loss: -0.00354542
Trial 251:
  Learning Rate: 0.03208763575443493
  Sigma Multiplier: 0.45360995207083504
  Initialization Multiplier: 0.5138771060184499


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.52it/s, loss=-0.003441, elapsed time=0.04, total time=6.79]
[I 2025-06-07 23:23:21,774] Trial 251 finished with value: -0.00344115853942329 and parameters: {'learning_rate': 0.03208763575443493, 'sigma_multiplier': 0.45360995207083504, 'initialization_multiplier': 0.5138771060184499}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 251 final loss: -0.00344116
Trial 252:
  Learning Rate: 0.020323539674987075
  Sigma Multiplier: 0.6360138664689078
  Initialization Multiplier: 0.6176436869353094


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.06it/s, loss=-0.003417, elapsed time=0.04, total time=6.36]
[I 2025-06-07 23:23:28,173] Trial 252 finished with value: -0.003417177335654265 and parameters: {'learning_rate': 0.020323539674987075, 'sigma_multiplier': 0.6360138664689078, 'initialization_multiplier': 0.6176436869353094}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 252 final loss: -0.00341718
Trial 253:
  Learning Rate: 0.045992136365315205
  Sigma Multiplier: 0.6032811204580654
  Initialization Multiplier: 0.5606988308055514


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.77it/s, loss=-0.003487, elapsed time=0.04, total time=6.44]
[I 2025-06-07 23:23:34,645] Trial 253 finished with value: -0.0034871676695084285 and parameters: {'learning_rate': 0.045992136365315205, 'sigma_multiplier': 0.6032811204580654, 'initialization_multiplier': 0.5606988308055514}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 253 final loss: -0.00348717
Trial 254:
  Learning Rate: 0.026665974356786016
  Sigma Multiplier: 0.5239144730197154
  Initialization Multiplier: 0.4426334447624566


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.06it/s, loss=-0.003223, elapsed time=0.04, total time=6.64]
[I 2025-06-07 23:23:41,325] Trial 254 finished with value: -0.0032230840345962154 and parameters: {'learning_rate': 0.026665974356786016, 'sigma_multiplier': 0.5239144730197154, 'initialization_multiplier': 0.4426334447624566}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 254 final loss: -0.00322308
Trial 255:
  Learning Rate: 0.036656948352356736
  Sigma Multiplier: 0.6608878592400549
  Initialization Multiplier: 0.6775698501903723


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.39it/s, loss=-0.003547, elapsed time=0.04, total time=6.28]
[I 2025-06-07 23:23:47,641] Trial 255 finished with value: -0.0035469934316883223 and parameters: {'learning_rate': 0.036656948352356736, 'sigma_multiplier': 0.6608878592400549, 'initialization_multiplier': 0.6775698501903723}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 255 final loss: -0.00354699
Trial 256:
  Learning Rate: 0.059249160452352026
  Sigma Multiplier: 0.5766650400334948
  Initialization Multiplier: 0.5905127455463406


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.61it/s, loss=-0.003667, elapsed time=0.04, total time=6.48]
[I 2025-06-07 23:23:54,160] Trial 256 finished with value: -0.0036673084720514402 and parameters: {'learning_rate': 0.059249160452352026, 'sigma_multiplier': 0.5766650400334948, 'initialization_multiplier': 0.5905127455463406}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 256 final loss: -0.00366731
Trial 257:
  Learning Rate: 0.03035571610733737
  Sigma Multiplier: 0.4854151431884184
  Initialization Multiplier: 0.4825501832384135


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.73it/s, loss=-0.003199, elapsed time=0.04, total time=6.73]
[I 2025-06-07 23:24:00,932] Trial 257 finished with value: -0.003199027774740455 and parameters: {'learning_rate': 0.03035571610733737, 'sigma_multiplier': 0.4854151431884184, 'initialization_multiplier': 0.4825501832384135}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 257 final loss: -0.00319903
Trial 258:
  Learning Rate: 0.04385233581441597
  Sigma Multiplier: 0.5407886019905148
  Initialization Multiplier: 0.643677166119124


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.26it/s, loss=-0.003374, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:24:07,549] Trial 258 finished with value: -0.003374437122337127 and parameters: {'learning_rate': 0.04385233581441597, 'sigma_multiplier': 0.5407886019905148, 'initialization_multiplier': 0.643677166119124}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 258 final loss: -0.00337444
Trial 259:
  Learning Rate: 0.05183254671807872
  Sigma Multiplier: 0.6208918661207834
  Initialization Multiplier: 0.5329109068031249


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.19it/s, loss=-0.003642, elapsed time=0.04, total time=6.33]
[I 2025-06-07 23:24:13,914] Trial 259 finished with value: -0.0036415233556770215 and parameters: {'learning_rate': 0.05183254671807872, 'sigma_multiplier': 0.6208918661207834, 'initialization_multiplier': 0.5329109068031249}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 259 final loss: -0.00364152
Trial 260:
  Learning Rate: 0.03701484491487682
  Sigma Multiplier: 0.584185581986473
  Initialization Multiplier: 1.4970038673195778


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.92it/s, loss=0.072170, elapsed time=0.04, total time=6.67]
[I 2025-06-07 23:24:20,621] Trial 260 finished with value: 0.07216984410460725 and parameters: {'learning_rate': 0.03701484491487682, 'sigma_multiplier': 0.584185581986473, 'initialization_multiplier': 1.4970038673195778}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 260 final loss: 0.07216984
Trial 261:
  Learning Rate: 0.006873363166030387
  Sigma Multiplier: 0.663169069338793
  Initialization Multiplier: 0.7015868890067627


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.57it/s, loss=-0.002818, elapsed time=0.04, total time=6.24]
[I 2025-06-07 23:24:26,896] Trial 261 finished with value: -0.002818333873140115 and parameters: {'learning_rate': 0.006873363166030387, 'sigma_multiplier': 0.663169069338793, 'initialization_multiplier': 0.7015868890067627}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 261 final loss: -0.00281833
Trial 262:
  Learning Rate: 0.025241383016294833
  Sigma Multiplier: 0.7192505334499031
  Initialization Multiplier: 0.5657506367859484


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.26it/s, loss=-0.003407, elapsed time=0.04, total time=6.08]
[I 2025-06-07 23:24:33,013] Trial 262 finished with value: -0.0034071180192481246 and parameters: {'learning_rate': 0.025241383016294833, 'sigma_multiplier': 0.7192505334499031, 'initialization_multiplier': 0.5657506367859484}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 262 final loss: -0.00340712
Trial 263:
  Learning Rate: 0.031345400264423826
  Sigma Multiplier: 1.0349106972588904
  Initialization Multiplier: 0.612101995852609


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.18it/s, loss=-0.002515, elapsed time=0.03, total time=5.47]
[I 2025-06-07 23:24:38,518] Trial 263 finished with value: -0.002515371191613543 and parameters: {'learning_rate': 0.031345400264423826, 'sigma_multiplier': 1.0349106972588904, 'initialization_multiplier': 0.612101995852609}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 263 final loss: -0.00251537
Trial 264:
  Learning Rate: 0.03991595546800309
  Sigma Multiplier: 0.4257872408571773
  Initialization Multiplier: 0.5132885819559071


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.62it/s, loss=-0.002747, elapsed time=0.04, total time=6.76]
[I 2025-06-07 23:24:45,321] Trial 264 finished with value: -0.0027469437652585154 and parameters: {'learning_rate': 0.03991595546800309, 'sigma_multiplier': 0.4257872408571773, 'initialization_multiplier': 0.5132885819559071}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 264 final loss: -0.00274694
Trial 265:
  Learning Rate: 0.021117499228858465
  Sigma Multiplier: 0.5500613075753884
  Initialization Multiplier: 0.6609751487287272


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.56it/s, loss=-0.003930, elapsed time=0.04, total time=6.5] 
[I 2025-06-07 23:24:51,859] Trial 265 finished with value: -0.003929747684965392 and parameters: {'learning_rate': 0.021117499228858465, 'sigma_multiplier': 0.5500613075753884, 'initialization_multiplier': 0.6609751487287272}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 265 final loss: -0.00392975
Trial 266:
  Learning Rate: 0.02056356601819234
  Sigma Multiplier: 0.6203607881208729
  Initialization Multiplier: 0.6691465752252068


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.26it/s, loss=-0.003906, elapsed time=0.04, total time=6.32]
[I 2025-06-07 23:24:58,210] Trial 266 finished with value: -0.0039059971547432985 and parameters: {'learning_rate': 0.02056356601819234, 'sigma_multiplier': 0.6203607881208729, 'initialization_multiplier': 0.6691465752252068}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 266 final loss: -0.00390600
Trial 267:
  Learning Rate: 0.01348062934897561
  Sigma Multiplier: 0.6322818896300766
  Initialization Multiplier: 0.7274099109400558


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.29it/s, loss=-0.003236, elapsed time=0.04, total time=6.31]
[I 2025-06-07 23:25:04,558] Trial 267 finished with value: -0.00323592879192405 and parameters: {'learning_rate': 0.01348062934897561, 'sigma_multiplier': 0.6322818896300766, 'initialization_multiplier': 0.7274099109400558}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 267 final loss: -0.00323593
Trial 268:
  Learning Rate: 0.021929111438405535
  Sigma Multiplier: 0.5094084749862233
  Initialization Multiplier: 0.703847890594331


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.19it/s, loss=-0.003605, elapsed time=0.04, total time=6.6] 
[I 2025-06-07 23:25:11,192] Trial 268 finished with value: -0.0036050223616177703 and parameters: {'learning_rate': 0.021929111438405535, 'sigma_multiplier': 0.5094084749862233, 'initialization_multiplier': 0.703847890594331}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 268 final loss: -0.00360502
Trial 269:
  Learning Rate: 0.022236046908717076
  Sigma Multiplier: 0.6030412326942062
  Initialization Multiplier: 0.6507418834948916


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.14it/s, loss=-0.003564, elapsed time=0.04, total time=6.35]
[I 2025-06-07 23:25:17,577] Trial 269 finished with value: -0.003563786207028621 and parameters: {'learning_rate': 0.022236046908717076, 'sigma_multiplier': 0.6030412326942062, 'initialization_multiplier': 0.6507418834948916}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 269 final loss: -0.00356379
Trial 270:
  Learning Rate: 0.01857118449123913
  Sigma Multiplier: 0.6775229387690791
  Initialization Multiplier: 0.6835261964409151


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.01it/s, loss=-0.003219, elapsed time=0.04, total time=6.14]
[I 2025-06-07 23:25:23,749] Trial 270 finished with value: -0.003218662950615377 and parameters: {'learning_rate': 0.01857118449123913, 'sigma_multiplier': 0.6775229387690791, 'initialization_multiplier': 0.6835261964409151}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 270 final loss: -0.00321866
Trial 271:
  Learning Rate: 0.016419470073614287
  Sigma Multiplier: 0.5589115490371176
  Initialization Multiplier: 0.6662063166932474


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.69it/s, loss=-0.003571, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:25:30,248] Trial 271 finished with value: -0.00357129936481184 and parameters: {'learning_rate': 0.016419470073614287, 'sigma_multiplier': 0.5589115490371176, 'initialization_multiplier': 0.6662063166932474}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 271 final loss: -0.00357130
Trial 272:
  Learning Rate: 0.018419711399315416
  Sigma Multiplier: 0.6460445222346073
  Initialization Multiplier: 0.6254291519346591


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.62it/s, loss=-0.003507, elapsed time=0.04, total time=6.22]
[I 2025-06-07 23:25:36,507] Trial 272 finished with value: -0.0035067400178746713 and parameters: {'learning_rate': 0.018419711399315416, 'sigma_multiplier': 0.6460445222346073, 'initialization_multiplier': 0.6254291519346591}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 272 final loss: -0.00350674
Trial 273:
  Learning Rate: 0.02706513874639293
  Sigma Multiplier: 0.46832819211792714
  Initialization Multiplier: 0.7676749397026538


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.05it/s, loss=-0.003685, elapsed time=0.04, total time=6.64]
[I 2025-06-07 23:25:43,180] Trial 273 finished with value: -0.0036853338091652585 and parameters: {'learning_rate': 0.02706513874639293, 'sigma_multiplier': 0.46832819211792714, 'initialization_multiplier': 0.7676749397026538}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 273 final loss: -0.00368533
Trial 274:
  Learning Rate: 0.021694626792943046
  Sigma Multiplier: 0.5892333821531721
  Initialization Multiplier: 0.6067376828224217


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.06it/s, loss=-0.003638, elapsed time=0.04, total time=6.36]
[I 2025-06-07 23:25:49,578] Trial 274 finished with value: -0.0036378996534045024 and parameters: {'learning_rate': 0.021694626792943046, 'sigma_multiplier': 0.5892333821531721, 'initialization_multiplier': 0.6067376828224217}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 274 final loss: -0.00363790
Trial 275:
  Learning Rate: 0.028390229569912898
  Sigma Multiplier: 0.5147709284314468
  Initialization Multiplier: 0.7217214364190402


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.27it/s, loss=-0.003286, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:25:56,196] Trial 275 finished with value: -0.0032864155062171468 and parameters: {'learning_rate': 0.028390229569912898, 'sigma_multiplier': 0.5147709284314468, 'initialization_multiplier': 0.7217214364190402}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 275 final loss: -0.00328642
Trial 276:
  Learning Rate: 0.024119549910040665
  Sigma Multiplier: 0.62023817793786
  Initialization Multiplier: 0.6529294673658523


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.32it/s, loss=-0.003672, elapsed time=0.04, total time=6.3] 
[I 2025-06-07 23:26:02,526] Trial 276 finished with value: -0.0036723053931547334 and parameters: {'learning_rate': 0.024119549910040665, 'sigma_multiplier': 0.62023817793786, 'initialization_multiplier': 0.6529294673658523}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 276 final loss: -0.00367231
Trial 277:
  Learning Rate: 0.008877213854501955
  Sigma Multiplier: 0.5553272324382963
  Initialization Multiplier: 0.584564881250819


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.81it/s, loss=-0.002915, elapsed time=0.04, total time=6.44]
[I 2025-06-07 23:26:08,996] Trial 277 finished with value: -0.002915388810918378 and parameters: {'learning_rate': 0.008877213854501955, 'sigma_multiplier': 0.5553272324382963, 'initialization_multiplier': 0.584564881250819}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 277 final loss: -0.00291539
Trial 278:
  Learning Rate: 0.032957314256763744
  Sigma Multiplier: 0.5838453289042227
  Initialization Multiplier: 0.6337209933379792


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.96it/s, loss=-0.003807, elapsed time=0.04, total time=6.4] 
[I 2025-06-07 23:26:15,427] Trial 278 finished with value: -0.003806561150264889 and parameters: {'learning_rate': 0.032957314256763744, 'sigma_multiplier': 0.5838453289042227, 'initialization_multiplier': 0.6337209933379792}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 278 final loss: -0.00380656
Trial 279:
  Learning Rate: 0.031960455332470696
  Sigma Multiplier: 1.5055983126634063
  Initialization Multiplier: 0.6843145362717103


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.75it/s, loss=-0.001397, elapsed time=0.03, total time=5.01]
[I 2025-06-07 23:26:20,472] Trial 279 finished with value: -0.0013968013549948745 and parameters: {'learning_rate': 0.031960455332470696, 'sigma_multiplier': 1.5055983126634063, 'initialization_multiplier': 0.6843145362717103}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 279 final loss: -0.00139680
Trial 280:
  Learning Rate: 0.02679055449463301
  Sigma Multiplier: 0.6008821923928446
  Initialization Multiplier: 0.5854916573663346


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.19it/s, loss=-0.003626, elapsed time=0.04, total time=6.33]
[I 2025-06-07 23:26:26,843] Trial 280 finished with value: -0.0036264968445325076 and parameters: {'learning_rate': 0.02679055449463301, 'sigma_multiplier': 0.6008821923928446, 'initialization_multiplier': 0.5854916573663346}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 280 final loss: -0.00362650
Trial 281:
  Learning Rate: 0.01963137801514257
  Sigma Multiplier: 0.6875660170117147
  Initialization Multiplier: 0.7948147091532476


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.19it/s, loss=-0.003154, elapsed time=0.07, total time=6.08]
[I 2025-06-07 23:26:32,959] Trial 281 finished with value: -0.0031543958615436684 and parameters: {'learning_rate': 0.01963137801514257, 'sigma_multiplier': 0.6875660170117147, 'initialization_multiplier': 0.7948147091532476}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 281 final loss: -0.00315440
Trial 282:
  Learning Rate: 0.034269903145731556
  Sigma Multiplier: 0.6426961567324911
  Initialization Multiplier: 0.624199825671779


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.62it/s, loss=-0.003644, elapsed time=0.04, total time=6.23]
[I 2025-06-07 23:26:39,230] Trial 282 finished with value: -0.003644481608182956 and parameters: {'learning_rate': 0.034269903145731556, 'sigma_multiplier': 0.6426961567324911, 'initialization_multiplier': 0.624199825671779}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 282 final loss: -0.00364448
Trial 283:
  Learning Rate: 0.02433858805739062
  Sigma Multiplier: 0.5718454965635221
  Initialization Multiplier: 0.7310030548513021


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.90it/s, loss=-0.003806, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:26:45,672] Trial 283 finished with value: -0.003806363917201472 and parameters: {'learning_rate': 0.02433858805739062, 'sigma_multiplier': 0.5718454965635221, 'initialization_multiplier': 0.7310030548513021}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 283 final loss: -0.00380636
Trial 284:
  Learning Rate: 0.024227465521916475
  Sigma Multiplier: 0.5538263605260977
  Initialization Multiplier: 0.7345523589990375


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.59it/s, loss=-0.003634, elapsed time=0.04, total time=6.78]
[I 2025-06-07 23:26:52,487] Trial 284 finished with value: -0.0036337681168907583 and parameters: {'learning_rate': 0.024227465521916475, 'sigma_multiplier': 0.5538263605260977, 'initialization_multiplier': 0.7345523589990375}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 284 final loss: -0.00363377
Trial 285:
  Learning Rate: 0.015221651055487524
  Sigma Multiplier: 0.6126253332219909
  Initialization Multiplier: 0.8354172505756686


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.32it/s, loss=-0.003266, elapsed time=0.04, total time=6.31]
[I 2025-06-07 23:26:58,827] Trial 285 finished with value: -0.0032662576436543658 and parameters: {'learning_rate': 0.015221651055487524, 'sigma_multiplier': 0.6126253332219909, 'initialization_multiplier': 0.8354172505756686}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 285 final loss: -0.00326626
Trial 286:
  Learning Rate: 0.019381225876855007
  Sigma Multiplier: 0.5369293272734715
  Initialization Multiplier: 0.7754889044353188


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.42it/s, loss=-0.003841, elapsed time=0.04, total time=6.54]
[I 2025-06-07 23:27:05,402] Trial 286 finished with value: -0.003841438034654052 and parameters: {'learning_rate': 0.019381225876855007, 'sigma_multiplier': 0.5369293272734715, 'initialization_multiplier': 0.7754889044353188}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 286 final loss: -0.00384144
Trial 287:
  Learning Rate: 0.019575455279139283
  Sigma Multiplier: 0.5742287677648669
  Initialization Multiplier: 0.808189093941364


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.96it/s, loss=-0.003407, elapsed time=0.04, total time=6.39]
[I 2025-06-07 23:27:11,821] Trial 287 finished with value: -0.00340695294111835 and parameters: {'learning_rate': 0.019575455279139283, 'sigma_multiplier': 0.5742287677648669, 'initialization_multiplier': 0.808189093941364}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 287 final loss: -0.00340695
Trial 288:
  Learning Rate: 0.01622742419656584
  Sigma Multiplier: 0.6352759568555533
  Initialization Multiplier: 0.7593077744072739


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.52it/s, loss=-0.003447, elapsed time=0.04, total time=6.25]
[I 2025-06-07 23:27:18,107] Trial 288 finished with value: -0.0034470020828572443 and parameters: {'learning_rate': 0.01622742419656584, 'sigma_multiplier': 0.6352759568555533, 'initialization_multiplier': 0.7593077744072739}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 288 final loss: -0.00344700
Trial 289:
  Learning Rate: 0.021795123930964645
  Sigma Multiplier: 0.5342235295232131
  Initialization Multiplier: 0.7143434401217297


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.62it/s, loss=-0.003402, elapsed time=0.04, total time=6.48]
[I 2025-06-07 23:27:24,627] Trial 289 finished with value: -0.0034017460880100036 and parameters: {'learning_rate': 0.021795123930964645, 'sigma_multiplier': 0.5342235295232131, 'initialization_multiplier': 0.7143434401217297}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 289 final loss: -0.00340175
Trial 290:
  Learning Rate: 0.02626970553481784
  Sigma Multiplier: 0.5862991757957058
  Initialization Multiplier: 0.9660966633278733


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.85it/s, loss=-0.002628, elapsed time=0.04, total time=6.42]
[I 2025-06-07 23:27:31,079] Trial 290 finished with value: -0.0026283572968552093 and parameters: {'learning_rate': 0.02626970553481784, 'sigma_multiplier': 0.5862991757957058, 'initialization_multiplier': 0.9660966633278733}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 290 final loss: -0.00262836
Trial 291:
  Learning Rate: 0.01977448821943554
  Sigma Multiplier: 0.7112362108926815
  Initialization Multiplier: 0.7648920841674043


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.47it/s, loss=-0.003616, elapsed time=0.05, total time=6.02]
[I 2025-06-07 23:27:37,132] Trial 291 finished with value: -0.003615521312130475 and parameters: {'learning_rate': 0.01977448821943554, 'sigma_multiplier': 0.7112362108926815, 'initialization_multiplier': 0.7648920841674043}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 291 final loss: -0.00361552
Trial 292:
  Learning Rate: 0.011740644351992626
  Sigma Multiplier: 0.6597832773504091
  Initialization Multiplier: 0.7962469123739627


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.96it/s, loss=-0.002669, elapsed time=0.04, total time=6.14]
[I 2025-06-07 23:27:43,307] Trial 292 finished with value: -0.002669484332915044 and parameters: {'learning_rate': 0.011740644351992626, 'sigma_multiplier': 0.6597832773504091, 'initialization_multiplier': 0.7962469123739627}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 292 final loss: -0.00266948
Trial 293:
  Learning Rate: 0.06099631357014855
  Sigma Multiplier: 0.5520412869927916
  Initialization Multiplier: 0.6765007276827533


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.68it/s, loss=-0.003601, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:27:49,806] Trial 293 finished with value: -0.0036014832584043044 and parameters: {'learning_rate': 0.06099631357014855, 'sigma_multiplier': 0.5520412869927916, 'initialization_multiplier': 0.6765007276827533}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 293 final loss: -0.00360148
Trial 294:
  Learning Rate: 0.028929343519633957
  Sigma Multiplier: 0.9592257148246507
  Initialization Multiplier: 0.8637228044425497


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.78it/s, loss=-0.002226, elapsed time=0.03, total time=5.54]
[I 2025-06-07 23:27:55,376] Trial 294 finished with value: -0.002226128798332512 and parameters: {'learning_rate': 0.028929343519633957, 'sigma_multiplier': 0.9592257148246507, 'initialization_multiplier': 0.8637228044425497}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 294 final loss: -0.00222613
Trial 295:
  Learning Rate: 0.023511740222012342
  Sigma Multiplier: 0.6077756690910764
  Initialization Multiplier: 0.7186074178590891


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.12it/s, loss=-0.003735, elapsed time=0.04, total time=6.35]
[I 2025-06-07 23:28:01,761] Trial 295 finished with value: -0.0037347377090736376 and parameters: {'learning_rate': 0.023511740222012342, 'sigma_multiplier': 0.6077756690910764, 'initialization_multiplier': 0.7186074178590891}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 295 final loss: -0.00373474
Trial 296:
  Learning Rate: 0.049294691711069934
  Sigma Multiplier: 0.5333270037877443
  Initialization Multiplier: 1.0203664118129592


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.09it/s, loss=-0.002096, elapsed time=0.04, total time=6.64]
[I 2025-06-07 23:28:08,433] Trial 296 finished with value: -0.0020963961220737816 and parameters: {'learning_rate': 0.049294691711069934, 'sigma_multiplier': 0.5333270037877443, 'initialization_multiplier': 1.0203664118129592}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 296 final loss: -0.00209640
Trial 297:
  Learning Rate: 0.01721168470486922
  Sigma Multiplier: 0.675122307751672
  Initialization Multiplier: 0.41302307254097287


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.06it/s, loss=-0.002883, elapsed time=0.04, total time=6.13]
[I 2025-06-07 23:28:14,596] Trial 297 finished with value: -0.0028830168376457483 and parameters: {'learning_rate': 0.01721168470486922, 'sigma_multiplier': 0.675122307751672, 'initialization_multiplier': 0.41302307254097287}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 297 final loss: -0.00288302
Trial 298:
  Learning Rate: 0.0007473950269793263
  Sigma Multiplier: 0.625262571894559
  Initialization Multiplier: 0.7423920306722434


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.37it/s, loss=0.029222, elapsed time=0.04, total time=6.29]
[I 2025-06-07 23:28:20,924] Trial 298 finished with value: 0.029221784513150592 and parameters: {'learning_rate': 0.0007473950269793263, 'sigma_multiplier': 0.625262571894559, 'initialization_multiplier': 0.7423920306722434}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 298 final loss: 0.02922178
Trial 299:
  Learning Rate: 0.029067345772556114
  Sigma Multiplier: 0.5782104005181525
  Initialization Multiplier: 0.6410630133276889


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.10it/s, loss=-0.003680, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:28:27,325] Trial 299 finished with value: -0.003680023381338162 and parameters: {'learning_rate': 0.029067345772556114, 'sigma_multiplier': 0.5782104005181525, 'initialization_multiplier': 0.6410630133276889}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 299 final loss: -0.00368002
Trial 300:
  Learning Rate: 0.023404255158678725
  Sigma Multiplier: 0.5103707629564782
  Initialization Multiplier: 0.6824780400183988


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.47it/s, loss=-0.003749, elapsed time=0.04, total time=6.52]
[I 2025-06-07 23:28:33,882] Trial 300 finished with value: -0.0037493789998174955 and parameters: {'learning_rate': 0.023404255158678725, 'sigma_multiplier': 0.5103707629564782, 'initialization_multiplier': 0.6824780400183988}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 300 final loss: -0.00374938
Trial 301:
  Learning Rate: 0.03334541434453416
  Sigma Multiplier: 0.5758461222250845
  Initialization Multiplier: 0.6670315353529693


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.92it/s, loss=-0.004054, elapsed time=0.04, total time=6.4] 
[I 2025-06-07 23:28:40,315] Trial 301 finished with value: -0.004054081425265247 and parameters: {'learning_rate': 0.03334541434453416, 'sigma_multiplier': 0.5758461222250845, 'initialization_multiplier': 0.6670315353529693}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 301 final loss: -0.00405408
Trial 302:
  Learning Rate: 0.05425761029307368
  Sigma Multiplier: 0.6195656737641164
  Initialization Multiplier: 0.652551942617226


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.62it/s, loss=-0.003713, elapsed time=0.04, total time=6.23]
[I 2025-06-07 23:28:46,580] Trial 302 finished with value: -0.0037128085030228306 and parameters: {'learning_rate': 0.05425761029307368, 'sigma_multiplier': 0.6195656737641164, 'initialization_multiplier': 0.652551942617226}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 302 final loss: -0.00371281
Trial 303:
  Learning Rate: 0.03174912411233245
  Sigma Multiplier: 0.5503120133707137
  Initialization Multiplier: 0.46985802071569666


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.84it/s, loss=-0.003379, elapsed time=0.04, total time=6.44]
[I 2025-06-07 23:28:53,060] Trial 303 finished with value: -0.0033793339808637682 and parameters: {'learning_rate': 0.03174912411233245, 'sigma_multiplier': 0.5503120133707137, 'initialization_multiplier': 0.46985802071569666}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 303 final loss: -0.00337933
Trial 304:
  Learning Rate: 0.04698412481726705
  Sigma Multiplier: 0.5924788842010019
  Initialization Multiplier: 0.5509403511528697


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.14it/s, loss=-0.003510, elapsed time=0.04, total time=6.35]
[I 2025-06-07 23:28:59,447] Trial 304 finished with value: -0.0035101982762754213 and parameters: {'learning_rate': 0.04698412481726705, 'sigma_multiplier': 0.5924788842010019, 'initialization_multiplier': 0.5509403511528697}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 304 final loss: -0.00351020
Trial 305:
  Learning Rate: 0.03609969690919294
  Sigma Multiplier: 1.0960807034197126
  Initialization Multiplier: 1.181812051550538


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.33it/s, loss=0.036482, elapsed time=0.03, total time=5.43]
[I 2025-06-07 23:29:04,909] Trial 305 finished with value: 0.03648225801244366 and parameters: {'learning_rate': 0.03609969690919294, 'sigma_multiplier': 1.0960807034197126, 'initialization_multiplier': 1.181812051550538}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 305 final loss: 0.03648226
Trial 306:
  Learning Rate: 0.06493516809123774
  Sigma Multiplier: 0.6536061990557408
  Initialization Multiplier: 0.6975983568115014


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.80it/s, loss=-0.003280, elapsed time=0.04, total time=6.19]
[I 2025-06-07 23:29:11,136] Trial 306 finished with value: -0.0032802582241605897 and parameters: {'learning_rate': 0.06493516809123774, 'sigma_multiplier': 0.6536061990557408, 'initialization_multiplier': 0.6975983568115014}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 306 final loss: -0.00328026
Trial 307:
  Learning Rate: 0.043678036817102424
  Sigma Multiplier: 0.7270371811584198
  Initialization Multiplier: 0.6274418657737105


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.53it/s, loss=-0.003544, elapsed time=0.04, total time=6.01]
[I 2025-06-07 23:29:17,175] Trial 307 finished with value: -0.003543872047178971 and parameters: {'learning_rate': 0.043678036817102424, 'sigma_multiplier': 0.7270371811584198, 'initialization_multiplier': 0.6274418657737105}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 307 final loss: -0.00354387
Trial 308:
  Learning Rate: 0.032094139531944225
  Sigma Multiplier: 0.5203280557083915
  Initialization Multiplier: 0.5059212138028972


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.54it/s, loss=-0.003507, elapsed time=0.04, total time=6.51]
[I 2025-06-07 23:29:23,725] Trial 308 finished with value: -0.0035074980650212917 and parameters: {'learning_rate': 0.032094139531944225, 'sigma_multiplier': 0.5203280557083915, 'initialization_multiplier': 0.5059212138028972}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 308 final loss: -0.00350750
Trial 309:
  Learning Rate: 0.03824293956972247
  Sigma Multiplier: 0.46112497152657106
  Initialization Multiplier: 0.6508411196935239


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.07it/s, loss=-0.003365, elapsed time=0.04, total time=6.64]
[I 2025-06-07 23:29:30,397] Trial 309 finished with value: -0.003364884074116705 and parameters: {'learning_rate': 0.03824293956972247, 'sigma_multiplier': 0.46112497152657106, 'initialization_multiplier': 0.6508411196935239}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 309 final loss: -0.00336488
Trial 310:
  Learning Rate: 0.02747082696095278
  Sigma Multiplier: 0.5696915279559904
  Initialization Multiplier: 0.6059133695701735


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.90it/s, loss=-0.003575, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:29:36,836] Trial 310 finished with value: -0.0035746854226795105 and parameters: {'learning_rate': 0.02747082696095278, 'sigma_multiplier': 0.5696915279559904, 'initialization_multiplier': 0.6059133695701735}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 310 final loss: -0.00357469
Trial 311:
  Learning Rate: 0.052565064014305794
  Sigma Multiplier: 0.6290476289431263
  Initialization Multiplier: 0.6976870179538103


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.56it/s, loss=-0.003856, elapsed time=0.04, total time=6.24]
[I 2025-06-07 23:29:43,111] Trial 311 finished with value: -0.0038562039873903512 and parameters: {'learning_rate': 0.052565064014305794, 'sigma_multiplier': 0.6290476289431263, 'initialization_multiplier': 0.6976870179538103}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 311 final loss: -0.00385620
Trial 312:
  Learning Rate: 0.013153946860912874
  Sigma Multiplier: 0.6842883197532194
  Initialization Multiplier: 0.6930575233084192


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.07it/s, loss=-0.003047, elapsed time=0.04, total time=6.11]
[I 2025-06-07 23:29:49,257] Trial 312 finished with value: -0.0030465415913867836 and parameters: {'learning_rate': 0.013153946860912874, 'sigma_multiplier': 0.6842883197532194, 'initialization_multiplier': 0.6930575233084192}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 312 final loss: -0.00304654
Trial 313:
  Learning Rate: 0.02056563038353073
  Sigma Multiplier: 0.6322023231943502
  Initialization Multiplier: 0.5493503517606284


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.64it/s, loss=-0.003681, elapsed time=0.04, total time=6.23]
[I 2025-06-07 23:29:55,523] Trial 313 finished with value: -0.0036814099474447577 and parameters: {'learning_rate': 0.02056563038353073, 'sigma_multiplier': 0.6322023231943502, 'initialization_multiplier': 0.5493503517606284}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 313 final loss: -0.00368141
Trial 314:
  Learning Rate: 0.055109889469545204
  Sigma Multiplier: 0.6040520027952379
  Initialization Multiplier: 0.6676031218187709


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.20it/s, loss=-0.003379, elapsed time=0.04, total time=6.34]
[I 2025-06-07 23:30:01,909] Trial 314 finished with value: -0.0033792123252947893 and parameters: {'learning_rate': 0.055109889469545204, 'sigma_multiplier': 0.6040520027952379, 'initialization_multiplier': 0.6676031218187709}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 314 final loss: -0.00337921
Trial 315:
  Learning Rate: 0.03440003665023205
  Sigma Multiplier: 0.6599803995267968
  Initialization Multiplier: 0.5965034343463634


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.81it/s, loss=-0.003372, elapsed time=0.04, total time=6.18]
[I 2025-06-07 23:30:08,126] Trial 315 finished with value: -0.0033723624158775263 and parameters: {'learning_rate': 0.03440003665023205, 'sigma_multiplier': 0.6599803995267968, 'initialization_multiplier': 0.5965034343463634}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 315 final loss: -0.00337236
Trial 316:
  Learning Rate: 0.0450114786913268
  Sigma Multiplier: 0.5520602154222646
  Initialization Multiplier: 0.6299523766699424


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.67it/s, loss=-0.003495, elapsed time=0.04, total time=6.48]
[I 2025-06-07 23:30:14,637] Trial 316 finished with value: -0.003495386799144592 and parameters: {'learning_rate': 0.0450114786913268, 'sigma_multiplier': 0.5520602154222646, 'initialization_multiplier': 0.6299523766699424}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 316 final loss: -0.00349539
Trial 317:
  Learning Rate: 0.02743590870796493
  Sigma Multiplier: 0.49116631143269485
  Initialization Multiplier: 0.4332850304174021


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.13it/s, loss=-0.003300, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:30:21,286] Trial 317 finished with value: -0.0032997408737089466 and parameters: {'learning_rate': 0.02743590870796493, 'sigma_multiplier': 0.49116631143269485, 'initialization_multiplier': 0.4332850304174021}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 317 final loss: -0.00329974
Trial 318:
  Learning Rate: 0.038711064356026274
  Sigma Multiplier: 0.5859105728638317
  Initialization Multiplier: 0.5694899193143701


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.01it/s, loss=-0.003862, elapsed time=0.04, total time=6.39]
[I 2025-06-07 23:30:27,707] Trial 318 finished with value: -0.003862211054776512 and parameters: {'learning_rate': 0.038711064356026274, 'sigma_multiplier': 0.5859105728638317, 'initialization_multiplier': 0.5694899193143701}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 318 final loss: -0.00386221
Trial 319:
  Learning Rate: 0.03987612073350714
  Sigma Multiplier: 0.5251315431105411
  Initialization Multiplier: 0.7208045115778006


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.46it/s, loss=-0.003758, elapsed time=0.04, total time=6.53]
[I 2025-06-07 23:30:34,272] Trial 319 finished with value: -0.003758357680229134 and parameters: {'learning_rate': 0.03987612073350714, 'sigma_multiplier': 0.5251315431105411, 'initialization_multiplier': 0.7208045115778006}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 319 final loss: -0.00375836
Trial 320:
  Learning Rate: 0.06940563683893353
  Sigma Multiplier: 0.5766187546123971
  Initialization Multiplier: 0.5772306887223523


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.02it/s, loss=-0.003562, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:30:40,681] Trial 320 finished with value: -0.003561805444284682 and parameters: {'learning_rate': 0.06940563683893353, 'sigma_multiplier': 0.5766187546123971, 'initialization_multiplier': 0.5772306887223523}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 320 final loss: -0.00356181
Trial 321:
  Learning Rate: 0.050774324558507014
  Sigma Multiplier: 0.10430427383254082
  Initialization Multiplier: 0.6605568075144704


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.90it/s, loss=-0.000014, elapsed time=0.04, total time=6.99]
[I 2025-06-07 23:30:47,706] Trial 321 finished with value: -1.4106489281535518e-05 and parameters: {'learning_rate': 0.050774324558507014, 'sigma_multiplier': 0.10430427383254082, 'initialization_multiplier': 0.6605568075144704}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 321 final loss: -0.00001411
Trial 322:
  Learning Rate: 0.03760460042328516
  Sigma Multiplier: 0.6352807238704881
  Initialization Multiplier: 0.6349214645735481


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.54it/s, loss=-0.003810, elapsed time=0.04, total time=6.25]
[I 2025-06-07 23:30:53,991] Trial 322 finished with value: -0.0038096164292751827 and parameters: {'learning_rate': 0.03760460042328516, 'sigma_multiplier': 0.6352807238704881, 'initialization_multiplier': 0.6349214645735481}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 322 final loss: -0.00380962
Trial 323:
  Learning Rate: 0.04503241083280134
  Sigma Multiplier: 1.896532552632441
  Initialization Multiplier: 0.6963209289034393


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.78it/s, loss=-0.000815, elapsed time=0.03, total time=4.86]
[I 2025-06-07 23:30:58,889] Trial 323 finished with value: -0.0008153315130028848 and parameters: {'learning_rate': 0.04503241083280134, 'sigma_multiplier': 1.896532552632441, 'initialization_multiplier': 0.6963209289034393}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 323 final loss: -0.00081533
Trial 324:
  Learning Rate: 0.03739353259804973
  Sigma Multiplier: 0.7034127939789842
  Initialization Multiplier: 0.6213365256556693


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.11it/s, loss=-0.003643, elapsed time=0.04, total time=6.1] 
[I 2025-06-07 23:31:05,031] Trial 324 finished with value: -0.0036430123834354008 and parameters: {'learning_rate': 0.03739353259804973, 'sigma_multiplier': 0.7034127939789842, 'initialization_multiplier': 0.6213365256556693}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 324 final loss: -0.00364301
Trial 325:
  Learning Rate: 0.053648173236365934
  Sigma Multiplier: 1.2692261752257352
  Initialization Multiplier: 0.6467929137266836


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.90it/s, loss=-0.001428, elapsed time=0.03, total time=5.15]
[I 2025-06-07 23:31:10,222] Trial 325 finished with value: -0.001428252943661231 and parameters: {'learning_rate': 0.053648173236365934, 'sigma_multiplier': 1.2692261752257352, 'initialization_multiplier': 0.6467929137266836}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 325 final loss: -0.00142825
Trial 326:
  Learning Rate: 0.03993414816756762
  Sigma Multiplier: 0.6405502506658933
  Initialization Multiplier: 0.7324425243163292


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.51it/s, loss=-0.003749, elapsed time=0.04, total time=6.25]
[I 2025-06-07 23:31:16,505] Trial 326 finished with value: -0.0037487119224168574 and parameters: {'learning_rate': 0.03993414816756762, 'sigma_multiplier': 0.6405502506658933, 'initialization_multiplier': 0.7324425243163292}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 326 final loss: -0.00374871
Trial 327:
  Learning Rate: 8.498229512150512e-05
  Sigma Multiplier: 0.6694442998409333
  Initialization Multiplier: 0.6840251131059268


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.76it/s, loss=0.073192, elapsed time=0.04, total time=6.2] 
[I 2025-06-07 23:31:22,745] Trial 327 finished with value: 0.07319163021062221 and parameters: {'learning_rate': 8.498229512150512e-05, 'sigma_multiplier': 0.6694442998409333, 'initialization_multiplier': 0.6840251131059268}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 327 final loss: 0.07319163
Trial 328:
  Learning Rate: 0.034339996735702386
  Sigma Multiplier: 0.7493889773619106
  Initialization Multiplier: 0.6007770837430063


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.77it/s, loss=-0.003043, elapsed time=0.04, total time=5.96]
[I 2025-06-07 23:31:28,741] Trial 328 finished with value: -0.003042684546888017 and parameters: {'learning_rate': 0.034339996735702386, 'sigma_multiplier': 0.7493889773619106, 'initialization_multiplier': 0.6007770837430063}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 328 final loss: -0.00304268
Trial 329:
  Learning Rate: 0.061229241697637576
  Sigma Multiplier: 0.6247749983472419
  Initialization Multiplier: 0.6629965277829539


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.44it/s, loss=-0.003715, elapsed time=0.04, total time=6.54]
[I 2025-06-07 23:31:35,312] Trial 329 finished with value: -0.0037153913864529224 and parameters: {'learning_rate': 0.061229241697637576, 'sigma_multiplier': 0.6247749983472419, 'initialization_multiplier': 0.6629965277829539}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 329 final loss: -0.00371539
Trial 330:
  Learning Rate: 0.04798800834102191
  Sigma Multiplier: 0.53819795858014
  Initialization Multiplier: 0.5712008750444627


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.95it/s, loss=-0.003529, elapsed time=0.04, total time=6.67]
[I 2025-06-07 23:31:42,016] Trial 330 finished with value: -0.0035286805338448205 and parameters: {'learning_rate': 0.04798800834102191, 'sigma_multiplier': 0.53819795858014, 'initialization_multiplier': 0.5712008750444627}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 330 final loss: -0.00352868
Trial 331:
  Learning Rate: 0.002269046329932938
  Sigma Multiplier: 0.5698402626345307
  Initialization Multiplier: 0.6170342730191078


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.93it/s, loss=-0.002302, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:31:48,462] Trial 331 finished with value: -0.002301724758578694 and parameters: {'learning_rate': 0.002269046329932938, 'sigma_multiplier': 0.5698402626345307, 'initialization_multiplier': 0.6170342730191078}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 331 final loss: -0.00230172
Trial 332:
  Learning Rate: 0.04182067957222935
  Sigma Multiplier: 0.4732705729318754
  Initialization Multiplier: 0.7181462406100866


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.59it/s, loss=-0.003258, elapsed time=0.04, total time=6.78]
[I 2025-06-07 23:31:55,274] Trial 332 finished with value: -0.003257697675306278 and parameters: {'learning_rate': 0.04182067957222935, 'sigma_multiplier': 0.4732705729318754, 'initialization_multiplier': 0.7181462406100866}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 332 final loss: -0.00325770
Trial 333:
  Learning Rate: 0.03250389022394129
  Sigma Multiplier: 0.6160857306124374
  Initialization Multiplier: 0.503448099575187


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.14it/s, loss=-0.003298, elapsed time=0.04, total time=6.35]
[I 2025-06-07 23:32:01,658] Trial 333 finished with value: -0.0032979333256532048 and parameters: {'learning_rate': 0.03250389022394129, 'sigma_multiplier': 0.6160857306124374, 'initialization_multiplier': 0.503448099575187}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 333 final loss: -0.00329793
Trial 334:
  Learning Rate: 0.036469580866475004
  Sigma Multiplier: 0.5218051609909053
  Initialization Multiplier: 0.636205398327105


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.38it/s, loss=-0.003562, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:32:08,250] Trial 334 finished with value: -0.003561535123457067 and parameters: {'learning_rate': 0.036469580866475004, 'sigma_multiplier': 0.5218051609909053, 'initialization_multiplier': 0.636205398327105}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 334 final loss: -0.00356154
Trial 335:
  Learning Rate: 0.04390234422319505
  Sigma Multiplier: 0.6946742138192251
  Initialization Multiplier: 0.5845456611122196


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.10it/s, loss=-0.003417, elapsed time=0.04, total time=6.11]
[I 2025-06-07 23:32:14,398] Trial 335 finished with value: -0.0034166713543480305 and parameters: {'learning_rate': 0.04390234422319505, 'sigma_multiplier': 0.6946742138192251, 'initialization_multiplier': 0.5845456611122196}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 335 final loss: -0.00341667
Trial 336:
  Learning Rate: 0.029768665890903077
  Sigma Multiplier: 0.5672801638953436
  Initialization Multiplier: 0.7806977287607656


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.82it/s, loss=-0.003805, elapsed time=0.04, total time=6.42]
[I 2025-06-07 23:32:20,858] Trial 336 finished with value: -0.0038046952573252456 and parameters: {'learning_rate': 0.029768665890903077, 'sigma_multiplier': 0.5672801638953436, 'initialization_multiplier': 0.7806977287607656}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 336 final loss: -0.00380470
Trial 337:
  Learning Rate: 0.05706391692665526
  Sigma Multiplier: 0.6508735978618522
  Initialization Multiplier: 0.6736146695990746


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.69it/s, loss=-0.003607, elapsed time=0.04, total time=6.22]
[I 2025-06-07 23:32:27,112] Trial 337 finished with value: -0.0036074369681531374 and parameters: {'learning_rate': 0.05706391692665526, 'sigma_multiplier': 0.6508735978618522, 'initialization_multiplier': 0.6736146695990746}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 337 final loss: -0.00360744
Trial 338:
  Learning Rate: 0.07626828894237289
  Sigma Multiplier: 0.5978793725671759
  Initialization Multiplier: 0.5570560064462223


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.25it/s, loss=-0.003419, elapsed time=0.04, total time=6.32]
[I 2025-06-07 23:32:33,466] Trial 338 finished with value: -0.0034187470831305235 and parameters: {'learning_rate': 0.07626828894237289, 'sigma_multiplier': 0.5978793725671759, 'initialization_multiplier': 0.5570560064462223}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 338 final loss: -0.00341875
Trial 339:
  Learning Rate: 0.03541521219324624
  Sigma Multiplier: 0.5005597451379717
  Initialization Multiplier: 0.17766082952368734


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.14it/s, loss=-0.002417, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:32:40,125] Trial 339 finished with value: -0.0024171786715580412 and parameters: {'learning_rate': 0.03541521219324624, 'sigma_multiplier': 0.5005597451379717, 'initialization_multiplier': 0.17766082952368734}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 339 final loss: -0.00241718
Trial 340:
  Learning Rate: 0.04651295293214327
  Sigma Multiplier: 0.5513454476850979
  Initialization Multiplier: 0.7565583095063317


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.69it/s, loss=-0.003823, elapsed time=0.04, total time=6.47]
[I 2025-06-07 23:32:46,632] Trial 340 finished with value: -0.0038226548964074773 and parameters: {'learning_rate': 0.04651295293214327, 'sigma_multiplier': 0.5513454476850979, 'initialization_multiplier': 0.7565583095063317}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 340 final loss: -0.00382265
Trial 341:
  Learning Rate: 0.04892278656853795
  Sigma Multiplier: 0.5390943420447419
  Initialization Multiplier: 0.622414937759129


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.37it/s, loss=-0.003654, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:32:53,229] Trial 341 finished with value: -0.003654239097650334 and parameters: {'learning_rate': 0.04892278656853795, 'sigma_multiplier': 0.5390943420447419, 'initialization_multiplier': 0.622414937759129}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 341 final loss: -0.00365424
Trial 342:
  Learning Rate: 0.05555735707567845
  Sigma Multiplier: 0.45182585029733635
  Initialization Multiplier: 0.4979369225685594


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.89it/s, loss=-0.003401, elapsed time=0.04, total time=6.69]
[I 2025-06-07 23:32:59,953] Trial 342 finished with value: -0.003400743602482979 and parameters: {'learning_rate': 0.05555735707567845, 'sigma_multiplier': 0.45182585029733635, 'initialization_multiplier': 0.4979369225685594}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 342 final loss: -0.00340074
Trial 343:
  Learning Rate: 0.04382183750271383
  Sigma Multiplier: 0.6298671570872478
  Initialization Multiplier: 0.6903736927361928


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.05it/s, loss=-0.003534, elapsed time=0.04, total time=6.93]
[I 2025-06-07 23:33:06,920] Trial 343 finished with value: -0.003534260808357092 and parameters: {'learning_rate': 0.04382183750271383, 'sigma_multiplier': 0.6298671570872478, 'initialization_multiplier': 0.6903736927361928}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 343 final loss: -0.00353426
Trial 344:
  Learning Rate: 0.03952671483815539
  Sigma Multiplier: 0.6014479017547731
  Initialization Multiplier: 0.7641099404003483


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.13it/s, loss=-0.003658, elapsed time=0.04, total time=6.35]
[I 2025-06-07 23:33:13,305] Trial 344 finished with value: -0.0036579385378792753 and parameters: {'learning_rate': 0.03952671483815539, 'sigma_multiplier': 0.6014479017547731, 'initialization_multiplier': 0.7641099404003483}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 344 final loss: -0.00365794
Trial 345:
  Learning Rate: 0.06737039934684692
  Sigma Multiplier: 0.504202737717234
  Initialization Multiplier: 0.5944896230516447


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.26it/s, loss=-0.003309, elapsed time=0.04, total time=6.59]
[I 2025-06-07 23:33:19,927] Trial 345 finished with value: -0.003308718722702202 and parameters: {'learning_rate': 0.06737039934684692, 'sigma_multiplier': 0.504202737717234, 'initialization_multiplier': 0.5944896230516447}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 345 final loss: -0.00330872
Trial 346:
  Learning Rate: 0.0034470220947574177
  Sigma Multiplier: 0.5554775921697233
  Initialization Multiplier: 0.5399505699490101


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.77it/s, loss=-0.002422, elapsed time=0.04, total time=6.44]
[I 2025-06-07 23:33:26,406] Trial 346 finished with value: -0.0024223037901597816 and parameters: {'learning_rate': 0.0034470220947574177, 'sigma_multiplier': 0.5554775921697233, 'initialization_multiplier': 0.5399505699490101}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 346 final loss: -0.00242230
Trial 347:
  Learning Rate: 0.04870768363764184
  Sigma Multiplier: 0.4067991171426514
  Initialization Multiplier: 0.6423960357806386


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.73it/s, loss=-0.002451, elapsed time=0.04, total time=6.74]
[I 2025-06-07 23:33:33,177] Trial 347 finished with value: -0.0024514925855660483 and parameters: {'learning_rate': 0.04870768363764184, 'sigma_multiplier': 0.4067991171426514, 'initialization_multiplier': 0.6423960357806386}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 347 final loss: -0.00245149
Trial 348:
  Learning Rate: 0.032316358263879456
  Sigma Multiplier: 0.634084379238533
  Initialization Multiplier: 0.4559919602080606


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.53it/s, loss=-0.003499, elapsed time=0.04, total time=6.24]
[I 2025-06-07 23:33:39,455] Trial 348 finished with value: -0.0034985287135106337 and parameters: {'learning_rate': 0.032316358263879456, 'sigma_multiplier': 0.634084379238533, 'initialization_multiplier': 0.4559919602080606}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 348 final loss: -0.00349853
Trial 349:
  Learning Rate: 0.03946736274865925
  Sigma Multiplier: 0.8888871302493526
  Initialization Multiplier: 0.5612212101452122


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.18it/s, loss=-0.002582, elapsed time=0.03, total time=5.65]
[I 2025-06-07 23:33:45,137] Trial 349 finished with value: -0.002581745838997832 and parameters: {'learning_rate': 0.03946736274865925, 'sigma_multiplier': 0.8888871302493526, 'initialization_multiplier': 0.5612212101452122}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 349 final loss: -0.00258175
Trial 350:
  Learning Rate: 0.05772787368321272
  Sigma Multiplier: 0.6793325099689567
  Initialization Multiplier: 0.6612351795521777


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.01it/s, loss=-0.003420, elapsed time=0.04, total time=6.13]
[I 2025-06-07 23:33:51,308] Trial 350 finished with value: -0.0034197918319436196 and parameters: {'learning_rate': 0.05772787368321272, 'sigma_multiplier': 0.6793325099689567, 'initialization_multiplier': 0.6612351795521777}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 350 final loss: -0.00341979
Trial 351:
  Learning Rate: 0.04561612647411784
  Sigma Multiplier: 0.5874049507819469
  Initialization Multiplier: 0.6107166414652557


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.08it/s, loss=-0.003578, elapsed time=0.04, total time=6.36]
[I 2025-06-07 23:33:57,703] Trial 351 finished with value: -0.003577767558295559 and parameters: {'learning_rate': 0.04561612647411784, 'sigma_multiplier': 0.5874049507819469, 'initialization_multiplier': 0.6107166414652557}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 351 final loss: -0.00357777
Trial 352:
  Learning Rate: 0.031081905879788537
  Sigma Multiplier: 0.5260056360533212
  Initialization Multiplier: 0.7079995506778135


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.42it/s, loss=-0.003573, elapsed time=0.04, total time=6.54]
[I 2025-06-07 23:34:04,283] Trial 352 finished with value: -0.003573238394786146 and parameters: {'learning_rate': 0.031081905879788537, 'sigma_multiplier': 0.5260056360533212, 'initialization_multiplier': 0.7079995506778135}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 352 final loss: -0.00357324
Trial 353:
  Learning Rate: 0.037224720742648446
  Sigma Multiplier: 0.719994500089839
  Initialization Multiplier: 0.381592095080867


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.54it/s, loss=-0.003296, elapsed time=0.04, total time=6]   
[I 2025-06-07 23:34:10,319] Trial 353 finished with value: -0.0032955325333411897 and parameters: {'learning_rate': 0.037224720742648446, 'sigma_multiplier': 0.719994500089839, 'initialization_multiplier': 0.381592095080867}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 353 final loss: -0.00329553
Trial 354:
  Learning Rate: 0.028710088392662137
  Sigma Multiplier: 0.6044342309808692
  Initialization Multiplier: 0.5278715674925896


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.24it/s, loss=-0.003828, elapsed time=0.04, total time=6.32]
[I 2025-06-07 23:34:16,671] Trial 354 finished with value: -0.003828123841152575 and parameters: {'learning_rate': 0.028710088392662137, 'sigma_multiplier': 0.6044342309808692, 'initialization_multiplier': 0.5278715674925896}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 354 final loss: -0.00382812
Trial 355:
  Learning Rate: 0.025263800415056405
  Sigma Multiplier: 0.6570733457316569
  Initialization Multiplier: 0.5068278059059643


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.01it/s, loss=-0.003474, elapsed time=0.04, total time=6.13]
[I 2025-06-07 23:34:22,834] Trial 355 finished with value: -0.003473950881060051 and parameters: {'learning_rate': 0.025263800415056405, 'sigma_multiplier': 0.6570733457316569, 'initialization_multiplier': 0.5068278059059643}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 355 final loss: -0.00347395
Trial 356:
  Learning Rate: 0.017326668778628387
  Sigma Multiplier: 0.6172820966182162
  Initialization Multiplier: 0.4901901403468821


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.52it/s, loss=-0.003581, elapsed time=0.04, total time=6.25]
[I 2025-06-07 23:34:29,123] Trial 356 finished with value: -0.0035812970624092607 and parameters: {'learning_rate': 0.017326668778628387, 'sigma_multiplier': 0.6172820966182162, 'initialization_multiplier': 0.4901901403468821}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 356 final loss: -0.00358130
Trial 357:
  Learning Rate: 0.048812532704081806
  Sigma Multiplier: 0.4766203174326237
  Initialization Multiplier: 0.309842303836378


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.10it/s, loss=-0.003185, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:34:35,780] Trial 357 finished with value: -0.0031848388102777314 and parameters: {'learning_rate': 0.048812532704081806, 'sigma_multiplier': 0.4766203174326237, 'initialization_multiplier': 0.309842303836378}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 357 final loss: -0.00318484
Trial 358:
  Learning Rate: 0.06486731026502866
  Sigma Multiplier: 0.5513351998074927
  Initialization Multiplier: 0.05022821616531414


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.46it/s, loss=0.005634, elapsed time=0.04, total time=6.53]
[I 2025-06-07 23:34:42,347] Trial 358 finished with value: 0.00563375872124057 and parameters: {'learning_rate': 0.06486731026502866, 'sigma_multiplier': 0.5513351998074927, 'initialization_multiplier': 0.05022821616531414}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 358 final loss: 0.00563376
Trial 359:
  Learning Rate: 0.028264609484940566
  Sigma Multiplier: 0.6555353824660185
  Initialization Multiplier: 0.5422891667125009


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.90it/s, loss=-0.003567, elapsed time=0.04, total time=6.15]
[I 2025-06-07 23:34:48,539] Trial 359 finished with value: -0.003566865818881105 and parameters: {'learning_rate': 0.028264609484940566, 'sigma_multiplier': 0.6555353824660185, 'initialization_multiplier': 0.5422891667125009}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 359 final loss: -0.00356687
Trial 360:
  Learning Rate: 0.020209824319380967
  Sigma Multiplier: 0.6018158804769285
  Initialization Multiplier: 0.4541448045462555


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.31it/s, loss=-0.003429, elapsed time=0.04, total time=6.3] 
[I 2025-06-07 23:34:54,871] Trial 360 finished with value: -0.003429229379710279 and parameters: {'learning_rate': 0.020209824319380967, 'sigma_multiplier': 0.6018158804769285, 'initialization_multiplier': 0.4541448045462555}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 360 final loss: -0.00342923
Trial 361:
  Learning Rate: 0.04243611646827222
  Sigma Multiplier: 0.5522609801724985
  Initialization Multiplier: 0.5794801553526439


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.81it/s, loss=-0.003588, elapsed time=0.04, total time=6.43]
[I 2025-06-07 23:35:01,339] Trial 361 finished with value: -0.0035880351060387206 and parameters: {'learning_rate': 0.04243611646827222, 'sigma_multiplier': 0.5522609801724985, 'initialization_multiplier': 0.5794801553526439}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 361 final loss: -0.00358804
Trial 362:
  Learning Rate: 0.037355053333156266
  Sigma Multiplier: 0.4397106551970779
  Initialization Multiplier: 0.5323611532811666


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.79it/s, loss=-0.003146, elapsed time=0.04, total time=6.71]
[I 2025-06-07 23:35:08,086] Trial 362 finished with value: -0.003145877069031129 and parameters: {'learning_rate': 0.037355053333156266, 'sigma_multiplier': 0.4397106551970779, 'initialization_multiplier': 0.5323611532811666}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 362 final loss: -0.00314588
Trial 363:
  Learning Rate: 0.05300772540751584
  Sigma Multiplier: 0.6272153327737542
  Initialization Multiplier: 0.7493999654364453


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.67it/s, loss=-0.003673, elapsed time=0.04, total time=6.22]
[I 2025-06-07 23:35:14,343] Trial 363 finished with value: -0.003672582165577217 and parameters: {'learning_rate': 0.05300772540751584, 'sigma_multiplier': 0.6272153327737542, 'initialization_multiplier': 0.7493999654364453}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 363 final loss: -0.00367258
Trial 364:
  Learning Rate: 0.022759701128948363
  Sigma Multiplier: 0.5078558693144022
  Initialization Multiplier: 0.4147048214942285


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.41it/s, loss=-0.002745, elapsed time=0.04, total time=6.54]
[I 2025-06-07 23:35:20,921] Trial 364 finished with value: -0.002744611214662529 and parameters: {'learning_rate': 0.022759701128948363, 'sigma_multiplier': 0.5078558693144022, 'initialization_multiplier': 0.4147048214942285}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 364 final loss: -0.00274461
Trial 365:
  Learning Rate: 0.03072106736678969
  Sigma Multiplier: 0.5866950277750272
  Initialization Multiplier: 0.5761502377565679


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.28it/s, loss=-0.003376, elapsed time=0.04, total time=6.31]
[I 2025-06-07 23:35:27,270] Trial 365 finished with value: -0.003376440519819297 and parameters: {'learning_rate': 0.03072106736678969, 'sigma_multiplier': 0.5866950277750272, 'initialization_multiplier': 0.5761502377565679}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 365 final loss: -0.00337644
Trial 366:
  Learning Rate: 0.00038912442170349815
  Sigma Multiplier: 0.6888218748737602
  Initialization Multiplier: 0.5120593311133708


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.47it/s, loss=0.012628, elapsed time=0.04, total time=6.02]
[I 2025-06-07 23:35:33,324] Trial 366 finished with value: 0.012628061787967123 and parameters: {'learning_rate': 0.00038912442170349815, 'sigma_multiplier': 0.6888218748737602, 'initialization_multiplier': 0.5120593311133708}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 366 final loss: 0.01262806
Trial 367:
  Learning Rate: 0.014833215330036544
  Sigma Multiplier: 0.5520949685987067
  Initialization Multiplier: 0.6177799523787637


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.04it/s, loss=-0.003461, elapsed time=0.04, total time=6.38]
[I 2025-06-07 23:35:39,735] Trial 367 finished with value: -0.0034607937351136056 and parameters: {'learning_rate': 0.014833215330036544, 'sigma_multiplier': 0.5520949685987067, 'initialization_multiplier': 0.6177799523787637}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 367 final loss: -0.00346079
Trial 368:
  Learning Rate: 0.035559339112832045
  Sigma Multiplier: 0.6411113081775012
  Initialization Multiplier: 0.47251470904194076


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.90it/s, loss=-0.003191, elapsed time=0.04, total time=6.15]
[I 2025-06-07 23:35:45,925] Trial 368 finished with value: -0.003191243705435628 and parameters: {'learning_rate': 0.035559339112832045, 'sigma_multiplier': 0.6411113081775012, 'initialization_multiplier': 0.47251470904194076}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 368 final loss: -0.00319124
Trial 369:
  Learning Rate: 0.08028754663798175
  Sigma Multiplier: 0.5947398979087778
  Initialization Multiplier: 0.6779526331421758


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.41it/s, loss=-0.003440, elapsed time=0.04, total time=6.29]
[I 2025-06-07 23:35:52,246] Trial 369 finished with value: -0.003439550430923034 and parameters: {'learning_rate': 0.08028754663798175, 'sigma_multiplier': 0.5947398979087778, 'initialization_multiplier': 0.6779526331421758}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 369 final loss: -0.00343955
Trial 370:
  Learning Rate: 0.04174440543407421
  Sigma Multiplier: 0.5204534606921389
  Initialization Multiplier: 0.8038820283427298


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.77it/s, loss=-0.003549, elapsed time=0.04, total time=6.44]
[I 2025-06-07 23:35:58,727] Trial 370 finished with value: -0.0035492620837085345 and parameters: {'learning_rate': 0.04174440543407421, 'sigma_multiplier': 0.5204534606921389, 'initialization_multiplier': 0.8038820283427298}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 370 final loss: -0.00354926
Trial 371:
  Learning Rate: 0.00018063016230177948
  Sigma Multiplier: 0.570630632320128
  Initialization Multiplier: 0.5644309516562306


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.19it/s, loss=0.034589, elapsed time=0.04, total time=6.33]
[I 2025-06-07 23:36:05,097] Trial 371 finished with value: 0.0345892692237834 and parameters: {'learning_rate': 0.00018063016230177948, 'sigma_multiplier': 0.570630632320128, 'initialization_multiplier': 0.5644309516562306}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 371 final loss: 0.03458927
Trial 372:
  Learning Rate: 0.02718860285535143
  Sigma Multiplier: 0.48603702877586474
  Initialization Multiplier: 0.7104257831398334


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.59it/s, loss=-0.003705, elapsed time=0.04, total time=6.49]
[I 2025-06-07 23:36:11,621] Trial 372 finished with value: -0.003705324783723471 and parameters: {'learning_rate': 0.02718860285535143, 'sigma_multiplier': 0.48603702877586474, 'initialization_multiplier': 0.7104257831398334}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 372 final loss: -0.00370532
Trial 373:
  Learning Rate: 0.050620821335898396
  Sigma Multiplier: 0.6690701471882488
  Initialization Multiplier: 0.5901842850097333


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.19it/s, loss=-0.003798, elapsed time=0.04, total time=6.1] 
[I 2025-06-07 23:36:17,752] Trial 373 finished with value: -0.0037980450603225632 and parameters: {'learning_rate': 0.050620821335898396, 'sigma_multiplier': 0.6690701471882488, 'initialization_multiplier': 0.5901842850097333}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 373 final loss: -0.00379805
Trial 374:
  Learning Rate: 0.03307241438997085
  Sigma Multiplier: 0.3351369174989682
  Initialization Multiplier: 1.2921517529560413


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.26it/s, loss=0.011233, elapsed time=0.04, total time=6.88]
[I 2025-06-07 23:36:24,667] Trial 374 finished with value: 0.011232751627456865 and parameters: {'learning_rate': 0.03307241438997085, 'sigma_multiplier': 0.3351369174989682, 'initialization_multiplier': 1.2921517529560413}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 374 final loss: 0.01123275
Trial 375:
  Learning Rate: 0.06248658155935599
  Sigma Multiplier: 0.6233286662079074
  Initialization Multiplier: 0.6520054452795742


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.97it/s, loss=-0.003254, elapsed time=0.04, total time=6.14]
[I 2025-06-07 23:36:30,841] Trial 375 finished with value: -0.0032536879332023937 and parameters: {'learning_rate': 0.06248658155935599, 'sigma_multiplier': 0.6233286662079074, 'initialization_multiplier': 0.6520054452795742}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 375 final loss: -0.00325369
Trial 376:
  Learning Rate: 0.02355358862771398
  Sigma Multiplier: 0.724394372854964
  Initialization Multiplier: 1.0659809911648108


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.65it/s, loss=0.004720, elapsed time=0.04, total time=5.99]
[I 2025-06-07 23:36:36,866] Trial 376 finished with value: 0.004719817688176631 and parameters: {'learning_rate': 0.02355358862771398, 'sigma_multiplier': 0.724394372854964, 'initialization_multiplier': 1.0659809911648108}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 376 final loss: 0.00471982
Trial 377:
  Learning Rate: 0.0439700220466596
  Sigma Multiplier: 0.5329548673338973
  Initialization Multiplier: 0.5416215031608902


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.84it/s, loss=-0.003729, elapsed time=0.04, total time=6.42]
[I 2025-06-07 23:36:43,319] Trial 377 finished with value: -0.0037292660894549068 and parameters: {'learning_rate': 0.0439700220466596, 'sigma_multiplier': 0.5329548673338973, 'initialization_multiplier': 0.5416215031608902}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 377 final loss: -0.00372927
Trial 378:
  Learning Rate: 0.028724903026477726
  Sigma Multiplier: 0.6004169039241097
  Initialization Multiplier: 1.9495473123513012


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.39it/s, loss=0.080154, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:36:49,910] Trial 378 finished with value: 0.0801540033436402 and parameters: {'learning_rate': 0.028724903026477726, 'sigma_multiplier': 0.6004169039241097, 'initialization_multiplier': 1.9495473123513012}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 378 final loss: 0.08015400
Trial 379:
  Learning Rate: 0.001847565409458025
  Sigma Multiplier: 0.5705145033581116
  Initialization Multiplier: 0.6090614845352932


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.92it/s, loss=-0.002320, elapsed time=0.04, total time=6.4] 
[I 2025-06-07 23:36:56,348] Trial 379 finished with value: -0.002320286781425252 and parameters: {'learning_rate': 0.001847565409458025, 'sigma_multiplier': 0.5705145033581116, 'initialization_multiplier': 0.6090614845352932}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 379 final loss: -0.00232029
Trial 380:
  Learning Rate: 0.037110355382059444
  Sigma Multiplier: 0.6433245103698783
  Initialization Multiplier: 0.7479263701490931


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.98it/s, loss=-0.003634, elapsed time=0.04, total time=6.14]
[I 2025-06-07 23:37:02,527] Trial 380 finished with value: -0.0036339080321649447 and parameters: {'learning_rate': 0.037110355382059444, 'sigma_multiplier': 0.6433245103698783, 'initialization_multiplier': 0.7479263701490931}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 380 final loss: -0.00363391
Trial 381:
  Learning Rate: 0.018739501666693987
  Sigma Multiplier: 0.5400859804865624
  Initialization Multiplier: 0.9006609471187178


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.90it/s, loss=-0.003682, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:37:08,973] Trial 381 finished with value: -0.0036816340934389515 and parameters: {'learning_rate': 0.018739501666693987, 'sigma_multiplier': 0.5400859804865624, 'initialization_multiplier': 0.9006609471187178}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 381 final loss: -0.00368163
Trial 382:
  Learning Rate: 0.055550253976557634
  Sigma Multiplier: 0.6813818529669613
  Initialization Multiplier: 0.6752215031093238


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.41it/s, loss=-0.003383, elapsed time=0.04, total time=6.03]
[I 2025-06-07 23:37:15,039] Trial 382 finished with value: -0.0033831761211772942 and parameters: {'learning_rate': 0.055550253976557634, 'sigma_multiplier': 0.6813818529669613, 'initialization_multiplier': 0.6752215031093238}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 382 final loss: -0.00338318
Trial 383:
  Learning Rate: 0.03189881728173422
  Sigma Multiplier: 0.7933329143362539
  Initialization Multiplier: 0.48748110495282077


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.59it/s, loss=-0.003114, elapsed time=0.04, total time=5.77]
[I 2025-06-07 23:37:20,846] Trial 383 finished with value: -0.0031139014563363802 and parameters: {'learning_rate': 0.03189881728173422, 'sigma_multiplier': 0.7933329143362539, 'initialization_multiplier': 0.48748110495282077}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 383 final loss: -0.00311390
Trial 384:
  Learning Rate: 0.046805907783000957
  Sigma Multiplier: 0.6140425985795274
  Initialization Multiplier: 0.521339137284252


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.74it/s, loss=-0.003687, elapsed time=0.04, total time=6.2] 
[I 2025-06-07 23:37:27,084] Trial 384 finished with value: -0.0036868140882231823 and parameters: {'learning_rate': 0.046805907783000957, 'sigma_multiplier': 0.6140425985795274, 'initialization_multiplier': 0.521339137284252}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 384 final loss: -0.00368681
Trial 385:
  Learning Rate: 0.03934225583124355
  Sigma Multiplier: 0.5679837352049357
  Initialization Multiplier: 0.6323827345079674


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.29it/s, loss=-0.003646, elapsed time=0.04, total time=6.31]
[I 2025-06-07 23:37:33,424] Trial 385 finished with value: -0.003645999683894432 and parameters: {'learning_rate': 0.03934225583124355, 'sigma_multiplier': 0.5679837352049357, 'initialization_multiplier': 0.6323827345079674}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 385 final loss: -0.00364600
Trial 386:
  Learning Rate: 0.027473054951682657
  Sigma Multiplier: 0.4753841112678329
  Initialization Multiplier: 0.5748566260386699


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.48it/s, loss=-0.003418, elapsed time=0.04, total time=6.52]
[I 2025-06-07 23:37:39,979] Trial 386 finished with value: -0.0034175247519474426 and parameters: {'learning_rate': 0.027473054951682657, 'sigma_multiplier': 0.4753841112678329, 'initialization_multiplier': 0.5748566260386699}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 386 final loss: -0.00341752
Trial 387:
  Learning Rate: 0.02174639973612129
  Sigma Multiplier: 0.5170393190991827
  Initialization Multiplier: 0.7034081869299387


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.72it/s, loss=-0.003364, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:37:46,472] Trial 387 finished with value: -0.0033644938425269125 and parameters: {'learning_rate': 0.02174639973612129, 'sigma_multiplier': 0.5170393190991827, 'initialization_multiplier': 0.7034081869299387}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 387 final loss: -0.00336449
Trial 388:
  Learning Rate: 0.03339456517109725
  Sigma Multiplier: 0.6018947188529223
  Initialization Multiplier: 0.6488588066560211


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.65it/s, loss=-0.003702, elapsed time=0.04, total time=6.22]
[I 2025-06-07 23:37:52,722] Trial 388 finished with value: -0.0037024090692845558 and parameters: {'learning_rate': 0.03339456517109725, 'sigma_multiplier': 0.6018947188529223, 'initialization_multiplier': 0.6488588066560211}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 388 final loss: -0.00370241
Trial 389:
  Learning Rate: 0.06819500918094075
  Sigma Multiplier: 0.6338148773090307
  Initialization Multiplier: 0.5966856738593485


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.61it/s, loss=-0.003632, elapsed time=0.04, total time=6.23]
[I 2025-06-07 23:37:58,985] Trial 389 finished with value: -0.003631528207330398 and parameters: {'learning_rate': 0.06819500918094075, 'sigma_multiplier': 0.6338148773090307, 'initialization_multiplier': 0.5966856738593485}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 389 final loss: -0.00363153
Trial 390:
  Learning Rate: 0.04340676250185233
  Sigma Multiplier: 0.5555441099695473
  Initialization Multiplier: 0.5425564536148807


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.99it/s, loss=-0.003621, elapsed time=0.04, total time=6.4] 
[I 2025-06-07 23:38:05,423] Trial 390 finished with value: -0.0036205863967185598 and parameters: {'learning_rate': 0.04340676250185233, 'sigma_multiplier': 0.5555441099695473, 'initialization_multiplier': 0.5425564536148807}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 390 final loss: -0.00362059
Trial 391:
  Learning Rate: 0.02615627098614994
  Sigma Multiplier: 0.4199737848692039
  Initialization Multiplier: 0.7874656961921987


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.00it/s, loss=-0.002945, elapsed time=0.04, total time=6.65]
[I 2025-06-07 23:38:12,113] Trial 391 finished with value: -0.002944997024551063 and parameters: {'learning_rate': 0.02615627098614994, 'sigma_multiplier': 0.4199737848692039, 'initialization_multiplier': 0.7874656961921987}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 391 final loss: -0.00294500
Trial 392:
  Learning Rate: 0.036679305117273776
  Sigma Multiplier: 0.7078664420313389
  Initialization Multiplier: 1.6312480168309391


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.73it/s, loss=0.091660, elapsed time=0.04, total time=6.19]
[I 2025-06-07 23:38:18,338] Trial 392 finished with value: 0.09165953456176675 and parameters: {'learning_rate': 0.036679305117273776, 'sigma_multiplier': 0.7078664420313389, 'initialization_multiplier': 1.6312480168309391}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 392 final loss: 0.09165953
Trial 393:
  Learning Rate: 0.0058090023704543
  Sigma Multiplier: 0.586110353099774
  Initialization Multiplier: 0.8316689659037069


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.18it/s, loss=-0.002825, elapsed time=0.04, total time=6.33]
[I 2025-06-07 23:38:24,706] Trial 393 finished with value: -0.002824549591128007 and parameters: {'learning_rate': 0.0058090023704543, 'sigma_multiplier': 0.586110353099774, 'initialization_multiplier': 0.8316689659037069}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 393 final loss: -0.00282455
Trial 394:
  Learning Rate: 0.0990688795389669
  Sigma Multiplier: 0.6649507179648599
  Initialization Multiplier: 0.4600940932283194


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.07it/s, loss=-0.002870, elapsed time=0.04, total time=6.11]
[I 2025-06-07 23:38:30,853] Trial 394 finished with value: -0.002870312478886005 and parameters: {'learning_rate': 0.0990688795389669, 'sigma_multiplier': 0.6649507179648599, 'initialization_multiplier': 0.4600940932283194}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 394 final loss: -0.00287031
Trial 395:
  Learning Rate: 0.05451413759223866
  Sigma Multiplier: 1.0055073617770633
  Initialization Multiplier: 0.7473265322587802


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.29it/s, loss=-0.002354, elapsed time=0.03, total time=5.43]
[I 2025-06-07 23:38:36,321] Trial 395 finished with value: -0.002353698640521943 and parameters: {'learning_rate': 0.05451413759223866, 'sigma_multiplier': 1.0055073617770633, 'initialization_multiplier': 0.7473265322587802}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 395 final loss: -0.00235370
Trial 396:
  Learning Rate: 0.030660055705558072
  Sigma Multiplier: 0.4987974750627463
  Initialization Multiplier: 0.6096925306011302


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.45it/s, loss=-0.003414, elapsed time=0.04, total time=6.53]
[I 2025-06-07 23:38:42,888] Trial 396 finished with value: -0.003414404275516903 and parameters: {'learning_rate': 0.030660055705558072, 'sigma_multiplier': 0.4987974750627463, 'initialization_multiplier': 0.6096925306011302}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 396 final loss: -0.00341440
Trial 397:
  Learning Rate: 0.047001161570405625
  Sigma Multiplier: 0.6055893084573121
  Initialization Multiplier: 0.6801394454244512


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.49it/s, loss=-0.003794, elapsed time=0.04, total time=6.26]
[I 2025-06-07 23:38:49,182] Trial 397 finished with value: -0.003794489845595929 and parameters: {'learning_rate': 0.047001161570405625, 'sigma_multiplier': 0.6055893084573121, 'initialization_multiplier': 0.6801394454244512}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 397 final loss: -0.00379449
Trial 398:
  Learning Rate: 0.021118320361461763
  Sigma Multiplier: 0.5409752536340349
  Initialization Multiplier: 1.1313932410774212


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.24it/s, loss=-0.001742, elapsed time=0.04, total time=6.59]
[I 2025-06-07 23:38:55,808] Trial 398 finished with value: -0.0017422144271349409 and parameters: {'learning_rate': 0.021118320361461763, 'sigma_multiplier': 0.5409752536340349, 'initialization_multiplier': 1.1313932410774212}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 398 final loss: -0.00174221
Trial 399:
  Learning Rate: 0.03659864892178284
  Sigma Multiplier: 0.237194911859005
  Initialization Multiplier: 0.5719985237589511


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.20it/s, loss=-0.000189, elapsed time=0.04, total time=6.89]
[I 2025-06-07 23:39:02,735] Trial 399 finished with value: -0.0001891529305442382 and parameters: {'learning_rate': 0.03659864892178284, 'sigma_multiplier': 0.237194911859005, 'initialization_multiplier': 0.5719985237589511}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 399 final loss: -0.00018915
Trial 400:
  Learning Rate: 0.025647969118274425
  Sigma Multiplier: 0.6406353605920374
  Initialization Multiplier: 0.5128114663710569


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.63it/s, loss=-0.003267, elapsed time=0.04, total time=6.22]
[I 2025-06-07 23:39:08,989] Trial 400 finished with value: -0.0032669013241695697 and parameters: {'learning_rate': 0.025647969118274425, 'sigma_multiplier': 0.6406353605920374, 'initialization_multiplier': 0.5128114663710569}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 400 final loss: -0.00326690
Trial 401:
  Learning Rate: 0.04103903880791812
  Sigma Multiplier: 0.757154753083228
  Initialization Multiplier: 0.6432205914822098


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.82it/s, loss=-0.003268, elapsed time=0.04, total time=5.94]
[I 2025-06-07 23:39:14,968] Trial 401 finished with value: -0.003268491465494575 and parameters: {'learning_rate': 0.04103903880791812, 'sigma_multiplier': 0.757154753083228, 'initialization_multiplier': 0.6432205914822098}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 401 final loss: -0.00326849
Trial 402:
  Learning Rate: 0.017414843964247983
  Sigma Multiplier: 0.46167791136660075
  Initialization Multiplier: 0.7216362429527657


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.03it/s, loss=-0.003375, elapsed time=0.04, total time=6.64]
[I 2025-06-07 23:39:21,648] Trial 402 finished with value: -0.0033746191552074286 and parameters: {'learning_rate': 0.017414843964247983, 'sigma_multiplier': 0.46167791136660075, 'initialization_multiplier': 0.7216362429527657}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 402 final loss: -0.00337462
Trial 403:
  Learning Rate: 0.05753181233697406
  Sigma Multiplier: 0.554548200980549
  Initialization Multiplier: 0.5508162335743366


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.79it/s, loss=-0.003673, elapsed time=0.04, total time=6.45]
[I 2025-06-07 23:39:28,129] Trial 403 finished with value: -0.003672845514175138 and parameters: {'learning_rate': 0.05753181233697406, 'sigma_multiplier': 0.554548200980549, 'initialization_multiplier': 0.5508162335743366}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 403 final loss: -0.00367285
Trial 404:
  Learning Rate: 0.0012588672200071914
  Sigma Multiplier: 0.506923086726429
  Initialization Multiplier: 0.4137873953665937


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.36it/s, loss=-0.000882, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:39:34,721] Trial 404 finished with value: -0.0008821121854609845 and parameters: {'learning_rate': 0.0012588672200071914, 'sigma_multiplier': 0.506923086726429, 'initialization_multiplier': 0.4137873953665937}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 404 final loss: -0.00088211
Trial 405:
  Learning Rate: 0.031306664186008325
  Sigma Multiplier: 0.5937687441154109
  Initialization Multiplier: 0.6176282091668001


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.07it/s, loss=-0.003920, elapsed time=0.04, total time=6.36]
[I 2025-06-07 23:39:41,118] Trial 405 finished with value: -0.0039202097454962195 and parameters: {'learning_rate': 0.031306664186008325, 'sigma_multiplier': 0.5937687441154109, 'initialization_multiplier': 0.6176282091668001}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 405 final loss: -0.00392021
Trial 406:
  Learning Rate: 0.030074479196014896
  Sigma Multiplier: 0.5849524553999162
  Initialization Multiplier: 0.5924689558528243


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.98it/s, loss=-0.003666, elapsed time=0.04, total time=6.39]
[I 2025-06-07 23:39:47,547] Trial 406 finished with value: -0.003666279231989664 and parameters: {'learning_rate': 0.030074479196014896, 'sigma_multiplier': 0.5849524553999162, 'initialization_multiplier': 0.5924689558528243}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 406 final loss: -0.00366628
Trial 407:
  Learning Rate: 0.0230281494089363
  Sigma Multiplier: 0.5721687096160105
  Initialization Multiplier: 0.4763261717470071


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.86it/s, loss=-0.003327, elapsed time=0.04, total time=6.42]
[I 2025-06-07 23:39:54,003] Trial 407 finished with value: -0.003327013888467233 and parameters: {'learning_rate': 0.0230281494089363, 'sigma_multiplier': 0.5721687096160105, 'initialization_multiplier': 0.4763261717470071}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 407 final loss: -0.00332701
Trial 408:
  Learning Rate: 0.0292695587954653
  Sigma Multiplier: 0.5126178247107597
  Initialization Multiplier: 0.6715821989189329


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.13it/s, loss=-0.003822, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:40:00,661] Trial 408 finished with value: -0.003821570616174293 and parameters: {'learning_rate': 0.0292695587954653, 'sigma_multiplier': 0.5126178247107597, 'initialization_multiplier': 0.6715821989189329}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 408 final loss: -0.00382157
Trial 409:
  Learning Rate: 0.04830004058882376
  Sigma Multiplier: 0.6025218247251108
  Initialization Multiplier: 0.3478608772312718


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.02it/s, loss=-0.003367, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:40:07,066] Trial 409 finished with value: -0.00336732588485762 and parameters: {'learning_rate': 0.04830004058882376, 'sigma_multiplier': 0.6025218247251108, 'initialization_multiplier': 0.3478608772312718}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 409 final loss: -0.00336733
Trial 410:
  Learning Rate: 0.013435322026008903
  Sigma Multiplier: 0.5378318187995654
  Initialization Multiplier: 0.5290523859833621


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.36it/s, loss=-0.003039, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:40:13,659] Trial 410 finished with value: -0.0030385640600909605 and parameters: {'learning_rate': 0.013435322026008903, 'sigma_multiplier': 0.5378318187995654, 'initialization_multiplier': 0.5290523859833621}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 410 final loss: -0.00303856
Trial 411:
  Learning Rate: 0.02498691529495545
  Sigma Multiplier: 0.6719671852013351
  Initialization Multiplier: 0.6159658497704481


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.81it/s, loss=-0.003831, elapsed time=0.04, total time=6.18]
[I 2025-06-07 23:40:19,870] Trial 411 finished with value: -0.0038312542416609717 and parameters: {'learning_rate': 0.02498691529495545, 'sigma_multiplier': 0.6719671852013351, 'initialization_multiplier': 0.6159658497704481}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 411 final loss: -0.00383125
Trial 412:
  Learning Rate: 0.01916837980404159
  Sigma Multiplier: 0.6975278895966164
  Initialization Multiplier: 0.6102759231648653


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.60it/s, loss=-0.003373, elapsed time=0.04, total time=6.76]
[I 2025-06-07 23:40:26,670] Trial 412 finished with value: -0.003372652721825138 and parameters: {'learning_rate': 0.01916837980404159, 'sigma_multiplier': 0.6975278895966164, 'initialization_multiplier': 0.6102759231648653}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 412 final loss: -0.00337265
Trial 413:
  Learning Rate: 0.024278314573988786
  Sigma Multiplier: 0.650759773899034
  Initialization Multiplier: 0.5662585603237039


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.45it/s, loss=-0.003466, elapsed time=0.04, total time=6.26]
[I 2025-06-07 23:40:32,971] Trial 413 finished with value: -0.0034663007261996823 and parameters: {'learning_rate': 0.024278314573988786, 'sigma_multiplier': 0.650759773899034, 'initialization_multiplier': 0.5662585603237039}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 413 final loss: -0.00346630
Trial 414:
  Learning Rate: 0.024848000514488308
  Sigma Multiplier: 0.6749348942670381
  Initialization Multiplier: 0.6260441445391137


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.66it/s, loss=-0.003503, elapsed time=0.04, total time=6.22]
[I 2025-06-07 23:40:39,223] Trial 414 finished with value: -0.0035031768048819465 and parameters: {'learning_rate': 0.024848000514488308, 'sigma_multiplier': 0.6749348942670381, 'initialization_multiplier': 0.6260441445391137}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 414 final loss: -0.00350318
Trial 415:
  Learning Rate: 0.0195765175056314
  Sigma Multiplier: 0.6180020490625845
  Initialization Multiplier: 0.5821129144826857


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.05it/s, loss=-0.003581, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:40:45,634] Trial 415 finished with value: -0.003580749846322658 and parameters: {'learning_rate': 0.0195765175056314, 'sigma_multiplier': 0.6180020490625845, 'initialization_multiplier': 0.5821129144826857}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 415 final loss: -0.00358075
Trial 416:
  Learning Rate: 0.01611908719807041
  Sigma Multiplier: 0.3759458773247888
  Initialization Multiplier: 0.5063489545831158


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.38it/s, loss=-0.002133, elapsed time=0.04, total time=6.84]
[I 2025-06-07 23:40:52,509] Trial 416 finished with value: -0.0021330298277457173 and parameters: {'learning_rate': 0.01611908719807041, 'sigma_multiplier': 0.3759458773247888, 'initialization_multiplier': 0.5063489545831158}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 416 final loss: -0.00213303
Trial 417:
  Learning Rate: 0.02813763689593831
  Sigma Multiplier: 1.636607494611873
  Initialization Multiplier: 0.6563920144656575


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.47it/s, loss=-0.001075, elapsed time=0.03, total time=5.06]
[I 2025-06-07 23:40:57,607] Trial 417 finished with value: -0.0010747230900724786 and parameters: {'learning_rate': 0.02813763689593831, 'sigma_multiplier': 1.636607494611873, 'initialization_multiplier': 0.6563920144656575}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 417 final loss: -0.00107472
Trial 418:
  Learning Rate: 0.022442180622658604
  Sigma Multiplier: 0.6024740635717891
  Initialization Multiplier: 0.5353817742540172


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.68it/s, loss=-0.003699, elapsed time=0.04, total time=6.47]
[I 2025-06-07 23:41:04,117] Trial 418 finished with value: -0.003698886205280673 and parameters: {'learning_rate': 0.022442180622658604, 'sigma_multiplier': 0.6024740635717891, 'initialization_multiplier': 0.5353817742540172}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 418 final loss: -0.00369889
Trial 419:
  Learning Rate: 0.03180960380125253
  Sigma Multiplier: 0.6604237114940376
  Initialization Multiplier: 0.5966902899223512


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.60it/s, loss=-0.003643, elapsed time=0.04, total time=6.23]
[I 2025-06-07 23:41:10,386] Trial 419 finished with value: -0.0036429547160565018 and parameters: {'learning_rate': 0.03180960380125253, 'sigma_multiplier': 0.6604237114940376, 'initialization_multiplier': 0.5966902899223512}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 419 final loss: -0.00364295
Trial 420:
  Learning Rate: 0.034298707735293504
  Sigma Multiplier: 0.5798748725435526
  Initialization Multiplier: 0.6193816736615213


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.89it/s, loss=-0.003850, elapsed time=0.04, total time=6.42]
[I 2025-06-07 23:41:16,839] Trial 420 finished with value: -0.0038497769979003857 and parameters: {'learning_rate': 0.034298707735293504, 'sigma_multiplier': 0.5798748725435526, 'initialization_multiplier': 0.6193816736615213}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 420 final loss: -0.00384978
Trial 421:
  Learning Rate: 0.03530669137949512
  Sigma Multiplier: 0.5720792689084668
  Initialization Multiplier: 0.23368200727654115


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.77it/s, loss=-0.002871, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:41:23,343] Trial 421 finished with value: -0.0028714275746323753 and parameters: {'learning_rate': 0.03530669137949512, 'sigma_multiplier': 0.5720792689084668, 'initialization_multiplier': 0.23368200727654115}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 421 final loss: -0.00287143
Trial 422:
  Learning Rate: 0.03464050658900017
  Sigma Multiplier: 0.49177197382630733
  Initialization Multiplier: 0.6894490945474586


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.97it/s, loss=-0.003499, elapsed time=0.04, total time=6.66]
[I 2025-06-07 23:41:30,038] Trial 422 finished with value: -0.003498723750176227 and parameters: {'learning_rate': 0.03464050658900017, 'sigma_multiplier': 0.49177197382630733, 'initialization_multiplier': 0.6894490945474586}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 422 final loss: -0.00349872
Trial 423:
  Learning Rate: 0.03825354235198914
  Sigma Multiplier: 0.6320413290003869
  Initialization Multiplier: 0.6248408953875332


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.27it/s, loss=-0.003466, elapsed time=0.04, total time=6.31]
[I 2025-06-07 23:41:36,383] Trial 423 finished with value: -0.00346566497108218 and parameters: {'learning_rate': 0.03825354235198914, 'sigma_multiplier': 0.6320413290003869, 'initialization_multiplier': 0.6248408953875332}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 423 final loss: -0.00346566
Trial 424:
  Learning Rate: 0.0028169480867722653
  Sigma Multiplier: 0.5717387407822637
  Initialization Multiplier: 0.6710905731371538


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.65it/s, loss=-0.002502, elapsed time=0.04, total time=6.48]
[I 2025-06-07 23:41:42,900] Trial 424 finished with value: -0.0025022844062518885 and parameters: {'learning_rate': 0.0028169480867722653, 'sigma_multiplier': 0.5717387407822637, 'initialization_multiplier': 0.6710905731371538}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 424 final loss: -0.00250228
Trial 425:
  Learning Rate: 0.025525960556192755
  Sigma Multiplier: 0.5307940008448677
  Initialization Multiplier: 0.6413075295520634


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.29it/s, loss=-0.003786, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:41:49,517] Trial 425 finished with value: -0.003785517066825831 and parameters: {'learning_rate': 0.025525960556192755, 'sigma_multiplier': 0.5307940008448677, 'initialization_multiplier': 0.6413075295520634}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 425 final loss: -0.00378552
Trial 426:
  Learning Rate: 0.04168245577944027
  Sigma Multiplier: 0.62101123720432
  Initialization Multiplier: 0.6073505898281855


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.14it/s, loss=-0.003465, elapsed time=0.04, total time=6.35]
[I 2025-06-07 23:41:55,908] Trial 426 finished with value: -0.0034648434963937005 and parameters: {'learning_rate': 0.04168245577944027, 'sigma_multiplier': 0.62101123720432, 'initialization_multiplier': 0.6073505898281855}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 426 final loss: -0.00346484
Trial 427:
  Learning Rate: 0.03339980273630772
  Sigma Multiplier: 0.7170934073554209
  Initialization Multiplier: 0.7126534478032678


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.08it/s, loss=-0.003282, elapsed time=0.04, total time=6.11]
[I 2025-06-07 23:42:02,056] Trial 427 finished with value: -0.003282377241167737 and parameters: {'learning_rate': 0.03339980273630772, 'sigma_multiplier': 0.7170934073554209, 'initialization_multiplier': 0.7126534478032678}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 427 final loss: -0.00328238
Trial 428:
  Learning Rate: 0.07430317853843349
  Sigma Multiplier: 0.5726841664340333
  Initialization Multiplier: 0.5657148659120946


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.67it/s, loss=-0.003636, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:42:08,557] Trial 428 finished with value: -0.0036360367820525447 and parameters: {'learning_rate': 0.07430317853843349, 'sigma_multiplier': 0.5726841664340333, 'initialization_multiplier': 0.5657148659120946}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 428 final loss: -0.00363604
Trial 429:
  Learning Rate: 0.010207717560596623
  Sigma Multiplier: 0.44371342879374703
  Initialization Multiplier: 0.6575428162280321


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.60it/s, loss=-0.002952, elapsed time=0.04, total time=6.77]
[I 2025-06-07 23:42:15,361] Trial 429 finished with value: -0.0029524163121292793 and parameters: {'learning_rate': 0.010207717560596623, 'sigma_multiplier': 0.44371342879374703, 'initialization_multiplier': 0.6575428162280321}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 429 final loss: -0.00295242
Trial 430:
  Learning Rate: 0.030620323620068512
  Sigma Multiplier: 0.650482798822164
  Initialization Multiplier: 0.6083855195136647


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.53it/s, loss=-0.003398, elapsed time=0.04, total time=6.26]
[I 2025-06-07 23:42:21,655] Trial 430 finished with value: -0.003398092181979043 and parameters: {'learning_rate': 0.030620323620068512, 'sigma_multiplier': 0.650482798822164, 'initialization_multiplier': 0.6083855195136647}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 430 final loss: -0.00339809
Trial 431:
  Learning Rate: 0.021025367917125275
  Sigma Multiplier: 0.5250182977498067
  Initialization Multiplier: 0.6857633809629679


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.29it/s, loss=-0.003801, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:42:28,267] Trial 431 finished with value: -0.0038012281899476898 and parameters: {'learning_rate': 0.021025367917125275, 'sigma_multiplier': 0.5250182977498067, 'initialization_multiplier': 0.6857633809629679}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 431 final loss: -0.00380123
Trial 432:
  Learning Rate: 0.03973870000874703
  Sigma Multiplier: 0.5997584878591878
  Initialization Multiplier: 0.5673957631275994


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.12it/s, loss=-0.003877, elapsed time=0.04, total time=6.36]
[I 2025-06-07 23:42:34,662] Trial 432 finished with value: -0.003877097797179124 and parameters: {'learning_rate': 0.03973870000874703, 'sigma_multiplier': 0.5997584878591878, 'initialization_multiplier': 0.5673957631275994}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 432 final loss: -0.00387710
Trial 433:
  Learning Rate: 0.051937835762758996
  Sigma Multiplier: 0.6763068829617195
  Initialization Multiplier: 0.5685235612802447


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.78it/s, loss=-0.003635, elapsed time=0.04, total time=6.19]
[I 2025-06-07 23:42:40,891] Trial 433 finished with value: -0.0036352607459175235 and parameters: {'learning_rate': 0.051937835762758996, 'sigma_multiplier': 0.6763068829617195, 'initialization_multiplier': 0.5685235612802447}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 433 final loss: -0.00363526
Trial 434:
  Learning Rate: 0.04526351972881273
  Sigma Multiplier: 0.5851215356082727
  Initialization Multiplier: 0.47613602789957066


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.88it/s, loss=-0.003659, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:42:47,339] Trial 434 finished with value: -0.0036588601234525094 and parameters: {'learning_rate': 0.04526351972881273, 'sigma_multiplier': 0.5851215356082727, 'initialization_multiplier': 0.47613602789957066}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 434 final loss: -0.00365886
Trial 435:
  Learning Rate: 0.06122003530078437
  Sigma Multiplier: 0.4934787216972482
  Initialization Multiplier: 0.544960382282118


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.07it/s, loss=-0.003222, elapsed time=0.04, total time=6.64]
[I 2025-06-07 23:42:54,014] Trial 435 finished with value: -0.0032220375198881625 and parameters: {'learning_rate': 0.06122003530078437, 'sigma_multiplier': 0.4934787216972482, 'initialization_multiplier': 0.544960382282118}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 435 final loss: -0.00322204
Trial 436:
  Learning Rate: 0.04059462307918289
  Sigma Multiplier: 1.1538148225387979
  Initialization Multiplier: 0.43860536474812173


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.04it/s, loss=-0.001819, elapsed time=0.03, total time=5.3] 
[I 2025-06-07 23:42:59,352] Trial 436 finished with value: -0.0018193128928872394 and parameters: {'learning_rate': 0.04059462307918289, 'sigma_multiplier': 1.1538148225387979, 'initialization_multiplier': 0.43860536474812173}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 436 final loss: -0.00181931
Trial 437:
  Learning Rate: 0.04011744866907409
  Sigma Multiplier: 0.6280690286517707
  Initialization Multiplier: 0.6309704971404134


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.26it/s, loss=-0.003857, elapsed time=0.04, total time=6.32]
[I 2025-06-07 23:43:05,710] Trial 437 finished with value: -0.003856702660407319 and parameters: {'learning_rate': 0.04011744866907409, 'sigma_multiplier': 0.6280690286517707, 'initialization_multiplier': 0.6309704971404134}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 437 final loss: -0.00385670
Trial 438:
  Learning Rate: 0.03262760651914713
  Sigma Multiplier: 0.688429702061424
  Initialization Multiplier: 0.6329191731381716


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.02it/s, loss=-0.003211, elapsed time=0.04, total time=6.14]
[I 2025-06-07 23:43:11,893] Trial 438 finished with value: -0.0032107591061830707 and parameters: {'learning_rate': 0.03262760651914713, 'sigma_multiplier': 0.688429702061424, 'initialization_multiplier': 0.6329191731381716}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 438 final loss: -0.00321076
Trial 439:
  Learning Rate: 0.03960264704251686
  Sigma Multiplier: 0.7432654726363909
  Initialization Multiplier: 0.5743604017188062


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.43it/s, loss=-0.003531, elapsed time=0.04, total time=6.03]
[I 2025-06-07 23:43:17,960] Trial 439 finished with value: -0.003530556697374933 and parameters: {'learning_rate': 0.03960264704251686, 'sigma_multiplier': 0.7432654726363909, 'initialization_multiplier': 0.5743604017188062}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 439 final loss: -0.00353056
Trial 440:
  Learning Rate: 0.051195477839053254
  Sigma Multiplier: 0.6403111911340599
  Initialization Multiplier: 0.6490273852706039


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.40it/s, loss=-0.003506, elapsed time=0.04, total time=6.29]
[I 2025-06-07 23:43:24,285] Trial 440 finished with value: -0.0035063761367897144 and parameters: {'learning_rate': 0.051195477839053254, 'sigma_multiplier': 0.6403111911340599, 'initialization_multiplier': 0.6490273852706039}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 440 final loss: -0.00350638
Trial 441:
  Learning Rate: 0.00014415408449304533
  Sigma Multiplier: 0.6163369781198513
  Initialization Multiplier: 0.6070550313186


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.13it/s, loss=0.049347, elapsed time=0.04, total time=6.35]
[I 2025-06-07 23:43:30,670] Trial 441 finished with value: 0.04934741774908251 and parameters: {'learning_rate': 0.00014415408449304533, 'sigma_multiplier': 0.6163369781198513, 'initialization_multiplier': 0.6070550313186}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 441 final loss: 0.04934742
Trial 442:
  Learning Rate: 0.04289136984071218
  Sigma Multiplier: 0.5526505012324668
  Initialization Multiplier: 0.4965156488646172


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.67it/s, loss=-0.003567, elapsed time=0.06, total time=6.47]
[I 2025-06-07 23:43:37,183] Trial 442 finished with value: -0.00356714001957238 and parameters: {'learning_rate': 0.04289136984071218, 'sigma_multiplier': 0.5526505012324668, 'initialization_multiplier': 0.4965156488646172}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 442 final loss: -0.00356714
Trial 443:
  Learning Rate: 0.034159441385878384
  Sigma Multiplier: 0.5915331686170612
  Initialization Multiplier: 0.5581520112215022


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.08it/s, loss=-0.003617, elapsed time=0.04, total time=6.35]
[I 2025-06-07 23:43:43,577] Trial 443 finished with value: -0.003617254319273921 and parameters: {'learning_rate': 0.034159441385878384, 'sigma_multiplier': 0.5915331686170612, 'initialization_multiplier': 0.5581520112215022}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 443 final loss: -0.00361725
Trial 444:
  Learning Rate: 0.059973450769935724
  Sigma Multiplier: 0.662915691656407
  Initialization Multiplier: 0.5914104033548447


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.73it/s, loss=-0.003426, elapsed time=0.04, total time=6.2] 
[I 2025-06-07 23:43:49,809] Trial 444 finished with value: -0.0034262021027990316 and parameters: {'learning_rate': 0.059973450769935724, 'sigma_multiplier': 0.662915691656407, 'initialization_multiplier': 0.5914104033548447}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 444 final loss: -0.00342620
Trial 445:
  Learning Rate: 0.037552825014447316
  Sigma Multiplier: 0.5371473524690716
  Initialization Multiplier: 0.655592177175052


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.50it/s, loss=-0.003668, elapsed time=0.04, total time=6.52]
[I 2025-06-07 23:43:56,365] Trial 445 finished with value: -0.003667961110872108 and parameters: {'learning_rate': 0.037552825014447316, 'sigma_multiplier': 0.5371473524690716, 'initialization_multiplier': 0.655592177175052}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 445 final loss: -0.00366796
Trial 446:
  Learning Rate: 0.0288692986368816
  Sigma Multiplier: 0.47082261557171884
  Initialization Multiplier: 0.5183863812813085


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.73it/s, loss=-0.002938, elapsed time=0.04, total time=6.73]
[I 2025-06-07 23:44:03,127] Trial 446 finished with value: -0.002938308852396432 and parameters: {'learning_rate': 0.0288692986368816, 'sigma_multiplier': 0.47082261557171884, 'initialization_multiplier': 0.5183863812813085}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 446 final loss: -0.00293831
Trial 447:
  Learning Rate: 0.045855300504850434
  Sigma Multiplier: 0.6209422956997146
  Initialization Multiplier: 0.6909600296941394


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.25it/s, loss=-0.003534, elapsed time=0.04, total time=6.32]
[I 2025-06-07 23:44:09,486] Trial 447 finished with value: -0.003533847596106653 and parameters: {'learning_rate': 0.045855300504850434, 'sigma_multiplier': 0.6209422956997146, 'initialization_multiplier': 0.6909600296941394}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 447 final loss: -0.00353385
Trial 448:
  Learning Rate: 0.033322958864237584
  Sigma Multiplier: 0.5701184553349021
  Initialization Multiplier: 0.6250834053535631


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.74it/s, loss=-0.003567, elapsed time=0.04, total time=6.45]
[I 2025-06-07 23:44:15,969] Trial 448 finished with value: -0.003566845487561308 and parameters: {'learning_rate': 0.033322958864237584, 'sigma_multiplier': 0.5701184553349021, 'initialization_multiplier': 0.6250834053535631}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 448 final loss: -0.00356685
Trial 449:
  Learning Rate: 0.025970581238059946
  Sigma Multiplier: 0.706250322005039
  Initialization Multiplier: 0.5623802803678168


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.21it/s, loss=-0.003691, elapsed time=0.04, total time=6.08]
[I 2025-06-07 23:44:22,084] Trial 449 finished with value: -0.0036909502103990707 and parameters: {'learning_rate': 0.025970581238059946, 'sigma_multiplier': 0.706250322005039, 'initialization_multiplier': 0.5623802803678168}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 449 final loss: -0.00369095
Trial 450:
  Learning Rate: 0.004356052396476532
  Sigma Multiplier: 1.4137051295293896
  Initialization Multiplier: 0.6004529431762867


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.04it/s, loss=-0.001389, elapsed time=0.03, total time=5.13]
[I 2025-06-07 23:44:27,254] Trial 450 finished with value: -0.0013886192199199514 and parameters: {'learning_rate': 0.004356052396476532, 'sigma_multiplier': 1.4137051295293896, 'initialization_multiplier': 0.6004529431762867}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 450 final loss: -0.00138862
Trial 451:
  Learning Rate: 0.04917794425931553
  Sigma Multiplier: 0.6331745239785727
  Initialization Multiplier: 0.6601861307662817


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.36it/s, loss=-0.003705, elapsed time=0.04, total time=6.3] 
[I 2025-06-07 23:44:33,595] Trial 451 finished with value: -0.00370487193322953 and parameters: {'learning_rate': 0.04917794425931553, 'sigma_multiplier': 0.6331745239785727, 'initialization_multiplier': 0.6601861307662817}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 451 final loss: -0.00370487
Trial 452:
  Learning Rate: 0.03961610169100088
  Sigma Multiplier: 0.5408100761169294
  Initialization Multiplier: 0.5205430031126099


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.50it/s, loss=-0.003398, elapsed time=0.04, total time=6.51]
[I 2025-06-07 23:44:40,143] Trial 452 finished with value: -0.003398353278539582 and parameters: {'learning_rate': 0.03961610169100088, 'sigma_multiplier': 0.5408100761169294, 'initialization_multiplier': 0.5205430031126099}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 452 final loss: -0.00339835
Trial 453:
  Learning Rate: 0.07314777184474508
  Sigma Multiplier: 0.5937881313948481
  Initialization Multiplier: 0.7043918251483074


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.13it/s, loss=-0.003632, elapsed time=0.04, total time=6.35]
[I 2025-06-07 23:44:46,534] Trial 453 finished with value: -0.003631964315355562 and parameters: {'learning_rate': 0.07314777184474508, 'sigma_multiplier': 0.5937881313948481, 'initialization_multiplier': 0.7043918251483074}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 453 final loss: -0.00363196
Trial 454:
  Learning Rate: 0.030591578503164945
  Sigma Multiplier: 0.5013372973713663
  Initialization Multiplier: 0.6261707644646929


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.22it/s, loss=-0.003859, elapsed time=0.04, total time=6.6] 
[I 2025-06-07 23:44:53,170] Trial 454 finished with value: -0.0038591127805880604 and parameters: {'learning_rate': 0.030591578503164945, 'sigma_multiplier': 0.5013372973713663, 'initialization_multiplier': 0.6261707644646929}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 454 final loss: -0.00385911
Trial 455:
  Learning Rate: 0.037219057993905735
  Sigma Multiplier: 0.42373977407531277
  Initialization Multiplier: 0.5631644391354846


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.63it/s, loss=-0.003031, elapsed time=0.04, total time=6.76]
[I 2025-06-07 23:44:59,965] Trial 455 finished with value: -0.0030309522395316145 and parameters: {'learning_rate': 0.037219057993905735, 'sigma_multiplier': 0.42373977407531277, 'initialization_multiplier': 0.5631644391354846}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 455 final loss: -0.00303095
Trial 456:
  Learning Rate: 0.030759821025078658
  Sigma Multiplier: 0.502554186875566
  Initialization Multiplier: 0.6375613354878377


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.64it/s, loss=-0.003629, elapsed time=0.04, total time=6.77]
[I 2025-06-07 23:45:06,767] Trial 456 finished with value: -0.0036292868800676904 and parameters: {'learning_rate': 0.030759821025078658, 'sigma_multiplier': 0.502554186875566, 'initialization_multiplier': 0.6375613354878377}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 456 final loss: -0.00362929
Trial 457:
  Learning Rate: 0.05372811263300048
  Sigma Multiplier: 0.4487192129794162
  Initialization Multiplier: 0.6737872387390943


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.55it/s, loss=-0.003068, elapsed time=0.04, total time=6.79]
[I 2025-06-07 23:45:13,596] Trial 457 finished with value: -0.003067956107712673 and parameters: {'learning_rate': 0.05372811263300048, 'sigma_multiplier': 0.4487192129794162, 'initialization_multiplier': 0.6737872387390943}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 457 final loss: -0.00306796
Trial 458:
  Learning Rate: 0.04273419574934222
  Sigma Multiplier: 0.4812923008825759
  Initialization Multiplier: 0.4803111821891013


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.73it/s, loss=-0.003576, elapsed time=0.04, total time=6.74]
[I 2025-06-07 23:45:20,373] Trial 458 finished with value: -0.0035758879141987664 and parameters: {'learning_rate': 0.04273419574934222, 'sigma_multiplier': 0.4812923008825759, 'initialization_multiplier': 0.4803111821891013}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 458 final loss: -0.00357589
Trial 459:
  Learning Rate: 0.03455165078555502
  Sigma Multiplier: 0.5222108665392958
  Initialization Multiplier: 0.7110747199689129


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.27it/s, loss=-0.003667, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:45:26,989] Trial 459 finished with value: -0.003666964909210788 and parameters: {'learning_rate': 0.03455165078555502, 'sigma_multiplier': 0.5222108665392958, 'initialization_multiplier': 0.7110747199689129}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 459 final loss: -0.00366696
Trial 460:
  Learning Rate: 0.046928895651342346
  Sigma Multiplier: 0.5544807944693148
  Initialization Multiplier: 0.587577320868243


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.65it/s, loss=-0.003440, elapsed time=0.04, total time=6.48]
[I 2025-06-07 23:45:33,504] Trial 460 finished with value: -0.0034400986347083535 and parameters: {'learning_rate': 0.046928895651342346, 'sigma_multiplier': 0.5544807944693148, 'initialization_multiplier': 0.587577320868243}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 460 final loss: -0.00344010
Trial 461:
  Learning Rate: 0.0297865075382513
  Sigma Multiplier: 0.5020320271614369
  Initialization Multiplier: 0.5403950954988757


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.17it/s, loss=-0.003691, elapsed time=0.04, total time=6.61]
[I 2025-06-07 23:45:40,154] Trial 461 finished with value: -0.0036911621761005647 and parameters: {'learning_rate': 0.0297865075382513, 'sigma_multiplier': 0.5020320271614369, 'initialization_multiplier': 0.5403950954988757}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 461 final loss: -0.00369116
Trial 462:
  Learning Rate: 0.05944131590098991
  Sigma Multiplier: 0.45370185721810563
  Initialization Multiplier: 0.6421802921549112


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.82it/s, loss=-0.003184, elapsed time=0.04, total time=6.71]
[I 2025-06-07 23:45:46,903] Trial 462 finished with value: -0.0031844590470525543 and parameters: {'learning_rate': 0.05944131590098991, 'sigma_multiplier': 0.45370185721810563, 'initialization_multiplier': 0.6421802921549112}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 462 final loss: -0.00318446
Trial 463:
  Learning Rate: 0.03940389887507579
  Sigma Multiplier: 0.5634457910080304
  Initialization Multiplier: 0.5915078458806006


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.66it/s, loss=-0.003475, elapsed time=0.04, total time=6.49]
[I 2025-06-07 23:45:53,424] Trial 463 finished with value: -0.0034748156292457514 and parameters: {'learning_rate': 0.03940389887507579, 'sigma_multiplier': 0.5634457910080304, 'initialization_multiplier': 0.5915078458806006}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 463 final loss: -0.00347482
Trial 464:
  Learning Rate: 0.03362589515424718
  Sigma Multiplier: 0.3968255833396895
  Initialization Multiplier: 0.5163519599656454


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.44it/s, loss=-0.002218, elapsed time=0.05, total time=6.83]
[I 2025-06-07 23:46:00,286] Trial 464 finished with value: -0.002217864146895131 and parameters: {'learning_rate': 0.03362589515424718, 'sigma_multiplier': 0.3968255833396895, 'initialization_multiplier': 0.5163519599656454}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 464 final loss: -0.00221786
Trial 465:
  Learning Rate: 0.04354683003146311
  Sigma Multiplier: 0.5200229496834531
  Initialization Multiplier: 0.3843433361803922


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.28it/s, loss=-0.003603, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:46:06,903] Trial 465 finished with value: -0.0036030550324485174 and parameters: {'learning_rate': 0.04354683003146311, 'sigma_multiplier': 0.5200229496834531, 'initialization_multiplier': 0.3843433361803922}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 465 final loss: -0.00360306
Trial 466:
  Learning Rate: 0.02854047917149735
  Sigma Multiplier: 0.28900509898604665
  Initialization Multiplier: 0.4427966298559524


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.19it/s, loss=-0.001252, elapsed time=0.04, total time=6.9] 
[I 2025-06-07 23:46:13,837] Trial 466 finished with value: -0.0012524435824592102 and parameters: {'learning_rate': 0.02854047917149735, 'sigma_multiplier': 0.28900509898604665, 'initialization_multiplier': 0.4427966298559524}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 466 final loss: -0.00125244
Trial 467:
  Learning Rate: 0.053777529879401514
  Sigma Multiplier: 0.5916869946423584
  Initialization Multiplier: 0.6232162554701448


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.91it/s, loss=-0.003598, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:46:20,288] Trial 467 finished with value: -0.0035982133117471544 and parameters: {'learning_rate': 0.053777529879401514, 'sigma_multiplier': 0.5916869946423584, 'initialization_multiplier': 0.6232162554701448}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 467 final loss: -0.00359821
Trial 468:
  Learning Rate: 5.322821026695004e-05
  Sigma Multiplier: 0.5515854141472324
  Initialization Multiplier: 0.674467646472826


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.31it/s, loss=0.064738, elapsed time=0.04, total time=6.57]
[I 2025-06-07 23:46:26,895] Trial 468 finished with value: 0.0647380547864425 and parameters: {'learning_rate': 5.322821026695004e-05, 'sigma_multiplier': 0.5515854141472324, 'initialization_multiplier': 0.674467646472826}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 468 final loss: 0.06473805
Trial 469:
  Learning Rate: 0.000669191676949257
  Sigma Multiplier: 0.6177658505357575
  Initialization Multiplier: 0.5557403503127826


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.18it/s, loss=0.007735, elapsed time=0.04, total time=6.34]
[I 2025-06-07 23:46:33,272] Trial 469 finished with value: 0.007734970384910632 and parameters: {'learning_rate': 0.000669191676949257, 'sigma_multiplier': 0.6177658505357575, 'initialization_multiplier': 0.5557403503127826}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 469 final loss: 0.00773497
Trial 470:
  Learning Rate: 0.036497079131305066
  Sigma Multiplier: 0.49453336365369027
  Initialization Multiplier: 0.7328493058649349


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.02it/s, loss=-0.003455, elapsed time=0.04, total time=6.65]
[I 2025-06-07 23:46:39,960] Trial 470 finished with value: -0.003454751246691837 and parameters: {'learning_rate': 0.036497079131305066, 'sigma_multiplier': 0.49453336365369027, 'initialization_multiplier': 0.7328493058649349}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 470 final loss: -0.00345475
Trial 471:
  Learning Rate: 0.022077761979286927
  Sigma Multiplier: 0.5752308551291492
  Initialization Multiplier: 0.5891227407663111


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.70it/s, loss=-0.003785, elapsed time=0.04, total time=6.47]
[I 2025-06-07 23:46:46,461] Trial 471 finished with value: -0.0037852203037205836 and parameters: {'learning_rate': 0.022077761979286927, 'sigma_multiplier': 0.5752308551291492, 'initialization_multiplier': 0.5891227407663111}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 471 final loss: -0.00378522
Trial 472:
  Learning Rate: 0.01731510302644321
  Sigma Multiplier: 0.5310081412781134
  Initialization Multiplier: 0.6360398328411292


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.34it/s, loss=-0.003556, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:46:53,060] Trial 472 finished with value: -0.0035561090038261985 and parameters: {'learning_rate': 0.01731510302644321, 'sigma_multiplier': 0.5310081412781134, 'initialization_multiplier': 0.6360398328411292}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 472 final loss: -0.00355611
Trial 473:
  Learning Rate: 0.06346006622202105
  Sigma Multiplier: 0.6479581848735193
  Initialization Multiplier: 0.11456964937115044


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.22it/s, loss=-0.002852, elapsed time=0.04, total time=6.33]
[I 2025-06-07 23:46:59,428] Trial 473 finished with value: -0.002851673506921123 and parameters: {'learning_rate': 0.06346006622202105, 'sigma_multiplier': 0.6479581848735193, 'initialization_multiplier': 0.11456964937115044}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 473 final loss: -0.00285167
Trial 474:
  Learning Rate: 0.047663886198772216
  Sigma Multiplier: 0.6066684212896776
  Initialization Multiplier: 0.4860520713885359


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.87it/s, loss=-0.003305, elapsed time=0.04, total time=6.42]
[I 2025-06-07 23:47:05,882] Trial 474 finished with value: -0.0033045240247751686 and parameters: {'learning_rate': 0.047663886198772216, 'sigma_multiplier': 0.6066684212896776, 'initialization_multiplier': 0.4860520713885359}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 474 final loss: -0.00330452
Trial 475:
  Learning Rate: 0.026717521414613515
  Sigma Multiplier: 0.4766179556099739
  Initialization Multiplier: 0.6927046599226208


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.78it/s, loss=-0.003738, elapsed time=0.04, total time=6.73]
[I 2025-06-07 23:47:12,646] Trial 475 finished with value: -0.0037379925280542955 and parameters: {'learning_rate': 0.026717521414613515, 'sigma_multiplier': 0.4766179556099739, 'initialization_multiplier': 0.6927046599226208}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 475 final loss: -0.00373799
Trial 476:
  Learning Rate: 0.032013552632765147
  Sigma Multiplier: 0.5795528982775295
  Initialization Multiplier: 0.5374607903304263


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.69it/s, loss=-0.003836, elapsed time=0.04, total time=6.47]
[I 2025-06-07 23:47:19,150] Trial 476 finished with value: -0.0038364942569379308 and parameters: {'learning_rate': 0.032013552632765147, 'sigma_multiplier': 0.5795528982775295, 'initialization_multiplier': 0.5374607903304263}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 476 final loss: -0.00383649
Trial 477:
  Learning Rate: 0.08507750137347625
  Sigma Multiplier: 0.5561162330020163
  Initialization Multiplier: 0.5204995749734954


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.47it/s, loss=-0.003239, elapsed time=0.04, total time=6.53]
[I 2025-06-07 23:47:25,720] Trial 477 finished with value: -0.003238792174455624 and parameters: {'learning_rate': 0.08507750137347625, 'sigma_multiplier': 0.5561162330020163, 'initialization_multiplier': 0.5204995749734954}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 477 final loss: -0.00323879
Trial 478:
  Learning Rate: 0.031303351961515465
  Sigma Multiplier: 0.8402002021054391
  Initialization Multiplier: 0.6036864408416069


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.51it/s, loss=-0.003011, elapsed time=0.04, total time=5.79]
[I 2025-06-07 23:47:31,549] Trial 478 finished with value: -0.0030108781520725813 and parameters: {'learning_rate': 0.031303351961515465, 'sigma_multiplier': 0.8402002021054391, 'initialization_multiplier': 0.6036864408416069}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 478 final loss: -0.00301088
Trial 479:
  Learning Rate: 0.02306192243231866
  Sigma Multiplier: 0.5192687979122883
  Initialization Multiplier: 0.4386315598895204


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.24it/s, loss=-0.003422, elapsed time=0.04, total time=6.58]
[I 2025-06-07 23:47:38,171] Trial 479 finished with value: -0.003421986363936452 and parameters: {'learning_rate': 0.02306192243231866, 'sigma_multiplier': 0.5192687979122883, 'initialization_multiplier': 0.4386315598895204}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 479 final loss: -0.00342199
Trial 480:
  Learning Rate: 0.040586098015255236
  Sigma Multiplier: 0.6245152769405802
  Initialization Multiplier: 0.5464544749300637


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.21it/s, loss=-0.003406, elapsed time=0.04, total time=6.33]
[I 2025-06-07 23:47:44,542] Trial 480 finished with value: -0.003405636516373078 and parameters: {'learning_rate': 0.040586098015255236, 'sigma_multiplier': 0.6245152769405802, 'initialization_multiplier': 0.5464544749300637}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 480 final loss: -0.00340564
Trial 481:
  Learning Rate: 0.036181738256401814
  Sigma Multiplier: 0.5831215625532655
  Initialization Multiplier: 0.6546662890129364


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.76it/s, loss=-0.003969, elapsed time=0.04, total time=6.45]
[I 2025-06-07 23:47:51,029] Trial 481 finished with value: -0.0039689300495020135 and parameters: {'learning_rate': 0.036181738256401814, 'sigma_multiplier': 0.5831215625532655, 'initialization_multiplier': 0.6546662890129364}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 481 final loss: -0.00396893
Trial 482:
  Learning Rate: 0.051101397304472794
  Sigma Multiplier: 0.6604735816487911
  Initialization Multiplier: 0.7236464006599475


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.49it/s, loss=-0.003575, elapsed time=0.04, total time=6.26]
[I 2025-06-07 23:47:57,325] Trial 482 finished with value: -0.003575491410914083 and parameters: {'learning_rate': 0.051101397304472794, 'sigma_multiplier': 0.6604735816487911, 'initialization_multiplier': 0.7236464006599475}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 482 final loss: -0.00357549
Trial 483:
  Learning Rate: 0.04193096748488796
  Sigma Multiplier: 0.4421653384315055
  Initialization Multiplier: 0.6704023079455226


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.69it/s, loss=-0.003115, elapsed time=0.04, total time=6.75]
[I 2025-06-07 23:48:04,114] Trial 483 finished with value: -0.0031153006021075944 and parameters: {'learning_rate': 0.04193096748488796, 'sigma_multiplier': 0.4421653384315055, 'initialization_multiplier': 0.6704023079455226}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 483 final loss: -0.00311530
Trial 484:
  Learning Rate: 0.0078053526834038
  Sigma Multiplier: 0.607597543759424
  Initialization Multiplier: 0.6598242197666682


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.17it/s, loss=-0.002964, elapsed time=0.04, total time=6.34]
[I 2025-06-07 23:48:10,486] Trial 484 finished with value: -0.0029636972592415292 and parameters: {'learning_rate': 0.0078053526834038, 'sigma_multiplier': 0.607597543759424, 'initialization_multiplier': 0.6598242197666682}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 484 final loss: -0.00296370
Trial 485:
  Learning Rate: 0.0369538907158928
  Sigma Multiplier: 0.5408148665309944
  Initialization Multiplier: 0.7042690228300655


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.48it/s, loss=-0.003724, elapsed time=0.04, total time=6.52]
[I 2025-06-07 23:48:17,047] Trial 485 finished with value: -0.0037238016102266727 and parameters: {'learning_rate': 0.0369538907158928, 'sigma_multiplier': 0.5408148665309944, 'initialization_multiplier': 0.7042690228300655}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 485 final loss: -0.00372380
Trial 486:
  Learning Rate: 0.014255415443058084
  Sigma Multiplier: 0.6955382535385188
  Initialization Multiplier: 0.6327919655560965


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.07it/s, loss=-0.003239, elapsed time=0.04, total time=6.12]
[I 2025-06-07 23:48:23,203] Trial 486 finished with value: -0.003239460304562096 and parameters: {'learning_rate': 0.014255415443058084, 'sigma_multiplier': 0.6955382535385188, 'initialization_multiplier': 0.6327919655560965}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 486 final loss: -0.00323946
Trial 487:
  Learning Rate: 0.04628035382873634
  Sigma Multiplier: 0.5779888579117238
  Initialization Multiplier: 0.7361551948885225


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.82it/s, loss=-0.003626, elapsed time=0.04, total time=6.43]
[I 2025-06-07 23:48:29,673] Trial 487 finished with value: -0.003625821957557175 and parameters: {'learning_rate': 0.04628035382873634, 'sigma_multiplier': 0.5779888579117238, 'initialization_multiplier': 0.7361551948885225}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 487 final loss: -0.00362582
Trial 488:
  Learning Rate: 0.0697374286368887
  Sigma Multiplier: 0.6380073647088577
  Initialization Multiplier: 0.6697549758606692


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.52it/s, loss=-0.003377, elapsed time=0.04, total time=6.25]
[I 2025-06-07 23:48:35,963] Trial 488 finished with value: -0.0033769419782483027 and parameters: {'learning_rate': 0.0697374286368887, 'sigma_multiplier': 0.6380073647088577, 'initialization_multiplier': 0.6697549758606692}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 488 final loss: -0.00337694
Trial 489:
  Learning Rate: 0.027571433486133452
  Sigma Multiplier: 0.5095414349201797
  Initialization Multiplier: 0.6120050159947484


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.36it/s, loss=-0.003897, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:48:42,561] Trial 489 finished with value: -0.0038969279558223927 and parameters: {'learning_rate': 0.027571433486133452, 'sigma_multiplier': 0.5095414349201797, 'initialization_multiplier': 0.6120050159947484}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 489 final loss: -0.00389693
Trial 490:
  Learning Rate: 0.03661503686868907
  Sigma Multiplier: 0.48720136975613587
  Initialization Multiplier: 0.6123661928872021


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.07it/s, loss=-0.003715, elapsed time=0.04, total time=6.63]
[I 2025-06-07 23:48:49,226] Trial 490 finished with value: -0.003714685931680351 and parameters: {'learning_rate': 0.03661503686868907, 'sigma_multiplier': 0.48720136975613587, 'initialization_multiplier': 0.6123661928872021}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 490 final loss: -0.00371469
Trial 491:
  Learning Rate: 0.027111928072967743
  Sigma Multiplier: 0.43268352308327407
  Initialization Multiplier: 0.6479527547941782


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.77it/s, loss=-0.003066, elapsed time=0.04, total time=6.72]
[I 2025-06-07 23:48:55,987] Trial 491 finished with value: -0.0030658473070775145 and parameters: {'learning_rate': 0.027111928072967743, 'sigma_multiplier': 0.43268352308327407, 'initialization_multiplier': 0.6479527547941782}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 491 final loss: -0.00306585
Trial 492:
  Learning Rate: 0.05657078197641226
  Sigma Multiplier: 0.4747956859418148
  Initialization Multiplier: 0.6967237602637657


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.90it/s, loss=-0.003226, elapsed time=0.04, total time=6.69]
[I 2025-06-07 23:49:02,713] Trial 492 finished with value: -0.0032264436907251956 and parameters: {'learning_rate': 0.05657078197641226, 'sigma_multiplier': 0.4747956859418148, 'initialization_multiplier': 0.6967237602637657}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 492 final loss: -0.00322644
Trial 493:
  Learning Rate: 0.04294214871866099
  Sigma Multiplier: 0.5292236445106979
  Initialization Multiplier: 0.6218313706923269


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.80it/s, loss=-0.003594, elapsed time=0.04, total time=7.34]
[I 2025-06-07 23:49:10,088] Trial 493 finished with value: -0.0035938668180728853 and parameters: {'learning_rate': 0.04294214871866099, 'sigma_multiplier': 0.5292236445106979, 'initialization_multiplier': 0.6218313706923269}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 493 final loss: -0.00359387
Trial 494:
  Learning Rate: 0.03352597623316843
  Sigma Multiplier: 0.507556937590688
  Initialization Multiplier: 0.6697064140840959


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.14it/s, loss=-0.003692, elapsed time=0.04, total time=6.63]
[I 2025-06-07 23:49:16,750] Trial 494 finished with value: -0.0036915878144019375 and parameters: {'learning_rate': 0.03352597623316843, 'sigma_multiplier': 0.507556937590688, 'initialization_multiplier': 0.6697064140840959}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 494 final loss: -0.00369159
Trial 495:
  Learning Rate: 0.01930246510602194
  Sigma Multiplier: 0.4704424789199808
  Initialization Multiplier: 0.7523993905667833


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.00it/s, loss=-0.003411, elapsed time=0.04, total time=6.65]
[I 2025-06-07 23:49:23,436] Trial 495 finished with value: -0.0034111110285258224 and parameters: {'learning_rate': 0.01930246510602194, 'sigma_multiplier': 0.4704424789199808, 'initialization_multiplier': 0.7523993905667833}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 495 final loss: -0.00341111
Trial 496:
  Learning Rate: 0.029627939716614888
  Sigma Multiplier: 0.40453211361941943
  Initialization Multiplier: 0.5785591000462372


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.65it/s, loss=-0.002820, elapsed time=0.04, total time=6.76]
[I 2025-06-07 23:49:30,231] Trial 496 finished with value: -0.002820061178215047 and parameters: {'learning_rate': 0.029627939716614888, 'sigma_multiplier': 0.40453211361941943, 'initialization_multiplier': 0.5785591000462372}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 496 final loss: -0.00282006
Trial 497:
  Learning Rate: 0.05168074411546061
  Sigma Multiplier: 0.5429573643983374
  Initialization Multiplier: 0.6299620294982232


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.60it/s, loss=-0.003401, elapsed time=0.04, total time=6.5] 
[I 2025-06-07 23:49:36,762] Trial 497 finished with value: -0.003400503288165016 and parameters: {'learning_rate': 0.05168074411546061, 'sigma_multiplier': 0.5429573643983374, 'initialization_multiplier': 0.6299620294982232}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 497 final loss: -0.00340050
Trial 498:
  Learning Rate: 0.038096209417287044
  Sigma Multiplier: 1.8189907072055074
  Initialization Multiplier: 0.7072776160211672


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.48it/s, loss=-0.000879, elapsed time=0.03, total time=4.91]
[I 2025-06-07 23:49:41,704] Trial 498 finished with value: -0.0008786771702710763 and parameters: {'learning_rate': 0.038096209417287044, 'sigma_multiplier': 1.8189907072055074, 'initialization_multiplier': 0.7072776160211672}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 498 final loss: -0.00087868
Trial 499:
  Learning Rate: 0.023307476309650552
  Sigma Multiplier: 0.567108230402606
  Initialization Multiplier: 0.5958984677175212


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.81it/s, loss=-0.003917, elapsed time=0.04, total time=6.43]
[I 2025-06-07 23:49:48,171] Trial 499 finished with value: -0.0039165595785391775 and parameters: {'learning_rate': 0.023307476309650552, 'sigma_multiplier': 0.567108230402606, 'initialization_multiplier': 0.5958984677175212}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 499 final loss: -0.00391656
Trial 500:
  Learning Rate: 0.02675206242124631
  Sigma Multiplier: 0.5785595454642116
  Initialization Multiplier: 0.5939451435973551


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.89it/s, loss=-0.003789, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:49:54,615] Trial 500 finished with value: -0.0037892966418325543 and parameters: {'learning_rate': 0.02675206242124631, 'sigma_multiplier': 0.5785595454642116, 'initialization_multiplier': 0.5939451435973551}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 500 final loss: -0.00378930
Trial 501:
  Learning Rate: 0.03212394326670481
  Sigma Multiplier: 0.6131666012610831
  Initialization Multiplier: 0.5776615848464368


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.04it/s, loss=-0.003589, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:50:01,019] Trial 501 finished with value: -0.0035887622956236806 and parameters: {'learning_rate': 0.03212394326670481, 'sigma_multiplier': 0.6131666012610831, 'initialization_multiplier': 0.5776615848464368}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 501 final loss: -0.00358876
Trial 502:
  Learning Rate: 0.023303089716427777
  Sigma Multiplier: 0.5584710094993859
  Initialization Multiplier: 0.6139366507116883


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.22it/s, loss=-0.003958, elapsed time=0.04, total time=6.6] 
[I 2025-06-07 23:50:07,656] Trial 502 finished with value: -0.003957782782135592 and parameters: {'learning_rate': 0.023303089716427777, 'sigma_multiplier': 0.5584710094993859, 'initialization_multiplier': 0.6139366507116883}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 502 final loss: -0.00395778
Trial 503:
  Learning Rate: 0.023880513993150276
  Sigma Multiplier: 0.5097059868175716
  Initialization Multiplier: 1.351007082126799


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.51it/s, loss=0.032462, elapsed time=0.04, total time=6.8] 
[I 2025-06-07 23:50:14,497] Trial 503 finished with value: 0.03246195818664809 and parameters: {'learning_rate': 0.023880513993150276, 'sigma_multiplier': 0.5097059868175716, 'initialization_multiplier': 1.351007082126799}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 503 final loss: 0.03246196
Trial 504:
  Learning Rate: 0.021515966017500738
  Sigma Multiplier: 0.5688785599554965
  Initialization Multiplier: 0.58050898134357


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.84it/s, loss=-0.003935, elapsed time=0.04, total time=6.42]
[I 2025-06-07 23:50:20,951] Trial 504 finished with value: -0.003934643418887277 and parameters: {'learning_rate': 0.021515966017500738, 'sigma_multiplier': 0.5688785599554965, 'initialization_multiplier': 0.58050898134357}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 504 final loss: -0.00393464
Trial 505:
  Learning Rate: 0.02048871199778459
  Sigma Multiplier: 0.5564471480462719
  Initialization Multiplier: 0.5710572854232564


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.71it/s, loss=-0.003627, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:50:27,447] Trial 505 finished with value: -0.003627475890728087 and parameters: {'learning_rate': 0.02048871199778459, 'sigma_multiplier': 0.5564471480462719, 'initialization_multiplier': 0.5710572854232564}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 505 final loss: -0.00362748
Trial 506:
  Learning Rate: 0.01878475459620148
  Sigma Multiplier: 0.5056755899480356
  Initialization Multiplier: 0.5692492956985828


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.34it/s, loss=-0.003977, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:50:34,047] Trial 506 finished with value: -0.003977268738112659 and parameters: {'learning_rate': 0.01878475459620148, 'sigma_multiplier': 0.5056755899480356, 'initialization_multiplier': 0.5692492956985828}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 506 final loss: -0.00397727
Trial 507:
  Learning Rate: 0.015510529927479397
  Sigma Multiplier: 0.45982753956723377
  Initialization Multiplier: 0.49138995086851023


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.95it/s, loss=-0.002982, elapsed time=0.04, total time=6.67]
[I 2025-06-07 23:50:40,754] Trial 507 finished with value: -0.0029822511555850956 and parameters: {'learning_rate': 0.015510529927479397, 'sigma_multiplier': 0.45982753956723377, 'initialization_multiplier': 0.49138995086851023}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 507 final loss: -0.00298225
Trial 508:
  Learning Rate: 0.01759288446774777
  Sigma Multiplier: 0.5131221027657451
  Initialization Multiplier: 0.564632457039529


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.31it/s, loss=-0.003360, elapsed time=0.04, total time=6.57]
[I 2025-06-07 23:50:47,361] Trial 508 finished with value: -0.0033596091697703652 and parameters: {'learning_rate': 0.01759288446774777, 'sigma_multiplier': 0.5131221027657451, 'initialization_multiplier': 0.564632457039529}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 508 final loss: -0.00335961
Trial 509:
  Learning Rate: 0.022039713865791833
  Sigma Multiplier: 0.3744622775436197
  Initialization Multiplier: 0.5502556917060047


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.52it/s, loss=-0.001788, elapsed time=0.04, total time=6.79]
[I 2025-06-07 23:50:54,185] Trial 509 finished with value: -0.001788172312778019 and parameters: {'learning_rate': 0.022039713865791833, 'sigma_multiplier': 0.3744622775436197, 'initialization_multiplier': 0.5502556917060047}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 509 final loss: -0.00178817
Trial 510:
  Learning Rate: 0.019005804522276223
  Sigma Multiplier: 0.4625037708833557
  Initialization Multiplier: 0.522644259512302


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.91it/s, loss=-0.003302, elapsed time=0.04, total time=6.68]
[I 2025-06-07 23:51:00,906] Trial 510 finished with value: -0.003302104295834721 and parameters: {'learning_rate': 0.019005804522276223, 'sigma_multiplier': 0.4625037708833557, 'initialization_multiplier': 0.522644259512302}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 510 final loss: -0.00330210
Trial 511:
  Learning Rate: 0.01717305109249179
  Sigma Multiplier: 0.4983279755913964
  Initialization Multiplier: 0.5977247351248662


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.19it/s, loss=-0.003490, elapsed time=0.04, total time=6.61]
[I 2025-06-07 23:51:07,557] Trial 511 finished with value: -0.0034904178404811096 and parameters: {'learning_rate': 0.01717305109249179, 'sigma_multiplier': 0.4983279755913964, 'initialization_multiplier': 0.5977247351248662}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 511 final loss: -0.00349042
Trial 512:
  Learning Rate: 0.021158751248934352
  Sigma Multiplier: 0.5292863898190776
  Initialization Multiplier: 0.48487564656892906


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.54it/s, loss=-0.002904, elapsed time=0.04, total time=6.51]
[I 2025-06-07 23:51:14,102] Trial 512 finished with value: -0.0029035528862182266 and parameters: {'learning_rate': 0.021158751248934352, 'sigma_multiplier': 0.5292863898190776, 'initialization_multiplier': 0.48487564656892906}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 512 final loss: -0.00290355
Trial 513:
  Learning Rate: 0.02185943426072835
  Sigma Multiplier: 0.9165832896659604
  Initialization Multiplier: 0.5817561409620918


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.55it/s, loss=-0.002679, elapsed time=0.03, total time=5.58]
[I 2025-06-07 23:51:19,717] Trial 513 finished with value: -0.0026790586406218743 and parameters: {'learning_rate': 0.02185943426072835, 'sigma_multiplier': 0.9165832896659604, 'initialization_multiplier': 0.5817561409620918}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 513 final loss: -0.00267906
Trial 514:
  Learning Rate: 0.02389107867034183
  Sigma Multiplier: 0.4865708802914547
  Initialization Multiplier: 0.5393554592485503


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.25it/s, loss=-0.003588, elapsed time=0.04, total time=6.6] 
[I 2025-06-07 23:51:26,360] Trial 514 finished with value: -0.0035875910051338675 and parameters: {'learning_rate': 0.02389107867034183, 'sigma_multiplier': 0.4865708802914547, 'initialization_multiplier': 0.5393554592485503}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 514 final loss: -0.00358759
Trial 515:
  Learning Rate: 0.012160535925463413
  Sigma Multiplier: 0.43535158206115626
  Initialization Multiplier: 0.6032958518731747


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.84it/s, loss=-0.002882, elapsed time=0.04, total time=6.7] 
[I 2025-06-07 23:51:33,091] Trial 515 finished with value: -0.002881779957882932 and parameters: {'learning_rate': 0.012160535925463413, 'sigma_multiplier': 0.43535158206115626, 'initialization_multiplier': 0.6032958518731747}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 515 final loss: -0.00288178
Trial 516:
  Learning Rate: 0.013770115724464259
  Sigma Multiplier: 0.5465096784952737
  Initialization Multiplier: 0.5088607580689678


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.74it/s, loss=-0.003304, elapsed time=0.04, total time=6.45]
[I 2025-06-07 23:51:39,580] Trial 516 finished with value: -0.0033038590106242126 and parameters: {'learning_rate': 0.013770115724464259, 'sigma_multiplier': 0.5465096784952737, 'initialization_multiplier': 0.5088607580689678}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 516 final loss: -0.00330386
Trial 517:
  Learning Rate: 0.016368316154155085
  Sigma Multiplier: 0.5119491037172833
  Initialization Multiplier: 0.5628552248378907


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.40it/s, loss=-0.003358, elapsed time=0.04, total time=6.55]
[I 2025-06-07 23:51:46,170] Trial 517 finished with value: -0.003358196974616709 and parameters: {'learning_rate': 0.016368316154155085, 'sigma_multiplier': 0.5119491037172833, 'initialization_multiplier': 0.5628552248378907}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 517 final loss: -0.00335820
Trial 518:
  Learning Rate: 0.025296292698117926
  Sigma Multiplier: 0.5640691657654737
  Initialization Multiplier: 0.6108940733955237


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.95it/s, loss=-0.003940, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:51:52,613] Trial 518 finished with value: -0.003939918093124523 and parameters: {'learning_rate': 0.025296292698117926, 'sigma_multiplier': 0.5640691657654737, 'initialization_multiplier': 0.6108940733955237}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 518 final loss: -0.00393992
Trial 519:
  Learning Rate: 0.025314850336847484
  Sigma Multiplier: 0.5537865592896375
  Initialization Multiplier: 1.5033881157362035


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.88it/s, loss=0.081581, elapsed time=0.04, total time=6.69]
[I 2025-06-07 23:51:59,334] Trial 519 finished with value: 0.08158141919552611 and parameters: {'learning_rate': 0.025314850336847484, 'sigma_multiplier': 0.5537865592896375, 'initialization_multiplier': 1.5033881157362035}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 519 final loss: 0.08158142
Trial 520:
  Learning Rate: 0.021184955050219226
  Sigma Multiplier: 0.41913134938437
  Initialization Multiplier: 0.5781971037254678


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.13it/s, loss=-0.002483, elapsed time=0.04, total time=6.91]
[I 2025-06-07 23:52:06,285] Trial 520 finished with value: -0.002482690139910363 and parameters: {'learning_rate': 0.021184955050219226, 'sigma_multiplier': 0.41913134938437, 'initialization_multiplier': 0.5781971037254678}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 520 final loss: -0.00248269
Trial 521:
  Learning Rate: 0.025538716853863525
  Sigma Multiplier: 0.47889381548360516
  Initialization Multiplier: 0.5400189144509114


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.37it/s, loss=-0.003556, elapsed time=0.04, total time=6.85]
[I 2025-06-07 23:52:13,169] Trial 521 finished with value: -0.003556306553567941 and parameters: {'learning_rate': 0.025538716853863525, 'sigma_multiplier': 0.47889381548360516, 'initialization_multiplier': 0.5400189144509114}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 521 final loss: -0.00355631
Trial 522:
  Learning Rate: 0.021225957495055833
  Sigma Multiplier: 0.5585859881193848
  Initialization Multiplier: 1.822327085520395


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.95it/s, loss=0.074521, elapsed time=0.04, total time=6.66]
[I 2025-06-07 23:52:19,869] Trial 522 finished with value: 0.07452070428320229 and parameters: {'learning_rate': 0.021225957495055833, 'sigma_multiplier': 0.5585859881193848, 'initialization_multiplier': 1.822327085520395}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 522 final loss: 0.07452070
Trial 523:
  Learning Rate: 0.019160427730210393
  Sigma Multiplier: 0.5104500930553774
  Initialization Multiplier: 0.6146564025732053


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.14it/s, loss=-0.003519, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:52:26,525] Trial 523 finished with value: -0.0035186042860457043 and parameters: {'learning_rate': 0.019160427730210393, 'sigma_multiplier': 0.5104500930553774, 'initialization_multiplier': 0.6146564025732053}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 523 final loss: -0.00351860
Trial 524:
  Learning Rate: 0.025723475311756405
  Sigma Multiplier: 0.5733881942135199
  Initialization Multiplier: 0.4586182946575524


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.70it/s, loss=-0.003346, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:52:33,024] Trial 524 finished with value: -0.0033463026956536738 and parameters: {'learning_rate': 0.025723475311756405, 'sigma_multiplier': 0.5733881942135199, 'initialization_multiplier': 0.4586182946575524}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 524 final loss: -0.00334630
Trial 525:
  Learning Rate: 0.02368099604583404
  Sigma Multiplier: 0.5303502651871232
  Initialization Multiplier: 0.6054735493890069


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.52it/s, loss=-0.003655, elapsed time=0.04, total time=6.52]
[I 2025-06-07 23:52:39,582] Trial 525 finished with value: -0.0036550226350965586 and parameters: {'learning_rate': 0.02368099604583404, 'sigma_multiplier': 0.5303502651871232, 'initialization_multiplier': 0.6054735493890069}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 525 final loss: -0.00365502
Trial 526:
  Learning Rate: 0.015395382328366018
  Sigma Multiplier: 0.4656602597273875
  Initialization Multiplier: 0.3940559162687065


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.12it/s, loss=-0.002999, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:52:46,239] Trial 526 finished with value: -0.0029990980304268056 and parameters: {'learning_rate': 0.015395382328366018, 'sigma_multiplier': 0.4656602597273875, 'initialization_multiplier': 0.3940559162687065}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 526 final loss: -0.00299910
Trial 527:
  Learning Rate: 0.028908078606411588
  Sigma Multiplier: 0.5725357517152966
  Initialization Multiplier: 0.5137970072342157


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.11it/s, loss=-0.003804, elapsed time=0.04, total time=6.38]
[I 2025-06-07 23:52:52,652] Trial 527 finished with value: -0.003804040883661038 and parameters: {'learning_rate': 0.028908078606411588, 'sigma_multiplier': 0.5725357517152966, 'initialization_multiplier': 0.5137970072342157}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 527 final loss: -0.00380404
Trial 528:
  Learning Rate: 0.018096066041114613
  Sigma Multiplier: 0.5259899892885733
  Initialization Multiplier: 1.2287259627583609


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.92it/s, loss=0.033561, elapsed time=0.04, total time=6.68]
[I 2025-06-07 23:52:59,371] Trial 528 finished with value: 0.033560644183146325 and parameters: {'learning_rate': 0.018096066041114613, 'sigma_multiplier': 0.5259899892885733, 'initialization_multiplier': 1.2287259627583609}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 528 final loss: 0.03356064
Trial 529:
  Learning Rate: 0.028345663324957856
  Sigma Multiplier: 0.5886692927826904
  Initialization Multiplier: 0.6390991685342796


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.07it/s, loss=-0.003632, elapsed time=0.04, total time=6.37]
[I 2025-06-07 23:53:05,781] Trial 529 finished with value: -0.00363178200936313 and parameters: {'learning_rate': 0.028345663324957856, 'sigma_multiplier': 0.5886692927826904, 'initialization_multiplier': 0.6390991685342796}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 529 final loss: -0.00363178
Trial 530:
  Learning Rate: 0.02326856632903775
  Sigma Multiplier: 0.3471119354645007
  Initialization Multiplier: 0.5562637790708702


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.51it/s, loss=-0.001542, elapsed time=0.04, total time=6.8] 
[I 2025-06-07 23:53:12,613] Trial 530 finished with value: -0.00154174098085651 and parameters: {'learning_rate': 0.02326856632903775, 'sigma_multiplier': 0.3471119354645007, 'initialization_multiplier': 0.5562637790708702}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 530 final loss: -0.00154174
Trial 531:
  Learning Rate: 0.028106316959855573
  Sigma Multiplier: 0.4955168273873186
  Initialization Multiplier: 0.5772349510739149


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.21it/s, loss=-0.003478, elapsed time=0.04, total time=6.6] 
[I 2025-06-07 23:53:19,248] Trial 531 finished with value: -0.0034781075638178273 and parameters: {'learning_rate': 0.028106316959855573, 'sigma_multiplier': 0.4955168273873186, 'initialization_multiplier': 0.5772349510739149}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 531 final loss: -0.00347811
Trial 532:
  Learning Rate: 0.019008669864451683
  Sigma Multiplier: 0.539309846313915
  Initialization Multiplier: 0.64370463144017


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.71it/s, loss=-0.003492, elapsed time=0.04, total time=6.46]
[I 2025-06-07 23:53:25,746] Trial 532 finished with value: -0.0034918314505125465 and parameters: {'learning_rate': 0.019008669864451683, 'sigma_multiplier': 0.539309846313915, 'initialization_multiplier': 0.64370463144017}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 532 final loss: -0.00349183
Trial 533:
  Learning Rate: 0.02940580705339183
  Sigma Multiplier: 0.5927555234199399
  Initialization Multiplier: 0.4819261791258329


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.30it/s, loss=-0.003266, elapsed time=0.04, total time=6.31]
[I 2025-06-07 23:53:32,092] Trial 533 finished with value: -0.003265966355115528 and parameters: {'learning_rate': 0.02940580705339183, 'sigma_multiplier': 0.5927555234199399, 'initialization_multiplier': 0.4819261791258329}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 533 final loss: -0.00326597
Trial 534:
  Learning Rate: 0.02395394885588341
  Sigma Multiplier: 0.5645475182432659
  Initialization Multiplier: 0.5994416230099078


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.92it/s, loss=-0.003828, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:53:38,541] Trial 534 finished with value: -0.003828232326442984 and parameters: {'learning_rate': 0.02395394885588341, 'sigma_multiplier': 0.5645475182432659, 'initialization_multiplier': 0.5994416230099078}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 534 final loss: -0.00382823
Trial 535:
  Learning Rate: 0.03058138188084108
  Sigma Multiplier: 0.43644904315511246
  Initialization Multiplier: 0.5333827325941309


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.00it/s, loss=-0.003026, elapsed time=0.04, total time=6.66]
[I 2025-06-07 23:53:45,235] Trial 535 finished with value: -0.003025678858662448 and parameters: {'learning_rate': 0.03058138188084108, 'sigma_multiplier': 0.43644904315511246, 'initialization_multiplier': 0.5333827325941309}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 535 final loss: -0.00302568
Trial 536:
  Learning Rate: 0.021004571795140776
  Sigma Multiplier: 0.6049542754335535
  Initialization Multiplier: 0.648371306786143


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.46it/s, loss=-0.003634, elapsed time=0.04, total time=6.27]
[I 2025-06-07 23:53:51,536] Trial 536 finished with value: -0.003633837427168607 and parameters: {'learning_rate': 0.021004571795140776, 'sigma_multiplier': 0.6049542754335535, 'initialization_multiplier': 0.648371306786143}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 536 final loss: -0.00363384
Trial 537:
  Learning Rate: 0.025530006237045016
  Sigma Multiplier: 0.5034022435915982
  Initialization Multiplier: 0.6063709858832311


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.34it/s, loss=-0.003538, elapsed time=0.04, total time=6.56]
[I 2025-06-07 23:53:58,130] Trial 537 finished with value: -0.003538429730402962 and parameters: {'learning_rate': 0.025530006237045016, 'sigma_multiplier': 0.5034022435915982, 'initialization_multiplier': 0.6063709858832311}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 537 final loss: -0.00353843
Trial 538:
  Learning Rate: 0.03271557158049967
  Sigma Multiplier: 0.551415582054557
  Initialization Multiplier: 0.555830541488644


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.81it/s, loss=-0.003405, elapsed time=0.04, total time=6.44]
[I 2025-06-07 23:54:04,607] Trial 538 finished with value: -0.003404663250049092 and parameters: {'learning_rate': 0.03271557158049967, 'sigma_multiplier': 0.551415582054557, 'initialization_multiplier': 0.555830541488644}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 538 final loss: -0.00340466
Trial 539:
  Learning Rate: 0.000512831438846907
  Sigma Multiplier: 0.4724425112453211
  Initialization Multiplier: 0.49883031115077503


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.14it/s, loss=0.004166, elapsed time=0.04, total time=6.62]
[I 2025-06-07 23:54:11,264] Trial 539 finished with value: 0.004165571190253069 and parameters: {'learning_rate': 0.000512831438846907, 'sigma_multiplier': 0.4724425112453211, 'initialization_multiplier': 0.49883031115077503}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 539 final loss: 0.00416557
Trial 540:
  Learning Rate: 0.02770908132367514
  Sigma Multiplier: 1.0599650518648176
  Initialization Multiplier: 0.6530431073347988


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.59it/s, loss=-0.002298, elapsed time=0.03, total time=5.38]
[I 2025-06-07 23:54:16,676] Trial 540 finished with value: -0.002297898784450557 and parameters: {'learning_rate': 0.02770908132367514, 'sigma_multiplier': 1.0599650518648176, 'initialization_multiplier': 0.6530431073347988}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 540 final loss: -0.00229790
Trial 541:
  Learning Rate: 0.016066116251270024
  Sigma Multiplier: 0.6095978718980323
  Initialization Multiplier: 0.5874275241354603


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.34it/s, loss=-0.003423, elapsed time=0.04, total time=6.3] 
[I 2025-06-07 23:54:23,016] Trial 541 finished with value: -0.0034233236845036875 and parameters: {'learning_rate': 0.016066116251270024, 'sigma_multiplier': 0.6095978718980323, 'initialization_multiplier': 0.5874275241354603}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 541 final loss: -0.00342332
Trial 542:
  Learning Rate: 0.021998164249569498
  Sigma Multiplier: 0.6612494285115657
  Initialization Multiplier: 0.43488546372077724


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 25.00it/s, loss=-0.003350, elapsed time=0.04, total time=6.14]
[I 2025-06-07 23:54:29,194] Trial 542 finished with value: -0.0033499961762033896 and parameters: {'learning_rate': 0.021998164249569498, 'sigma_multiplier': 0.6612494285115657, 'initialization_multiplier': 0.43488546372077724}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 542 final loss: -0.00335000
Trial 543:
  Learning Rate: 0.03355529506511419
  Sigma Multiplier: 0.5337331824440948
  Initialization Multiplier: 0.6149564406297428


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.60it/s, loss=-0.003565, elapsed time=0.04, total time=6.49]
[I 2025-06-07 23:54:35,719] Trial 543 finished with value: -0.0035646763265912646 and parameters: {'learning_rate': 0.03355529506511419, 'sigma_multiplier': 0.5337331824440948, 'initialization_multiplier': 0.6149564406297428}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 543 final loss: -0.00356468
Trial 544:
  Learning Rate: 0.024931911495956107
  Sigma Multiplier: 1.2741259319245175
  Initialization Multiplier: 0.5283142255788669


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.03it/s, loss=-0.001608, elapsed time=0.03, total time=5.13]
[I 2025-06-07 23:54:40,889] Trial 544 finished with value: -0.0016079752937289131 and parameters: {'learning_rate': 0.024931911495956107, 'sigma_multiplier': 1.2741259319245175, 'initialization_multiplier': 0.5283142255788669}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 544 final loss: -0.00160798
Trial 545:
  Learning Rate: 0.019095784362320796
  Sigma Multiplier: 0.5902428338452822
  Initialization Multiplier: 0.6616758166345675


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.19it/s, loss=-0.003225, elapsed time=0.04, total time=6.34]
[I 2025-06-07 23:54:47,264] Trial 545 finished with value: -0.003224802124389111 and parameters: {'learning_rate': 0.019095784362320796, 'sigma_multiplier': 0.5902428338452822, 'initialization_multiplier': 0.6616758166345675}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 545 final loss: -0.00322480
Trial 546:
  Learning Rate: 0.035429829962391066
  Sigma Multiplier: 0.6416516352883425
  Initialization Multiplier: 0.5643674707297822


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.75it/s, loss=-0.003417, elapsed time=0.04, total time=6.19]
[I 2025-06-07 23:54:53,488] Trial 546 finished with value: -0.0034172799446038197 and parameters: {'learning_rate': 0.035429829962391066, 'sigma_multiplier': 0.6416516352883425, 'initialization_multiplier': 0.5643674707297822}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 546 final loss: -0.00341728
Trial 547:
  Learning Rate: 0.029269124812090343
  Sigma Multiplier: 0.567919630347527
  Initialization Multiplier: 0.621566506863749


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.88it/s, loss=-0.003288, elapsed time=0.04, total time=6.42]
[I 2025-06-07 23:54:59,942] Trial 547 finished with value: -0.003288045011069604 and parameters: {'learning_rate': 0.029269124812090343, 'sigma_multiplier': 0.567919630347527, 'initialization_multiplier': 0.621566506863749}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 547 final loss: -0.00328805
Trial 548:
  Learning Rate: 0.0323973080880453
  Sigma Multiplier: 0.4986238291933477
  Initialization Multiplier: 0.5757647245930513


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.20it/s, loss=-0.003700, elapsed time=0.04, total time=6.61]
[I 2025-06-07 23:55:06,586] Trial 548 finished with value: -0.0036996685192837117 and parameters: {'learning_rate': 0.0323973080880453, 'sigma_multiplier': 0.4986238291933477, 'initialization_multiplier': 0.5757647245930513}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 548 final loss: -0.00369967
Trial 549:
  Learning Rate: 0.02396624183396557
  Sigma Multiplier: 0.39581175498064014
  Initialization Multiplier: 0.5076769586669911


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.57it/s, loss=-0.002373, elapsed time=0.04, total time=6.79]
[I 2025-06-07 23:55:13,408] Trial 549 finished with value: -0.0023730366029260045 and parameters: {'learning_rate': 0.02396624183396557, 'sigma_multiplier': 0.39581175498064014, 'initialization_multiplier': 0.5076769586669911}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 549 final loss: -0.00237304
Trial 550:
  Learning Rate: 0.03837029770222256
  Sigma Multiplier: 0.533673201380262
  Initialization Multiplier: 0.6641887089594322


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.57it/s, loss=-0.003610, elapsed time=0.04, total time=6.51]
[I 2025-06-07 23:55:19,949] Trial 550 finished with value: -0.0036099230186626406 and parameters: {'learning_rate': 0.03837029770222256, 'sigma_multiplier': 0.533673201380262, 'initialization_multiplier': 0.6641887089594322}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 550 final loss: -0.00360992
Trial 551:
  Learning Rate: 0.017707872224175848
  Sigma Multiplier: 0.6218538849590511
  Initialization Multiplier: 0.4636780934156692


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.28it/s, loss=-0.003011, elapsed time=0.04, total time=6.31]
[I 2025-06-07 23:55:26,297] Trial 551 finished with value: -0.003010875078971789 and parameters: {'learning_rate': 0.017707872224175848, 'sigma_multiplier': 0.6218538849590511, 'initialization_multiplier': 0.4636780934156692}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 551 final loss: -0.00301088
Trial 552:
  Learning Rate: 0.028273296132084992
  Sigma Multiplier: 0.4517589299872866
  Initialization Multiplier: 0.6183859775494793


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.81it/s, loss=-0.003486, elapsed time=0.04, total time=6.72]
[I 2025-06-07 23:55:33,049] Trial 552 finished with value: -0.003486368236603467 and parameters: {'learning_rate': 0.028273296132084992, 'sigma_multiplier': 0.4517589299872866, 'initialization_multiplier': 0.6183859775494793}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 552 final loss: -0.00348637
Trial 553:
  Learning Rate: 0.014392967373482893
  Sigma Multiplier: 0.565660843743412
  Initialization Multiplier: 0.5438322745324968


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.70it/s, loss=-0.002961, elapsed time=0.04, total time=6.47]
[I 2025-06-07 23:55:39,553] Trial 553 finished with value: -0.002960902539975776 and parameters: {'learning_rate': 0.014392967373482893, 'sigma_multiplier': 0.565660843743412, 'initialization_multiplier': 0.5438322745324968}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 553 final loss: -0.00296090
Trial 554:
  Learning Rate: 0.02084136690133332
  Sigma Multiplier: 0.5943248798188183
  Initialization Multiplier: 0.5913615772119211


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.98it/s, loss=-0.004010, elapsed time=0.04, total time=6.39]
[I 2025-06-07 23:55:45,981] Trial 554 finished with value: -0.0040095556893950226 and parameters: {'learning_rate': 0.02084136690133332, 'sigma_multiplier': 0.5943248798188183, 'initialization_multiplier': 0.5913615772119211}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 554 final loss: -0.00400956
Trial 555:
  Learning Rate: 3.513183158947945e-05
  Sigma Multiplier: 0.6333542421338878
  Initialization Multiplier: 0.6531590235159486


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.33it/s, loss=0.065291, elapsed time=0.04, total time=6.31]
[I 2025-06-07 23:55:52,324] Trial 555 finished with value: 0.06529136697353 and parameters: {'learning_rate': 3.513183158947945e-05, 'sigma_multiplier': 0.6333542421338878, 'initialization_multiplier': 0.6531590235159486}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 555 final loss: 0.06529137
Trial 556:
  Learning Rate: 0.012554402019746579
  Sigma Multiplier: 0.6012024236821019
  Initialization Multiplier: 0.6083718190226887


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.05it/s, loss=-0.003125, elapsed time=0.04, total time=6.38]
[I 2025-06-07 23:55:58,737] Trial 556 finished with value: -0.0031247726491923987 and parameters: {'learning_rate': 0.012554402019746579, 'sigma_multiplier': 0.6012024236821019, 'initialization_multiplier': 0.6083718190226887}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 556 final loss: -0.00312477
Trial 557:
  Learning Rate: 0.0104116734719558
  Sigma Multiplier: 0.6891043794613438
  Initialization Multiplier: 0.22334468936644836


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.97it/s, loss=-0.002429, elapsed time=0.04, total time=6.15]
[I 2025-06-07 23:56:04,923] Trial 557 finished with value: -0.0024292298485390136 and parameters: {'learning_rate': 0.0104116734719558, 'sigma_multiplier': 0.6891043794613438, 'initialization_multiplier': 0.22334468936644836}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 557 final loss: -0.00242923
Trial 558:
  Learning Rate: 0.020258050826793595
  Sigma Multiplier: 0.6659064019938481
  Initialization Multiplier: 0.3542674413746775


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.80it/s, loss=-0.003288, elapsed time=0.04, total time=6.18]
[I 2025-06-07 23:56:11,139] Trial 558 finished with value: -0.003288066011085214 and parameters: {'learning_rate': 0.020258050826793595, 'sigma_multiplier': 0.6659064019938481, 'initialization_multiplier': 0.3542674413746775}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 558 final loss: -0.00328807
Trial 559:
  Learning Rate: 0.015349009246803201
  Sigma Multiplier: 0.5985752321654548
  Initialization Multiplier: 0.589987701015602


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.10it/s, loss=-0.003399, elapsed time=0.04, total time=6.36]
[I 2025-06-07 23:56:17,536] Trial 559 finished with value: -0.00339883941915142 and parameters: {'learning_rate': 0.015349009246803201, 'sigma_multiplier': 0.5985752321654548, 'initialization_multiplier': 0.589987701015602}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 559 final loss: -0.00339884
Trial 560:
  Learning Rate: 0.021980141869344383
  Sigma Multiplier: 0.7358606044127409
  Initialization Multiplier: 0.6860604064872733


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.54it/s, loss=-0.003200, elapsed time=0.04, total time=6.01]
[I 2025-06-07 23:56:23,582] Trial 560 finished with value: -0.0031999603046881484 and parameters: {'learning_rate': 0.021980141869344383, 'sigma_multiplier': 0.7358606044127409, 'initialization_multiplier': 0.6860604064872733}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 560 final loss: -0.00319996
Trial 561:
  Learning Rate: 0.019668887003231137
  Sigma Multiplier: 0.6357530274621911
  Initialization Multiplier: 0.640775077889597


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.55it/s, loss=-0.003600, elapsed time=0.04, total time=6.24]
[I 2025-06-07 23:56:29,864] Trial 561 finished with value: -0.003600080825485101 and parameters: {'learning_rate': 0.019668887003231137, 'sigma_multiplier': 0.6357530274621911, 'initialization_multiplier': 0.640775077889597}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 561 final loss: -0.00360008
Trial 562:
  Learning Rate: 0.017956050920031212
  Sigma Multiplier: 0.5712241608930225
  Initialization Multiplier: 0.5637430134648772


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.90it/s, loss=-0.003472, elapsed time=0.04, total time=6.41]
[I 2025-06-07 23:56:36,312] Trial 562 finished with value: -0.0034720704610287615 and parameters: {'learning_rate': 0.017956050920031212, 'sigma_multiplier': 0.5712241608930225, 'initialization_multiplier': 0.5637430134648772}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 562 final loss: -0.00347207
Trial 563:
  Learning Rate: 0.025504472187113655
  Sigma Multiplier: 0.6090243575336401
  Initialization Multiplier: 0.6792864338901659


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.35it/s, loss=-0.003763, elapsed time=0.04, total time=6.29]
[I 2025-06-07 23:56:42,641] Trial 563 finished with value: -0.003763050246283098 and parameters: {'learning_rate': 0.025504472187113655, 'sigma_multiplier': 0.6090243575336401, 'initialization_multiplier': 0.6792864338901659}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 563 final loss: -0.00376305
Trial 564:
  Learning Rate: 0.021459778439460123
  Sigma Multiplier: 0.5418988617597376
  Initialization Multiplier: 0.6219237372384363


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.62it/s, loss=-0.003394, elapsed time=0.04, total time=6.48]
[I 2025-06-07 23:56:49,161] Trial 564 finished with value: -0.003393559117609804 and parameters: {'learning_rate': 0.021459778439460123, 'sigma_multiplier': 0.5418988617597376, 'initialization_multiplier': 0.6219237372384363}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 564 final loss: -0.00339356
Trial 565:
  Learning Rate: 0.017342183739290196
  Sigma Multiplier: 0.6549839888572089
  Initialization Multiplier: 0.5418015863856737


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.88it/s, loss=-0.003461, elapsed time=0.04, total time=6.16]
[I 2025-06-07 23:56:55,355] Trial 565 finished with value: -0.003460657952536178 and parameters: {'learning_rate': 0.017342183739290196, 'sigma_multiplier': 0.6549839888572089, 'initialization_multiplier': 0.5418015863856737}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 565 final loss: -0.00346066
Trial 566:
  Learning Rate: 0.02515480175358432
  Sigma Multiplier: 0.5799513754280129
  Initialization Multiplier: 0.5932087101961916


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.93it/s, loss=-0.003697, elapsed time=0.04, total time=6.4] 
[I 2025-06-07 23:57:01,790] Trial 566 finished with value: -0.0036968904313388526 and parameters: {'learning_rate': 0.02515480175358432, 'sigma_multiplier': 0.5799513754280129, 'initialization_multiplier': 0.5932087101961916}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 566 final loss: -0.00369689
Trial 567:
  Learning Rate: 0.027651209006451447
  Sigma Multiplier: 0.5473284104672675
  Initialization Multiplier: 0.4880625834225918


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.67it/s, loss=-0.003804, elapsed time=0.04, total time=6.48]
[I 2025-06-07 23:57:08,302] Trial 567 finished with value: -0.0038036578203344993 and parameters: {'learning_rate': 0.027651209006451447, 'sigma_multiplier': 0.5473284104672675, 'initialization_multiplier': 0.4880625834225918}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 567 final loss: -0.00380366
Trial 568:
  Learning Rate: 0.031767381460858994
  Sigma Multiplier: 0.6053762530962292
  Initialization Multiplier: 0.6459608684191626


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.18it/s, loss=-0.003416, elapsed time=0.04, total time=6.33]
[I 2025-06-07 23:57:14,673] Trial 568 finished with value: -0.003415522140768764 and parameters: {'learning_rate': 0.031767381460858994, 'sigma_multiplier': 0.6053762530962292, 'initialization_multiplier': 0.6459608684191626}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 568 final loss: -0.00341552
Trial 569:
  Learning Rate: 0.021947336770035956
  Sigma Multiplier: 0.6888818018181391
  Initialization Multiplier: 0.5855189893066003


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.16it/s, loss=-0.003509, elapsed time=0.04, total time=6.09]
[I 2025-06-07 23:57:20,804] Trial 569 finished with value: -0.003508524829511302 and parameters: {'learning_rate': 0.021947336770035956, 'sigma_multiplier': 0.6888818018181391, 'initialization_multiplier': 0.5855189893066003}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 569 final loss: -0.00350852
Trial 570:
  Learning Rate: 0.04163087136128786
  Sigma Multiplier: 0.5674146543063203
  Initialization Multiplier: 0.5215916048574094


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.93it/s, loss=-0.003241, elapsed time=0.04, total time=6.4] 
[I 2025-06-07 23:57:27,244] Trial 570 finished with value: -0.003240796407263985 and parameters: {'learning_rate': 0.04163087136128786, 'sigma_multiplier': 0.5674146543063203, 'initialization_multiplier': 0.5215916048574094}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 570 final loss: -0.00324080
Trial 571:
  Learning Rate: 0.03026568497319754
  Sigma Multiplier: 0.1615370514899398
  Initialization Multiplier: 0.6740715496121428


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.94it/s, loss=0.000252, elapsed time=0.04, total time=6.97] 
[I 2025-06-07 23:57:34,257] Trial 571 finished with value: 0.00025219235813826415 and parameters: {'learning_rate': 0.03026568497319754, 'sigma_multiplier': 0.1615370514899398, 'initialization_multiplier': 0.6740715496121428}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 571 final loss: 0.00025219
Trial 572:
  Learning Rate: 0.02330621161036848
  Sigma Multiplier: 0.5154514497615195
  Initialization Multiplier: 0.6242937694720595


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.53it/s, loss=-0.003918, elapsed time=0.04, total time=6.51]
[I 2025-06-07 23:57:40,805] Trial 572 finished with value: -0.003917527332617043 and parameters: {'learning_rate': 0.02330621161036848, 'sigma_multiplier': 0.5154514497615195, 'initialization_multiplier': 0.6242937694720595}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 572 final loss: -0.00391753
Trial 573:
  Learning Rate: 0.01741154987647737
  Sigma Multiplier: 0.6479339594515422
  Initialization Multiplier: 0.5573657767872322


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.76it/s, loss=-0.003584, elapsed time=0.04, total time=6.19]
[I 2025-06-07 23:57:47,031] Trial 573 finished with value: -0.003583932705559561 and parameters: {'learning_rate': 0.01741154987647737, 'sigma_multiplier': 0.6479339594515422, 'initialization_multiplier': 0.5573657767872322}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 573 final loss: -0.00358393
Trial 574:
  Learning Rate: 0.015815499050897852
  Sigma Multiplier: 0.5330394641680728
  Initialization Multiplier: 0.7053998063302988


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.59it/s, loss=-0.003282, elapsed time=0.04, total time=6.49]
[I 2025-06-07 23:57:53,564] Trial 574 finished with value: -0.003282324233485277 and parameters: {'learning_rate': 0.015815499050897852, 'sigma_multiplier': 0.5330394641680728, 'initialization_multiplier': 0.7053998063302988}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 574 final loss: -0.00328232
Trial 575:
  Learning Rate: 0.02018093959291702
  Sigma Multiplier: 0.6067092746023803
  Initialization Multiplier: 0.40381970293974173


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.37it/s, loss=-0.003133, elapsed time=0.04, total time=6.3] 
[I 2025-06-07 23:57:59,899] Trial 575 finished with value: -0.003133087089712341 and parameters: {'learning_rate': 0.02018093959291702, 'sigma_multiplier': 0.6067092746023803, 'initialization_multiplier': 0.40381970293974173}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 575 final loss: -0.00313309
Trial 576:
  Learning Rate: 0.023178193020448212
  Sigma Multiplier: 0.5766750571506254
  Initialization Multiplier: 0.5920748528970197


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.97it/s, loss=-0.003635, elapsed time=0.04, total time=6.39]
[I 2025-06-07 23:58:06,323] Trial 576 finished with value: -0.0036353186158867173 and parameters: {'learning_rate': 0.023178193020448212, 'sigma_multiplier': 0.5766750571506254, 'initialization_multiplier': 0.5920748528970197}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 576 final loss: -0.00363532
Trial 577:
  Learning Rate: 0.024436175738086086
  Sigma Multiplier: 0.6334364709831762
  Initialization Multiplier: 0.5167201140900644


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.64it/s, loss=-0.003538, elapsed time=0.04, total time=6.22]
[I 2025-06-07 23:58:12,582] Trial 577 finished with value: -0.003537821144848974 and parameters: {'learning_rate': 0.024436175738086086, 'sigma_multiplier': 0.6334364709831762, 'initialization_multiplier': 0.5167201140900644}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 577 final loss: -0.00353782
Trial 578:
  Learning Rate: 0.0193571535269086
  Sigma Multiplier: 0.5255752085412435
  Initialization Multiplier: 0.6307203455117049


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.70it/s, loss=-0.003620, elapsed time=0.04, total time=6.47]
[I 2025-06-07 23:58:19,090] Trial 578 finished with value: -0.0036197249916738792 and parameters: {'learning_rate': 0.0193571535269086, 'sigma_multiplier': 0.5255752085412435, 'initialization_multiplier': 0.6307203455117049}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 578 final loss: -0.00361972
Trial 579:
  Learning Rate: 0.02741230905820703
  Sigma Multiplier: 0.5675366692213918
  Initialization Multiplier: 0.5626717890407675


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.05it/s, loss=-0.003623, elapsed time=0.04, total time=6.38]
[I 2025-06-07 23:58:25,511] Trial 579 finished with value: -0.003622978938231531 and parameters: {'learning_rate': 0.02741230905820703, 'sigma_multiplier': 0.5675366692213918, 'initialization_multiplier': 0.5626717890407675}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 579 final loss: -0.00362298
Trial 580:
  Learning Rate: 0.00021936510732362233
  Sigma Multiplier: 0.6075252320223872
  Initialization Multiplier: 0.6635035644168393


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.29it/s, loss=0.052673, elapsed time=0.04, total time=6.31]
[I 2025-06-07 23:58:31,854] Trial 580 finished with value: 0.05267258712423915 and parameters: {'learning_rate': 0.00021936510732362233, 'sigma_multiplier': 0.6075252320223872, 'initialization_multiplier': 0.6635035644168393}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 580 final loss: 0.05267259
Trial 581:
  Learning Rate: 0.036059893585849934
  Sigma Multiplier: 0.6627586579746461
  Initialization Multiplier: 0.4518194227711405


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.11it/s, loss=-0.003257, elapsed time=0.04, total time=6.1] 
[I 2025-06-07 23:58:37,992] Trial 581 finished with value: -0.003256622067413355 and parameters: {'learning_rate': 0.036059893585849934, 'sigma_multiplier': 0.6627586579746461, 'initialization_multiplier': 0.4518194227711405}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 581 final loss: -0.00325662
Trial 582:
  Learning Rate: 0.013283894345963899
  Sigma Multiplier: 0.5119518617451077
  Initialization Multiplier: 0.6101670669996362


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.59it/s, loss=-0.003297, elapsed time=0.04, total time=6.49]
[I 2025-06-07 23:58:44,521] Trial 582 finished with value: -0.003297487376740284 and parameters: {'learning_rate': 0.013283894345963899, 'sigma_multiplier': 0.5119518617451077, 'initialization_multiplier': 0.6101670669996362}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 582 final loss: -0.00329749
Trial 583:
  Learning Rate: 0.02285853736124209
  Sigma Multiplier: 0.7221732902835118
  Initialization Multiplier: 0.7060863290442302


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.74it/s, loss=-0.003432, elapsed time=0.04, total time=5.96]
[I 2025-06-07 23:58:50,514] Trial 583 finished with value: -0.003432169022056917 and parameters: {'learning_rate': 0.02285853736124209, 'sigma_multiplier': 0.7221732902835118, 'initialization_multiplier': 0.7060863290442302}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 583 final loss: -0.00343217
Trial 584:
  Learning Rate: 0.042435302199960404
  Sigma Multiplier: 0.5573109203556277
  Initialization Multiplier: 0.5310790190806781


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.05it/s, loss=-0.003383, elapsed time=0.04, total time=8]   
[I 2025-06-07 23:58:58,554] Trial 584 finished with value: -0.0033834693393001818 and parameters: {'learning_rate': 0.042435302199960404, 'sigma_multiplier': 0.5573109203556277, 'initialization_multiplier': 0.5310790190806781}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 584 final loss: -0.00338347
Trial 585:
  Learning Rate: 0.018239287967171624
  Sigma Multiplier: 0.5944689687884745
  Initialization Multiplier: 1.654766081436514


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.56it/s, loss=0.079909, elapsed time=0.04, total time=6.5] 
[I 2025-06-07 23:59:05,096] Trial 585 finished with value: 0.07990900890680203 and parameters: {'learning_rate': 0.018239287967171624, 'sigma_multiplier': 0.5944689687884745, 'initialization_multiplier': 1.654766081436514}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 585 final loss: 0.07990901
Trial 586:
  Learning Rate: 0.02550893502253257
  Sigma Multiplier: 0.7788640862475835
  Initialization Multiplier: 0.2880267269281459


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.37it/s, loss=-0.002658, elapsed time=0.04, total time=5.82]
[I 2025-06-07 23:59:10,957] Trial 586 finished with value: -0.0026581152519832034 and parameters: {'learning_rate': 0.02550893502253257, 'sigma_multiplier': 0.7788640862475835, 'initialization_multiplier': 0.2880267269281459}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 586 final loss: -0.00265812
Trial 587:
  Learning Rate: 0.0009431166808365549
  Sigma Multiplier: 0.48310230042226093
  Initialization Multiplier: 0.5868019005148043


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.20it/s, loss=0.001934, elapsed time=0.04, total time=6.61]
[I 2025-06-07 23:59:17,603] Trial 587 finished with value: 0.0019343838765490985 and parameters: {'learning_rate': 0.0009431166808365549, 'sigma_multiplier': 0.48310230042226093, 'initialization_multiplier': 0.5868019005148043}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 587 final loss: 0.00193438
Trial 588:
  Learning Rate: 0.035885991703699306
  Sigma Multiplier: 0.6282917663325834
  Initialization Multiplier: 0.6410401163399593


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.69it/s, loss=-0.003579, elapsed time=0.04, total time=6.21]
[I 2025-06-07 23:59:23,850] Trial 588 finished with value: -0.003578984000480153 and parameters: {'learning_rate': 0.035885991703699306, 'sigma_multiplier': 0.6282917663325834, 'initialization_multiplier': 0.6410401163399593}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 588 final loss: -0.00357898
Trial 589:
  Learning Rate: 0.044951093079517446
  Sigma Multiplier: 0.5378873566742343
  Initialization Multiplier: 0.676665900143459


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.78it/s, loss=-0.003605, elapsed time=0.04, total time=6.44]
[I 2025-06-07 23:59:30,325] Trial 589 finished with value: -0.00360482764601196 and parameters: {'learning_rate': 0.044951093079517446, 'sigma_multiplier': 0.5378873566742343, 'initialization_multiplier': 0.676665900143459}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 589 final loss: -0.00360483
Trial 590:
  Learning Rate: 0.02092216616045192
  Sigma Multiplier: 0.6683402432893439
  Initialization Multiplier: 0.4758790167976579


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.16it/s, loss=-0.002948, elapsed time=0.04, total time=6.09]
[I 2025-06-07 23:59:36,456] Trial 590 finished with value: -0.0029478094246011867 and parameters: {'learning_rate': 0.02092216616045192, 'sigma_multiplier': 0.6683402432893439, 'initialization_multiplier': 0.4758790167976579}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 590 final loss: -0.00294781
Trial 591:
  Learning Rate: 0.01526316769244029
  Sigma Multiplier: 0.5818634197502305
  Initialization Multiplier: 0.5789595647930651


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.16it/s, loss=-0.003492, elapsed time=0.04, total time=6.34]
[I 2025-06-07 23:59:42,836] Trial 591 finished with value: -0.0034920687048941986 and parameters: {'learning_rate': 0.01526316769244029, 'sigma_multiplier': 0.5818634197502305, 'initialization_multiplier': 0.5789595647930651}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 591 final loss: -0.00349207
Trial 592:
  Learning Rate: 0.029882483883676488
  Sigma Multiplier: 0.5203833405918201
  Initialization Multiplier: 0.6326564503320277


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.64it/s, loss=-0.003875, elapsed time=0.04, total time=6.47]
[I 2025-06-07 23:59:49,347] Trial 592 finished with value: -0.003875399031508417 and parameters: {'learning_rate': 0.029882483883676488, 'sigma_multiplier': 0.5203833405918201, 'initialization_multiplier': 0.6326564503320277}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 592 final loss: -0.00387540
Trial 593:
  Learning Rate: 0.026790071264188087
  Sigma Multiplier: 0.46391555463084466
  Initialization Multiplier: 0.7137495843447699


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.16it/s, loss=-0.003216, elapsed time=0.04, total time=6.61]
[I 2025-06-07 23:59:55,988] Trial 593 finished with value: -0.003216132806866832 and parameters: {'learning_rate': 0.026790071264188087, 'sigma_multiplier': 0.46391555463084466, 'initialization_multiplier': 0.7137495843447699}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 593 final loss: -0.00321613
Trial 594:
  Learning Rate: 0.022603233706307405
  Sigma Multiplier: 0.48598706572077877
  Initialization Multiplier: 0.6454949753776589


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.79it/s, loss=-0.003178, elapsed time=0.04, total time=6.72]
[I 2025-06-08 00:00:02,742] Trial 594 finished with value: -0.003178288610549793 and parameters: {'learning_rate': 0.022603233706307405, 'sigma_multiplier': 0.48598706572077877, 'initialization_multiplier': 0.6454949753776589}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 594 final loss: -0.00317829
Trial 595:
  Learning Rate: 0.03166790166140733
  Sigma Multiplier: 0.5214603114266024
  Initialization Multiplier: 0.6800440244266773


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.47it/s, loss=-0.003935, elapsed time=0.04, total time=6.52]
[I 2025-06-08 00:00:09,298] Trial 595 finished with value: -0.003935129461928887 and parameters: {'learning_rate': 0.03166790166140733, 'sigma_multiplier': 0.5214603114266024, 'initialization_multiplier': 0.6800440244266773}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 595 final loss: -0.00393513
Trial 596:
  Learning Rate: 0.030512614880307053
  Sigma Multiplier: 0.5409393018134506
  Initialization Multiplier: 0.7449787683985514


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.75it/s, loss=-0.003554, elapsed time=0.04, total time=6.45]
[I 2025-06-08 00:00:15,784] Trial 596 finished with value: -0.003553785160888255 and parameters: {'learning_rate': 0.030512614880307053, 'sigma_multiplier': 0.5409393018134506, 'initialization_multiplier': 0.7449787683985514}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 596 final loss: -0.00355379
Trial 597:
  Learning Rate: 0.03343907869673006
  Sigma Multiplier: 0.5082093251345443
  Initialization Multiplier: 0.6923449641318543


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.49it/s, loss=-0.003553, elapsed time=0.04, total time=6.53]
[I 2025-06-08 00:00:22,353] Trial 597 finished with value: -0.0035526455350008137 and parameters: {'learning_rate': 0.03343907869673006, 'sigma_multiplier': 0.5082093251345443, 'initialization_multiplier': 0.6923449641318543}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 597 final loss: -0.00355265
Trial 598:
  Learning Rate: 0.02900812797948065
  Sigma Multiplier: 1.458534784879466
  Initialization Multiplier: 0.6740730348843961


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.76it/s, loss=-0.001442, elapsed time=0.03, total time=5.02]
[I 2025-06-08 00:00:27,407] Trial 598 finished with value: -0.0014417140364164603 and parameters: {'learning_rate': 0.02900812797948065, 'sigma_multiplier': 1.458534784879466, 'initialization_multiplier': 0.6740730348843961}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 598 final loss: -0.00144171
Trial 599:
  Learning Rate: 0.03347939296220572
  Sigma Multiplier: 0.5161324584834094
  Initialization Multiplier: 0.7073065562261703


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.45it/s, loss=-0.003616, elapsed time=0.04, total time=6.53]
[I 2025-06-08 00:00:33,980] Trial 599 finished with value: -0.003616130245629882 and parameters: {'learning_rate': 0.03347939296220572, 'sigma_multiplier': 0.5161324584834094, 'initialization_multiplier': 0.7073065562261703}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 599 final loss: -0.00361613
Trial 600:
  Learning Rate: 0.026679293258428445
  Sigma Multiplier: 0.5600002765062552
  Initialization Multiplier: 0.635511464802634


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.93it/s, loss=-0.003908, elapsed time=0.04, total time=6.4] 
[I 2025-06-08 00:00:40,415] Trial 600 finished with value: -0.003907996413153445 and parameters: {'learning_rate': 0.026679293258428445, 'sigma_multiplier': 0.5600002765062552, 'initialization_multiplier': 0.635511464802634}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 600 final loss: -0.00390800
Trial 601:
  Learning Rate: 0.026085553818177492
  Sigma Multiplier: 0.46304051677884717
  Initialization Multiplier: 0.6630598211130286


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.17it/s, loss=-0.003298, elapsed time=0.04, total time=6.62]
[I 2025-06-08 00:00:47,068] Trial 601 finished with value: -0.003297661421513364 and parameters: {'learning_rate': 0.026085553818177492, 'sigma_multiplier': 0.46304051677884717, 'initialization_multiplier': 0.6630598211130286}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 601 final loss: -0.00329766
Trial 602:
  Learning Rate: 0.025236324755541367
  Sigma Multiplier: 0.4177219552543943
  Initialization Multiplier: 0.7739673649426614


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.80it/s, loss=-0.002800, elapsed time=0.04, total time=6.72]
[I 2025-06-08 00:00:53,827] Trial 602 finished with value: -0.0027997119069560067 and parameters: {'learning_rate': 0.025236324755541367, 'sigma_multiplier': 0.4177219552543943, 'initialization_multiplier': 0.7739673649426614}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 602 final loss: -0.00279971
Trial 603:
  Learning Rate: 0.029018405348793977
  Sigma Multiplier: 0.5357708616580095
  Initialization Multiplier: 0.7413367142342442


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.53it/s, loss=-0.003487, elapsed time=0.05, total time=6.51]
[I 2025-06-08 00:01:00,372] Trial 603 finished with value: -0.0034865668836871516 and parameters: {'learning_rate': 0.029018405348793977, 'sigma_multiplier': 0.5357708616580095, 'initialization_multiplier': 0.7413367142342442}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 603 final loss: -0.00348657
Trial 604:
  Learning Rate: 0.02316739083059238
  Sigma Multiplier: 0.48794835627638833
  Initialization Multiplier: 0.6370804495692843


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.13it/s, loss=-0.003236, elapsed time=0.04, total time=6.62]
[I 2025-06-08 00:01:07,031] Trial 604 finished with value: -0.003235608993181904 and parameters: {'learning_rate': 0.02316739083059238, 'sigma_multiplier': 0.48794835627638833, 'initialization_multiplier': 0.6370804495692843}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 604 final loss: -0.00323561
Trial 605:
  Learning Rate: 0.03109868659429603
  Sigma Multiplier: 0.5603196039779176
  Initialization Multiplier: 0.7073723764533603


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.81it/s, loss=-0.003577, elapsed time=0.04, total time=6.44]
[I 2025-06-08 00:01:13,512] Trial 605 finished with value: -0.003576571852401592 and parameters: {'learning_rate': 0.03109868659429603, 'sigma_multiplier': 0.5603196039779176, 'initialization_multiplier': 0.7073723764533603}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 605 final loss: -0.00357657
Trial 606:
  Learning Rate: 0.019387061937578814
  Sigma Multiplier: 0.5113886131856923
  Initialization Multiplier: 0.6414138420067899


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.99it/s, loss=-0.003352, elapsed time=0.04, total time=6.66]
[I 2025-06-08 00:01:20,209] Trial 606 finished with value: -0.0033520879970484444 and parameters: {'learning_rate': 0.019387061937578814, 'sigma_multiplier': 0.5113886131856923, 'initialization_multiplier': 0.6414138420067899}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 606 final loss: -0.00335209
Trial 607:
  Learning Rate: 0.026452370341048644
  Sigma Multiplier: 1.2161244826939417
  Initialization Multiplier: 0.00853128828294214


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.44it/s, loss=-0.001572, elapsed time=0.03, total time=5.22]
[I 2025-06-08 00:01:25,469] Trial 607 finished with value: -0.0015715973153236083 and parameters: {'learning_rate': 0.026452370341048644, 'sigma_multiplier': 1.2161244826939417, 'initialization_multiplier': 0.00853128828294214}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 607 final loss: -0.00157160
Trial 608:
  Learning Rate: 0.03711619620031505
  Sigma Multiplier: 0.5579819134583475
  Initialization Multiplier: 0.6153260251988817


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.80it/s, loss=-0.003731, elapsed time=0.04, total time=6.45]
[I 2025-06-08 00:01:31,955] Trial 608 finished with value: -0.003730695395109935 and parameters: {'learning_rate': 0.03711619620031505, 'sigma_multiplier': 0.5579819134583475, 'initialization_multiplier': 0.6153260251988817}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 608 final loss: -0.00373070
Trial 609:
  Learning Rate: 0.02239297705965255
  Sigma Multiplier: 0.4462526970674427
  Initialization Multiplier: 0.6707645262985977


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.80it/s, loss=-0.003104, elapsed time=0.04, total time=6.72]
[I 2025-06-08 00:01:38,714] Trial 609 finished with value: -0.0031038388403602387 and parameters: {'learning_rate': 0.02239297705965255, 'sigma_multiplier': 0.4462526970674427, 'initialization_multiplier': 0.6707645262985977}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 609 final loss: -0.00310384
Trial 610:
  Learning Rate: 0.03211375222741677
  Sigma Multiplier: 0.6016685853349393
  Initialization Multiplier: 0.7358293312039219


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.97it/s, loss=-0.003688, elapsed time=0.04, total time=6.4] 
[I 2025-06-08 00:01:45,149] Trial 610 finished with value: -0.003687607301474647 and parameters: {'learning_rate': 0.03211375222741677, 'sigma_multiplier': 0.6016685853349393, 'initialization_multiplier': 0.7358293312039219}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 610 final loss: -0.00368761
Trial 611:
  Learning Rate: 0.02746147972773299
  Sigma Multiplier: 0.5289093269301741
  Initialization Multiplier: 0.6111292111846872


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.20it/s, loss=-0.003439, elapsed time=0.04, total time=6.6] 
[I 2025-06-08 00:01:51,782] Trial 611 finished with value: -0.0034390975001165496 and parameters: {'learning_rate': 0.02746147972773299, 'sigma_multiplier': 0.5289093269301741, 'initialization_multiplier': 0.6111292111846872}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 611 final loss: -0.00343910
Trial 612:
  Learning Rate: 0.03976741734135711
  Sigma Multiplier: 0.5667544514060363
  Initialization Multiplier: 0.6779698850048957


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.58it/s, loss=-0.003819, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:01:58,313] Trial 612 finished with value: -0.00381944579372801 and parameters: {'learning_rate': 0.03976741734135711, 'sigma_multiplier': 0.5667544514060363, 'initialization_multiplier': 0.6779698850048957}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 612 final loss: -0.00381945
Trial 613:
  Learning Rate: 0.017427937198396245
  Sigma Multiplier: 0.623761521872995
  Initialization Multiplier: 0.6297127151951668


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.93it/s, loss=-0.003685, elapsed time=0.04, total time=6.4] 
[I 2025-06-08 00:02:04,747] Trial 613 finished with value: -0.0036848345996218067 and parameters: {'learning_rate': 0.017427937198396245, 'sigma_multiplier': 0.623761521872995, 'initialization_multiplier': 0.6297127151951668}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 613 final loss: -0.00368483
Trial 614:
  Learning Rate: 0.0031854214545768682
  Sigma Multiplier: 0.49605786558270504
  Initialization Multiplier: 0.5598092486631447


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.90it/s, loss=-0.002354, elapsed time=0.04, total time=6.69]
[I 2025-06-08 00:02:11,476] Trial 614 finished with value: -0.0023542921587993934 and parameters: {'learning_rate': 0.0031854214545768682, 'sigma_multiplier': 0.49605786558270504, 'initialization_multiplier': 0.5598092486631447}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 614 final loss: -0.00235429
Trial 615:
  Learning Rate: 0.02393258519865473
  Sigma Multiplier: 0.5808724460700905
  Initialization Multiplier: 0.5905472095405864


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.60it/s, loss=-0.003943, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:02:18,009] Trial 615 finished with value: -0.003943008058572356 and parameters: {'learning_rate': 0.02393258519865473, 'sigma_multiplier': 0.5808724460700905, 'initialization_multiplier': 0.5905472095405864}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 615 final loss: -0.00394301
Trial 616:
  Learning Rate: 0.02047507450641085
  Sigma Multiplier: 0.5455136192938335
  Initialization Multiplier: 0.5064113153886627


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.38it/s, loss=-0.003343, elapsed time=0.04, total time=6.55]
[I 2025-06-08 00:02:24,601] Trial 616 finished with value: -0.0033430342691591094 and parameters: {'learning_rate': 0.02047507450641085, 'sigma_multiplier': 0.5455136192938335, 'initialization_multiplier': 0.5064113153886627}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 616 final loss: -0.00334303
Trial 617:
  Learning Rate: 0.02282292109840155
  Sigma Multiplier: 0.5784125707047907
  Initialization Multiplier: 0.5947772574571859


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.66it/s, loss=-0.003610, elapsed time=0.04, total time=6.48]
[I 2025-06-08 00:02:31,122] Trial 617 finished with value: -0.0036099982630965607 and parameters: {'learning_rate': 0.02282292109840155, 'sigma_multiplier': 0.5784125707047907, 'initialization_multiplier': 0.5947772574571859}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 617 final loss: -0.00361000
Trial 618:
  Learning Rate: 0.016792768503596463
  Sigma Multiplier: 0.5125321360196334
  Initialization Multiplier: 0.6560763085081067


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.08it/s, loss=-0.003154, elapsed time=0.04, total time=6.63]
[I 2025-06-08 00:02:37,792] Trial 618 finished with value: -0.003154105823658644 and parameters: {'learning_rate': 0.016792768503596463, 'sigma_multiplier': 0.5125321360196334, 'initialization_multiplier': 0.6560763085081067}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 618 final loss: -0.00315411
Trial 619:
  Learning Rate: 0.02353892039203109
  Sigma Multiplier: 0.4729491610088645
  Initialization Multiplier: 1.0059971410057047


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.44it/s, loss=-0.000681, elapsed time=0.04, total time=6.82]
[I 2025-06-08 00:02:44,652] Trial 619 finished with value: -0.0006809363096198584 and parameters: {'learning_rate': 0.02353892039203109, 'sigma_multiplier': 0.4729491610088645, 'initialization_multiplier': 1.0059971410057047}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 619 final loss: -0.00068094
Trial 620:
  Learning Rate: 0.019795939637646583
  Sigma Multiplier: 0.6323159703904933
  Initialization Multiplier: 0.5351916406980595


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.31it/s, loss=-0.003568, elapsed time=0.04, total time=6.31]
[I 2025-06-08 00:02:50,994] Trial 620 finished with value: -0.003568433479773204 and parameters: {'learning_rate': 0.019795939637646583, 'sigma_multiplier': 0.6323159703904933, 'initialization_multiplier': 0.5351916406980595}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 620 final loss: -0.00356843
Trial 621:
  Learning Rate: 0.026764451406110782
  Sigma Multiplier: 1.6188110522085406
  Initialization Multiplier: 0.7129292060545898


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.52it/s, loss=-0.001171, elapsed time=0.03, total time=5.05]
[I 2025-06-08 00:02:56,081] Trial 621 finished with value: -0.0011705788276775036 and parameters: {'learning_rate': 0.026764451406110782, 'sigma_multiplier': 1.6188110522085406, 'initialization_multiplier': 0.7129292060545898}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 621 final loss: -0.00117058
Trial 622:
  Learning Rate: 0.014305101899817917
  Sigma Multiplier: 0.5919907117791756
  Initialization Multiplier: 0.6171194270355984


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.64it/s, loss=-0.003150, elapsed time=0.04, total time=6.47]
[I 2025-06-08 00:03:02,594] Trial 622 finished with value: -0.0031500310605699065 and parameters: {'learning_rate': 0.014305101899817917, 'sigma_multiplier': 0.5919907117791756, 'initialization_multiplier': 0.6171194270355984}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 622 final loss: -0.00315003
Trial 623:
  Learning Rate: 0.02507425967477896
  Sigma Multiplier: 0.5545171995126281
  Initialization Multiplier: 0.5637695371346186


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.56it/s, loss=-0.003625, elapsed time=0.04, total time=6.51]
[I 2025-06-08 00:03:09,140] Trial 623 finished with value: -0.0036245197204171015 and parameters: {'learning_rate': 0.02507425967477896, 'sigma_multiplier': 0.5545171995126281, 'initialization_multiplier': 0.5637695371346186}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 623 final loss: -0.00362452
Trial 624:
  Learning Rate: 0.019982183319104542
  Sigma Multiplier: 0.5224267350346828
  Initialization Multiplier: 0.6882152835744992


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.18it/s, loss=-0.003559, elapsed time=0.04, total time=6.61]
[I 2025-06-08 00:03:15,786] Trial 624 finished with value: -0.0035591503481414532 and parameters: {'learning_rate': 0.019982183319104542, 'sigma_multiplier': 0.5224267350346828, 'initialization_multiplier': 0.6882152835744992}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 624 final loss: -0.00355915
Trial 625:
  Learning Rate: 0.028827257827067337
  Sigma Multiplier: 0.4289302807689323
  Initialization Multiplier: 0.6430322728534769


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.67it/s, loss=-0.003207, elapsed time=0.04, total time=6.76]
[I 2025-06-08 00:03:22,580] Trial 625 finished with value: -0.003206877488669014 and parameters: {'learning_rate': 0.028827257827067337, 'sigma_multiplier': 0.4289302807689323, 'initialization_multiplier': 0.6430322728534769}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 625 final loss: -0.00320688
Trial 626:
  Learning Rate: 0.03564842338017399
  Sigma Multiplier: 0.6800104698545073
  Initialization Multiplier: 0.5853574398886164


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.87it/s, loss=-0.003681, elapsed time=0.04, total time=6.16]
[I 2025-06-08 00:03:28,777] Trial 626 finished with value: -0.003681436918902893 and parameters: {'learning_rate': 0.03564842338017399, 'sigma_multiplier': 0.6800104698545073, 'initialization_multiplier': 0.5853574398886164}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 626 final loss: -0.00368144
Trial 627:
  Learning Rate: 0.046874168138330265
  Sigma Multiplier: 0.619370143605014
  Initialization Multiplier: 0.431344711838413


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.15it/s, loss=-0.003558, elapsed time=0.04, total time=6.34]
[I 2025-06-08 00:03:35,153] Trial 627 finished with value: -0.003558167241394072 and parameters: {'learning_rate': 0.046874168138330265, 'sigma_multiplier': 0.619370143605014, 'initialization_multiplier': 0.431344711838413}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 627 final loss: -0.00355817
Trial 628:
  Learning Rate: 0.008825718384223424
  Sigma Multiplier: 0.572041139496978
  Initialization Multiplier: 0.5278893367032854


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.80it/s, loss=-0.003140, elapsed time=0.04, total time=6.44]
[I 2025-06-08 00:03:41,626] Trial 628 finished with value: -0.003139789468746001 and parameters: {'learning_rate': 0.008825718384223424, 'sigma_multiplier': 0.572041139496978, 'initialization_multiplier': 0.5278893367032854}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 628 final loss: -0.00313979
Trial 629:
  Learning Rate: 0.023403696899706516
  Sigma Multiplier: 0.4856597847418535
  Initialization Multiplier: 0.7413303685382411


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.03it/s, loss=-0.003512, elapsed time=0.04, total time=6.65]
[I 2025-06-08 00:03:48,311] Trial 629 finished with value: -0.003511597848149951 and parameters: {'learning_rate': 0.023403696899706516, 'sigma_multiplier': 0.4856597847418535, 'initialization_multiplier': 0.7413303685382411}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 629 final loss: -0.00351160
Trial 630:
  Learning Rate: 0.030742866760898255
  Sigma Multiplier: 0.6481228780315603
  Initialization Multiplier: 0.4875553372860314


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.70it/s, loss=-0.003440, elapsed time=0.04, total time=6.22]
[I 2025-06-08 00:03:54,563] Trial 630 finished with value: -0.0034399519611803386 and parameters: {'learning_rate': 0.030742866760898255, 'sigma_multiplier': 0.6481228780315603, 'initialization_multiplier': 0.4875553372860314}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 630 final loss: -0.00343995
Trial 631:
  Learning Rate: 0.03924752928787711
  Sigma Multiplier: 0.5426444361572428
  Initialization Multiplier: 0.6124868535074076


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.45it/s, loss=-0.003570, elapsed time=0.04, total time=6.53]
[I 2025-06-08 00:04:01,133] Trial 631 finished with value: -0.0035704498907141416 and parameters: {'learning_rate': 0.03924752928787711, 'sigma_multiplier': 0.5426444361572428, 'initialization_multiplier': 0.6124868535074076}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 631 final loss: -0.00357045
Trial 632:
  Learning Rate: 0.0197980968382274
  Sigma Multiplier: 0.59754911139264
  Initialization Multiplier: 0.6616988419114088


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.10it/s, loss=-0.003622, elapsed time=0.04, total time=6.35]
[I 2025-06-08 00:04:07,525] Trial 632 finished with value: -0.0036217037215577684 and parameters: {'learning_rate': 0.0197980968382274, 'sigma_multiplier': 0.59754911139264, 'initialization_multiplier': 0.6616988419114088}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 632 final loss: -0.00362170
Trial 633:
  Learning Rate: 0.0277592645105304
  Sigma Multiplier: 0.5225207650670198
  Initialization Multiplier: 0.5572944368003326


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.48it/s, loss=-0.003673, elapsed time=0.04, total time=6.52]
[I 2025-06-08 00:04:14,083] Trial 633 finished with value: -0.003672988716296239 and parameters: {'learning_rate': 0.0277592645105304, 'sigma_multiplier': 0.5225207650670198, 'initialization_multiplier': 0.5572944368003326}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 633 final loss: -0.00367299
Trial 634:
  Learning Rate: 0.00013214697532600445
  Sigma Multiplier: 0.5795063658728323
  Initialization Multiplier: 0.6916348990257711


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.90it/s, loss=0.061656, elapsed time=0.04, total time=6.42]
[I 2025-06-08 00:04:20,539] Trial 634 finished with value: 0.061656078602241426 and parameters: {'learning_rate': 0.00013214697532600445, 'sigma_multiplier': 0.5795063658728323, 'initialization_multiplier': 0.6916348990257711}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 634 final loss: 0.06165608
Trial 635:
  Learning Rate: 0.017513114138599673
  Sigma Multiplier: 0.4551921825101729
  Initialization Multiplier: 0.5948174991945145


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.99it/s, loss=-0.003142, elapsed time=0.04, total time=6.66]
[I 2025-06-08 00:04:27,236] Trial 635 finished with value: -0.003142188794235943 and parameters: {'learning_rate': 0.017513114138599673, 'sigma_multiplier': 0.4551921825101729, 'initialization_multiplier': 0.5948174991945145}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 635 final loss: -0.00314219
Trial 636:
  Learning Rate: 0.03353622609086426
  Sigma Multiplier: 0.630932332571002
  Initialization Multiplier: 0.6494505723438095


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.68it/s, loss=-0.003507, elapsed time=0.04, total time=6.21]
[I 2025-06-08 00:04:33,480] Trial 636 finished with value: -0.003507049265320159 and parameters: {'learning_rate': 0.03353622609086426, 'sigma_multiplier': 0.630932332571002, 'initialization_multiplier': 0.6494505723438095}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 636 final loss: -0.00350705
Trial 637:
  Learning Rate: 0.02400070316396314
  Sigma Multiplier: 0.7075945061328149
  Initialization Multiplier: 0.6162174739226467


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.48it/s, loss=-0.003423, elapsed time=0.04, total time=6.02]
[I 2025-06-08 00:04:39,536] Trial 637 finished with value: -0.003422930473768263 and parameters: {'learning_rate': 0.02400070316396314, 'sigma_multiplier': 0.7075945061328149, 'initialization_multiplier': 0.6162174739226467}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 637 final loss: -0.00342293
Trial 638:
  Learning Rate: 0.011269057632496858
  Sigma Multiplier: 0.5563838245922148
  Initialization Multiplier: 0.5428293583609325


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.90it/s, loss=-0.003161, elapsed time=0.04, total time=6.4] 
[I 2025-06-08 00:04:45,978] Trial 638 finished with value: -0.003161001766162078 and parameters: {'learning_rate': 0.011269057632496858, 'sigma_multiplier': 0.5563838245922148, 'initialization_multiplier': 0.5428293583609325}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 638 final loss: -0.00316100
Trial 639:
  Learning Rate: 0.04661622408248396
  Sigma Multiplier: 0.4886756833204323
  Initialization Multiplier: 0.4961685939118641


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.34it/s, loss=-0.003202, elapsed time=0.04, total time=6.57]
[I 2025-06-08 00:04:52,580] Trial 639 finished with value: -0.0032019056829059966 and parameters: {'learning_rate': 0.04661622408248396, 'sigma_multiplier': 0.4886756833204323, 'initialization_multiplier': 0.4961685939118641}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 639 final loss: -0.00320191
Trial 640:
  Learning Rate: 0.03879321321644867
  Sigma Multiplier: 0.5895547336372251
  Initialization Multiplier: 0.6894456833679101


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.26it/s, loss=-0.003954, elapsed time=0.04, total time=6.31]
[I 2025-06-08 00:04:58,925] Trial 640 finished with value: -0.00395411974705142 and parameters: {'learning_rate': 0.03879321321644867, 'sigma_multiplier': 0.5895547336372251, 'initialization_multiplier': 0.6894456833679101}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 640 final loss: -0.00395412
Trial 641:
  Learning Rate: 0.04750034346600878
  Sigma Multiplier: 0.6763535619522094
  Initialization Multiplier: 0.7255622634129592


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.07it/s, loss=-0.003490, elapsed time=0.04, total time=6.13]
[I 2025-06-08 00:05:05,088] Trial 641 finished with value: -0.0034895675346253716 and parameters: {'learning_rate': 0.04750034346600878, 'sigma_multiplier': 0.6763535619522094, 'initialization_multiplier': 0.7255622634129592}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 641 final loss: -0.00348957
Trial 642:
  Learning Rate: 0.037587077647846315
  Sigma Multiplier: 0.6141662118369295
  Initialization Multiplier: 0.700979051971865


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.52it/s, loss=-0.003425, elapsed time=0.04, total time=6.25]
[I 2025-06-08 00:05:11,378] Trial 642 finished with value: -0.003424840895010998 and parameters: {'learning_rate': 0.037587077647846315, 'sigma_multiplier': 0.6141662118369295, 'initialization_multiplier': 0.700979051971865}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 642 final loss: -0.00342484
Trial 643:
  Learning Rate: 0.041944459400062674
  Sigma Multiplier: 0.6623518435402269
  Initialization Multiplier: 0.7534180558672104


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.94it/s, loss=-0.003560, elapsed time=0.04, total time=6.14]
[I 2025-06-08 00:05:17,554] Trial 643 finished with value: -0.003559716077539306 and parameters: {'learning_rate': 0.041944459400062674, 'sigma_multiplier': 0.6623518435402269, 'initialization_multiplier': 0.7534180558672104}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 643 final loss: -0.00355972
Trial 644:
  Learning Rate: 0.0540027437076787
  Sigma Multiplier: 1.3250257990887573
  Initialization Multiplier: 0.7834080535777039


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.37it/s, loss=-0.001545, elapsed time=0.03, total time=5.08]
[I 2025-06-08 00:05:22,668] Trial 644 finished with value: -0.0015447970599325703 and parameters: {'learning_rate': 0.0540027437076787, 'sigma_multiplier': 1.3250257990887573, 'initialization_multiplier': 0.7834080535777039}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 644 final loss: -0.00154480
Trial 645:
  Learning Rate: 0.0003621912473513941
  Sigma Multiplier: 0.5965645884253471
  Initialization Multiplier: 0.6767328467377034


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.27it/s, loss=0.045613, elapsed time=0.04, total time=6.33]
[I 2025-06-08 00:05:29,033] Trial 645 finished with value: 0.04561281169103732 and parameters: {'learning_rate': 0.0003621912473513941, 'sigma_multiplier': 0.5965645884253471, 'initialization_multiplier': 0.6767328467377034}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 645 final loss: 0.04561281
Trial 646:
  Learning Rate: 0.04420815214616421
  Sigma Multiplier: 0.6358697289635631
  Initialization Multiplier: 0.72075725411314


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.89it/s, loss=-0.003550, elapsed time=0.04, total time=6.16]
[I 2025-06-08 00:05:35,230] Trial 646 finished with value: -0.003550281731744055 and parameters: {'learning_rate': 0.04420815214616421, 'sigma_multiplier': 0.6358697289635631, 'initialization_multiplier': 0.72075725411314}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 646 final loss: -0.00355028
Trial 647:
  Learning Rate: 0.03554391001784089
  Sigma Multiplier: 0.5875537563086026
  Initialization Multiplier: 0.5714523557336323


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.27it/s, loss=-0.003708, elapsed time=0.04, total time=6.31]
[I 2025-06-08 00:05:41,581] Trial 647 finished with value: -0.003708260064186617 and parameters: {'learning_rate': 0.03554391001784089, 'sigma_multiplier': 0.5875537563086026, 'initialization_multiplier': 0.5714523557336323}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 647 final loss: -0.00370826
Trial 648:
  Learning Rate: 0.01609528905419023
  Sigma Multiplier: 0.5592684428934033
  Initialization Multiplier: 0.4602490729591699


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.00it/s, loss=-0.003118, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:05:48,011] Trial 648 finished with value: -0.0031182384864943957 and parameters: {'learning_rate': 0.01609528905419023, 'sigma_multiplier': 0.5592684428934033, 'initialization_multiplier': 0.4602490729591699}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 648 final loss: -0.00311824
Trial 649:
  Learning Rate: 0.04156121768183422
  Sigma Multiplier: 0.621966412375156
  Initialization Multiplier: 0.6746339333026453


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.59it/s, loss=-0.003875, elapsed time=0.04, total time=6.25]
[I 2025-06-08 00:05:54,299] Trial 649 finished with value: -0.003874971499022028 and parameters: {'learning_rate': 0.04156121768183422, 'sigma_multiplier': 0.621966412375156, 'initialization_multiplier': 0.6746339333026453}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 649 final loss: -0.00387497
Trial 650:
  Learning Rate: 0.03263189433384681
  Sigma Multiplier: 0.6523705009979688
  Initialization Multiplier: 0.5967695140621304


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.89it/s, loss=-0.003456, elapsed time=0.04, total time=6.17]
[I 2025-06-08 00:06:00,504] Trial 650 finished with value: -0.0034560987225348036 and parameters: {'learning_rate': 0.03263189433384681, 'sigma_multiplier': 0.6523705009979688, 'initialization_multiplier': 0.5967695140621304}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 650 final loss: -0.00345610
Trial 651:
  Learning Rate: 0.05741915086852185
  Sigma Multiplier: 0.5886775314997218
  Initialization Multiplier: 0.6354515168597266


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.29it/s, loss=-0.003730, elapsed time=0.04, total time=6.31]
[I 2025-06-08 00:06:06,851] Trial 651 finished with value: -0.003729608489817568 and parameters: {'learning_rate': 0.05741915086852185, 'sigma_multiplier': 0.5886775314997218, 'initialization_multiplier': 0.6354515168597266}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 651 final loss: -0.00372961
Trial 652:
  Learning Rate: 0.021822621633914182
  Sigma Multiplier: 0.7120622642944209
  Initialization Multiplier: 0.5216038642505225


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.42it/s, loss=-0.003126, elapsed time=0.04, total time=6.03]
[I 2025-06-08 00:06:12,921] Trial 652 finished with value: -0.003126351538725997 and parameters: {'learning_rate': 0.021822621633914182, 'sigma_multiplier': 0.7120622642944209, 'initialization_multiplier': 0.5216038642505225}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 652 final loss: -0.00312635
Trial 653:
  Learning Rate: 0.025404423289411445
  Sigma Multiplier: 0.5513492077753506
  Initialization Multiplier: 0.6599133022387926


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.83it/s, loss=-0.003982, elapsed time=0.04, total time=6.42]
[I 2025-06-08 00:06:19,380] Trial 653 finished with value: -0.003981597330530054 and parameters: {'learning_rate': 0.025404423289411445, 'sigma_multiplier': 0.5513492077753506, 'initialization_multiplier': 0.6599133022387926}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 653 final loss: -0.00398160
Trial 654:
  Learning Rate: 0.019250947733201178
  Sigma Multiplier: 1.9244073799661154
  Initialization Multiplier: 0.9401621444968303


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.74it/s, loss=0.002483, elapsed time=0.03, total time=4.86]
[I 2025-06-08 00:06:24,273] Trial 654 finished with value: 0.0024830254804904693 and parameters: {'learning_rate': 0.019250947733201178, 'sigma_multiplier': 1.9244073799661154, 'initialization_multiplier': 0.9401621444968303}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 654 final loss: 0.00248303
Trial 655:
  Learning Rate: 0.0014344109502591811
  Sigma Multiplier: 0.5428053296949349
  Initialization Multiplier: 0.7661003200946008


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.64it/s, loss=0.001771, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:06:30,800] Trial 655 finished with value: 0.0017709120039562638 and parameters: {'learning_rate': 0.0014344109502591811, 'sigma_multiplier': 0.5428053296949349, 'initialization_multiplier': 0.7661003200946008}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 655 final loss: 0.00177091
Trial 656:
  Learning Rate: 0.025556753815464933
  Sigma Multiplier: 0.5010524425207099
  Initialization Multiplier: 0.7236420836421474


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.37it/s, loss=-0.003435, elapsed time=0.04, total time=6.55]
[I 2025-06-08 00:06:37,384] Trial 656 finished with value: -0.0034353175889627378 and parameters: {'learning_rate': 0.025556753815464933, 'sigma_multiplier': 0.5010524425207099, 'initialization_multiplier': 0.7236420836421474}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 656 final loss: -0.00343532
Trial 657:
  Learning Rate: 0.02154018724062854
  Sigma Multiplier: 0.5693997798988981
  Initialization Multiplier: 0.7013418675738488


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.46it/s, loss=-0.003838, elapsed time=0.04, total time=6.54]
[I 2025-06-08 00:06:43,959] Trial 657 finished with value: -0.0038375698086404553 and parameters: {'learning_rate': 0.02154018724062854, 'sigma_multiplier': 0.5693997798988981, 'initialization_multiplier': 0.7013418675738488}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 657 final loss: -0.00383757
Trial 658:
  Learning Rate: 0.013904748406392359
  Sigma Multiplier: 0.5133667300447714
  Initialization Multiplier: 0.6740845145622945


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.65it/s, loss=-0.003410, elapsed time=0.04, total time=6.76]
[I 2025-06-08 00:06:50,752] Trial 658 finished with value: -0.003409925216207177 and parameters: {'learning_rate': 0.013904748406392359, 'sigma_multiplier': 0.5133667300447714, 'initialization_multiplier': 0.6740845145622945}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 658 final loss: -0.00340993
Trial 659:
  Learning Rate: 0.02480657508727048
  Sigma Multiplier: 0.5522430779927764
  Initialization Multiplier: 0.6528051203981602


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.57it/s, loss=-0.003594, elapsed time=0.04, total time=6.5] 
[I 2025-06-08 00:06:57,294] Trial 659 finished with value: -0.0035936744619741922 and parameters: {'learning_rate': 0.02480657508727048, 'sigma_multiplier': 0.5522430779927764, 'initialization_multiplier': 0.6528051203981602}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 659 final loss: -0.00359367
Trial 660:
  Learning Rate: 7.129129961414054e-05
  Sigma Multiplier: 0.5440934748411093
  Initialization Multiplier: 0.6562910939591325


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.46it/s, loss=0.054138, elapsed time=0.04, total time=6.53]
[I 2025-06-08 00:07:03,861] Trial 660 finished with value: 0.05413817677658157 and parameters: {'learning_rate': 7.129129961414054e-05, 'sigma_multiplier': 0.5440934748411093, 'initialization_multiplier': 0.6562910939591325}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 660 final loss: 0.05413818
Trial 661:
  Learning Rate: 0.026563792094766687
  Sigma Multiplier: 0.6071107621157508
  Initialization Multiplier: 0.7960443343812988


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.47it/s, loss=-0.003767, elapsed time=0.04, total time=6.26]
[I 2025-06-08 00:07:10,167] Trial 661 finished with value: -0.0037671604888594385 and parameters: {'learning_rate': 0.026563792094766687, 'sigma_multiplier': 0.6071107621157508, 'initialization_multiplier': 0.7960443343812988}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 661 final loss: -0.00376716
Trial 662:
  Learning Rate: 0.017344648037614804
  Sigma Multiplier: 1.739257529709789
  Initialization Multiplier: 0.7211075682125012


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.63it/s, loss=-0.000982, elapsed time=0.03, total time=4.87]
[I 2025-06-08 00:07:15,078] Trial 662 finished with value: -0.0009815525498152954 and parameters: {'learning_rate': 0.017344648037614804, 'sigma_multiplier': 1.739257529709789, 'initialization_multiplier': 0.7211075682125012}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 662 final loss: -0.00098155
Trial 663:
  Learning Rate: 0.021714812967590688
  Sigma Multiplier: 0.47271901621164925
  Initialization Multiplier: 0.6219737365516499


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.31it/s, loss=-0.003259, elapsed time=0.04, total time=6.57]
[I 2025-06-08 00:07:21,692] Trial 663 finished with value: -0.0032589676935492105 and parameters: {'learning_rate': 0.021714812967590688, 'sigma_multiplier': 0.47271901621164925, 'initialization_multiplier': 0.6219737365516499}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 663 final loss: -0.00325897
Trial 664:
  Learning Rate: 0.030036766316206703
  Sigma Multiplier: 0.5221996073544369
  Initialization Multiplier: 0.6760556055673609


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.62it/s, loss=-0.003864, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:07:28,215] Trial 664 finished with value: -0.0038643851034236235 and parameters: {'learning_rate': 0.030036766316206703, 'sigma_multiplier': 0.5221996073544369, 'initialization_multiplier': 0.6760556055673609}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 664 final loss: -0.00386439
Trial 665:
  Learning Rate: 0.004085639763690239
  Sigma Multiplier: 0.5802373346582282
  Initialization Multiplier: 0.6216093216560266


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.02it/s, loss=-0.002688, elapsed time=0.04, total time=6.38]
[I 2025-06-08 00:07:34,636] Trial 665 finished with value: -0.0026879055737609515 and parameters: {'learning_rate': 0.004085639763690239, 'sigma_multiplier': 0.5802373346582282, 'initialization_multiplier': 0.6216093216560266}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 665 final loss: -0.00268791
Trial 666:
  Learning Rate: 0.026944365724535253
  Sigma Multiplier: 0.6478119745143256
  Initialization Multiplier: 0.6909997539741897


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.80it/s, loss=-0.003517, elapsed time=0.04, total time=6.18]
[I 2025-06-08 00:07:40,855] Trial 666 finished with value: -0.0035172615415954953 and parameters: {'learning_rate': 0.026944365724535253, 'sigma_multiplier': 0.6478119745143256, 'initialization_multiplier': 0.6909997539741897}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 666 final loss: -0.00351726
Trial 667:
  Learning Rate: 0.018501306775093273
  Sigma Multiplier: 0.5600129052212097
  Initialization Multiplier: 0.594513732443527


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.94it/s, loss=-0.003797, elapsed time=0.04, total time=6.41]
[I 2025-06-08 00:07:47,299] Trial 667 finished with value: -0.0037972938974175224 and parameters: {'learning_rate': 0.018501306775093273, 'sigma_multiplier': 0.5600129052212097, 'initialization_multiplier': 0.594513732443527}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 667 final loss: -0.00379729
Trial 668:
  Learning Rate: 0.022970343654785015
  Sigma Multiplier: 0.6153445729621744
  Initialization Multiplier: 0.38136556686188483


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.47it/s, loss=-0.003058, elapsed time=0.04, total time=6.27]
[I 2025-06-08 00:07:53,607] Trial 668 finished with value: -0.0030581090791441845 and parameters: {'learning_rate': 0.022970343654785015, 'sigma_multiplier': 0.6153445729621744, 'initialization_multiplier': 0.38136556686188483}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 668 final loss: -0.00305811
Trial 669:
  Learning Rate: 0.03149782435723498
  Sigma Multiplier: 0.5015771001016341
  Initialization Multiplier: 0.7618902875600456


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.54it/s, loss=-0.003324, elapsed time=0.04, total time=6.79]
[I 2025-06-08 00:08:00,446] Trial 669 finished with value: -0.0033242093857503508 and parameters: {'learning_rate': 0.03149782435723498, 'sigma_multiplier': 0.5015771001016341, 'initialization_multiplier': 0.7618902875600456}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 669 final loss: -0.00332421
Trial 670:
  Learning Rate: 0.028464273985566252
  Sigma Multiplier: 0.5441370623005891
  Initialization Multiplier: 0.6495101895626816


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.66it/s, loss=-0.003933, elapsed time=0.04, total time=6.5] 
[I 2025-06-08 00:08:06,993] Trial 670 finished with value: -0.003932893418088143 and parameters: {'learning_rate': 0.028464273985566252, 'sigma_multiplier': 0.5441370623005891, 'initialization_multiplier': 0.6495101895626816}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 670 final loss: -0.00393289
Trial 671:
  Learning Rate: 0.024475521974036022
  Sigma Multiplier: 0.4807335574740847
  Initialization Multiplier: 0.6534315418727694


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.17it/s, loss=-0.003371, elapsed time=0.04, total time=6.61]
[I 2025-06-08 00:08:13,646] Trial 671 finished with value: -0.0033714867686203223 and parameters: {'learning_rate': 0.024475521974036022, 'sigma_multiplier': 0.4807335574740847, 'initialization_multiplier': 0.6534315418727694}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 671 final loss: -0.00337149
Trial 672:
  Learning Rate: 0.028820763225244215
  Sigma Multiplier: 0.4137219537464861
  Initialization Multiplier: 0.7042938332841779


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.74it/s, loss=-0.002519, elapsed time=0.04, total time=6.73]
[I 2025-06-08 00:08:20,413] Trial 672 finished with value: -0.002518550387475499 and parameters: {'learning_rate': 0.028820763225244215, 'sigma_multiplier': 0.4137219537464861, 'initialization_multiplier': 0.7042938332841779}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 672 final loss: -0.00251855
Trial 673:
  Learning Rate: 0.01984434847127923
  Sigma Multiplier: 0.5352560694792199
  Initialization Multiplier: 0.6447820800228339


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.59it/s, loss=-0.003124, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:08:26,942] Trial 673 finished with value: -0.003124340374278792 and parameters: {'learning_rate': 0.01984434847127923, 'sigma_multiplier': 0.5352560694792199, 'initialization_multiplier': 0.6447820800228339}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 673 final loss: -0.00312434
Trial 674:
  Learning Rate: 0.01560381242062952
  Sigma Multiplier: 0.45373945306320373
  Initialization Multiplier: 0.7384385857564248


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.69it/s, loss=-0.003090, elapsed time=0.04, total time=6.75]
[I 2025-06-08 00:08:33,730] Trial 674 finished with value: -0.0030900514263255846 and parameters: {'learning_rate': 0.01560381242062952, 'sigma_multiplier': 0.45373945306320373, 'initialization_multiplier': 0.7384385857564248}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 674 final loss: -0.00309005
Trial 675:
  Learning Rate: 0.026798989790411374
  Sigma Multiplier: 0.5221225928788288
  Initialization Multiplier: 0.6141244977728223


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.00it/s, loss=-0.003745, elapsed time=0.04, total time=6.65]
[I 2025-06-08 00:08:40,419] Trial 675 finished with value: -0.0037452411343899075 and parameters: {'learning_rate': 0.026798989790411374, 'sigma_multiplier': 0.5221225928788288, 'initialization_multiplier': 0.6141244977728223}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 675 final loss: -0.00374524
Trial 676:
  Learning Rate: 0.002004351306262618
  Sigma Multiplier: 0.5631773298693495
  Initialization Multiplier: 0.6828620184535288


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.77it/s, loss=-0.002238, elapsed time=0.04, total time=6.45]
[I 2025-06-08 00:08:46,912] Trial 676 finished with value: -0.0022381786308152117 and parameters: {'learning_rate': 0.002004351306262618, 'sigma_multiplier': 0.5631773298693495, 'initialization_multiplier': 0.6828620184535288}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 676 final loss: -0.00223818
Trial 677:
  Learning Rate: 0.021617456346715178
  Sigma Multiplier: 0.4436072092534172
  Initialization Multiplier: 0.6376418834224342


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.07it/s, loss=-0.003258, elapsed time=0.04, total time=6.65]
[I 2025-06-08 00:08:53,596] Trial 677 finished with value: -0.003257860316429963 and parameters: {'learning_rate': 0.021617456346715178, 'sigma_multiplier': 0.4436072092534172, 'initialization_multiplier': 0.6376418834224342}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 677 final loss: -0.00325786
Trial 678:
  Learning Rate: 0.03199702137609929
  Sigma Multiplier: 0.48664421756660325
  Initialization Multiplier: 0.6759764301225628


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.32it/s, loss=-0.003361, elapsed time=0.04, total time=6.57]
[I 2025-06-08 00:09:00,202] Trial 678 finished with value: -0.0033609880907107697 and parameters: {'learning_rate': 0.03199702137609929, 'sigma_multiplier': 0.48664421756660325, 'initialization_multiplier': 0.6759764301225628}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 678 final loss: -0.00336099
Trial 679:
  Learning Rate: 0.024198771145498253
  Sigma Multiplier: 0.5346164034149382
  Initialization Multiplier: 0.588262371501441


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.62it/s, loss=-0.003848, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:09:06,729] Trial 679 finished with value: -0.003848171410982557 and parameters: {'learning_rate': 0.024198771145498253, 'sigma_multiplier': 0.5346164034149382, 'initialization_multiplier': 0.588262371501441}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 679 final loss: -0.00384817
Trial 680:
  Learning Rate: 0.027971066426726678
  Sigma Multiplier: 0.5859046981725539
  Initialization Multiplier: 0.7313660131044751


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.08it/s, loss=-0.003910, elapsed time=0.04, total time=6.37]
[I 2025-06-08 00:09:13,136] Trial 680 finished with value: -0.0039101156178457585 and parameters: {'learning_rate': 0.027971066426726678, 'sigma_multiplier': 0.5859046981725539, 'initialization_multiplier': 0.7313660131044751}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 680 final loss: -0.00391012
Trial 681:
  Learning Rate: 0.026664837223876938
  Sigma Multiplier: 0.3738766676898039
  Initialization Multiplier: 0.7476112305291799


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.67it/s, loss=-0.002276, elapsed time=0.04, total time=6.76]
[I 2025-06-08 00:09:19,933] Trial 681 finished with value: -0.002275917141265018 and parameters: {'learning_rate': 0.026664837223876938, 'sigma_multiplier': 0.3738766676898039, 'initialization_multiplier': 0.7476112305291799}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 681 final loss: -0.00227592
Trial 682:
  Learning Rate: 0.02075515129053029
  Sigma Multiplier: 0.574297628546929
  Initialization Multiplier: 0.7830699171130517


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.08it/s, loss=-0.003648, elapsed time=0.04, total time=6.38]
[I 2025-06-08 00:09:26,348] Trial 682 finished with value: -0.003648255090569835 and parameters: {'learning_rate': 0.02075515129053029, 'sigma_multiplier': 0.574297628546929, 'initialization_multiplier': 0.7830699171130517}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 682 final loss: -0.00364826
Trial 683:
  Learning Rate: 0.029638767684251978
  Sigma Multiplier: 0.5180557532356795
  Initialization Multiplier: 0.8446490902202983


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.29it/s, loss=-0.003568, elapsed time=0.04, total time=6.58]
[I 2025-06-08 00:09:32,970] Trial 683 finished with value: -0.003567836381842685 and parameters: {'learning_rate': 0.029638767684251978, 'sigma_multiplier': 0.5180557532356795, 'initialization_multiplier': 0.8446490902202983}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 683 final loss: -0.00356784
Trial 684:
  Learning Rate: 0.02349460722656693
  Sigma Multiplier: 0.5636319474757182
  Initialization Multiplier: 0.7032191358367005


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.93it/s, loss=-0.003618, elapsed time=0.04, total time=6.41]
[I 2025-06-08 00:09:39,411] Trial 684 finished with value: -0.0036183320594103236 and parameters: {'learning_rate': 0.02349460722656693, 'sigma_multiplier': 0.5636319474757182, 'initialization_multiplier': 0.7032191358367005}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 684 final loss: -0.00361833
Trial 685:
  Learning Rate: 0.00575963774691895
  Sigma Multiplier: 0.49638039775205245
  Initialization Multiplier: 0.6372026205489155


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.32it/s, loss=-0.002707, elapsed time=0.04, total time=6.57]
[I 2025-06-08 00:09:46,014] Trial 685 finished with value: -0.0027073609193387763 and parameters: {'learning_rate': 0.00575963774691895, 'sigma_multiplier': 0.49638039775205245, 'initialization_multiplier': 0.6372026205489155}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 685 final loss: -0.00270736
Trial 686:
  Learning Rate: 0.018286282788990207
  Sigma Multiplier: 0.5905604572091462
  Initialization Multiplier: 0.728813011818626


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.09it/s, loss=-0.003576, elapsed time=0.04, total time=6.37]
[I 2025-06-08 00:09:52,416] Trial 686 finished with value: -0.003576357449264511 and parameters: {'learning_rate': 0.018286282788990207, 'sigma_multiplier': 0.5905604572091462, 'initialization_multiplier': 0.728813011818626}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 686 final loss: -0.00357636
Trial 687:
  Learning Rate: 0.028028795085724055
  Sigma Multiplier: 0.5433395936731259
  Initialization Multiplier: 0.5673554767110367


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.67it/s, loss=-0.003752, elapsed time=0.04, total time=6.47]
[I 2025-06-08 00:09:58,925] Trial 687 finished with value: -0.003752322027107982 and parameters: {'learning_rate': 0.028028795085724055, 'sigma_multiplier': 0.5433395936731259, 'initialization_multiplier': 0.5673554767110367}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 687 final loss: -0.00375232
Trial 688:
  Learning Rate: 0.022791609633567123
  Sigma Multiplier: 0.6013806445205625
  Initialization Multiplier: 0.6122133095831674


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.01it/s, loss=-0.003783, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:10:05,354] Trial 688 finished with value: -0.003782953732557743 and parameters: {'learning_rate': 0.022791609633567123, 'sigma_multiplier': 0.6013806445205625, 'initialization_multiplier': 0.6122133095831674}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 688 final loss: -0.00378295
Trial 689:
  Learning Rate: 0.031680981897586405
  Sigma Multiplier: 0.4575065613218551
  Initialization Multiplier: 0.6740462905291343


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.21it/s, loss=-0.003136, elapsed time=0.04, total time=8.37]
[I 2025-06-08 00:10:13,764] Trial 689 finished with value: -0.003136064992038095 and parameters: {'learning_rate': 0.031680981897586405, 'sigma_multiplier': 0.4575065613218551, 'initialization_multiplier': 0.6740462905291343}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 689 final loss: -0.00313606
Trial 690:
  Learning Rate: 0.026095351839292385
  Sigma Multiplier: 0.5440017675756461
  Initialization Multiplier: 0.5900279690404052


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.63it/s, loss=-0.003693, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:10:20,289] Trial 690 finished with value: -0.003693110503296621 and parameters: {'learning_rate': 0.026095351839292385, 'sigma_multiplier': 0.5440017675756461, 'initialization_multiplier': 0.5900279690404052}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 690 final loss: -0.00369311
Trial 691:
  Learning Rate: 0.017804626679047163
  Sigma Multiplier: 0.5043115883168057
  Initialization Multiplier: 0.6494447023131424


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.39it/s, loss=-0.003448, elapsed time=0.04, total time=6.55]
[I 2025-06-08 00:10:26,876] Trial 691 finished with value: -0.0034484718643997213 and parameters: {'learning_rate': 0.017804626679047163, 'sigma_multiplier': 0.5043115883168057, 'initialization_multiplier': 0.6494447023131424}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 691 final loss: -0.00344847
Trial 692:
  Learning Rate: 0.034019421547980086
  Sigma Multiplier: 0.6259902156072074
  Initialization Multiplier: 0.7133420170149947


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.66it/s, loss=-0.003803, elapsed time=0.04, total time=6.22]
[I 2025-06-08 00:10:33,138] Trial 692 finished with value: -0.0038033375206992827 and parameters: {'learning_rate': 0.034019421547980086, 'sigma_multiplier': 0.6259902156072074, 'initialization_multiplier': 0.7133420170149947}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 692 final loss: -0.00380334
Trial 693:
  Learning Rate: 0.020853948703386627
  Sigma Multiplier: 0.9605669383721189
  Initialization Multiplier: 0.8029388282931641


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.86it/s, loss=-0.002360, elapsed time=0.03, total time=5.53]
[I 2025-06-08 00:10:38,706] Trial 693 finished with value: -0.0023604817977583487 and parameters: {'learning_rate': 0.020853948703386627, 'sigma_multiplier': 0.9605669383721189, 'initialization_multiplier': 0.8029388282931641}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 693 final loss: -0.00236048
Trial 694:
  Learning Rate: 0.01606816022659168
  Sigma Multiplier: 0.5803813124559143
  Initialization Multiplier: 0.6127907845387343


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.12it/s, loss=-0.003524, elapsed time=0.04, total time=6.35]
[I 2025-06-08 00:10:45,098] Trial 694 finished with value: -0.0035235294116268048 and parameters: {'learning_rate': 0.01606816022659168, 'sigma_multiplier': 0.5803813124559143, 'initialization_multiplier': 0.6127907845387343}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 694 final loss: -0.00352353
Trial 695:
  Learning Rate: 0.028198575815693346
  Sigma Multiplier: 0.5225793222331518
  Initialization Multiplier: 0.5641738830515887


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.58it/s, loss=-0.003887, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:10:51,630] Trial 695 finished with value: -0.0038866317245494584 and parameters: {'learning_rate': 0.028198575815693346, 'sigma_multiplier': 0.5225793222331518, 'initialization_multiplier': 0.5641738830515887}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 695 final loss: -0.00388663
Trial 696:
  Learning Rate: 0.034558360706674236
  Sigma Multiplier: 0.5584394452809422
  Initialization Multiplier: 0.6805661532441872


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.95it/s, loss=-0.004059, elapsed time=0.04, total time=6.4] 
[I 2025-06-08 00:10:58,071] Trial 696 finished with value: -0.004058597898419899 and parameters: {'learning_rate': 0.034558360706674236, 'sigma_multiplier': 0.5584394452809422, 'initialization_multiplier': 0.6805661532441872}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 696 final loss: -0.00405860
Trial 697:
  Learning Rate: 0.022696584231209425
  Sigma Multiplier: 0.6133359523748604
  Initialization Multiplier: 0.7295318052834869


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.02it/s, loss=-0.003798, elapsed time=0.04, total time=6.38]
[I 2025-06-08 00:11:04,491] Trial 697 finished with value: -0.003798248202725837 and parameters: {'learning_rate': 0.022696584231209425, 'sigma_multiplier': 0.6133359523748604, 'initialization_multiplier': 0.7295318052834869}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 697 final loss: -0.00379825
Trial 698:
  Learning Rate: 0.03325623654541077
  Sigma Multiplier: 0.5746149348449597
  Initialization Multiplier: 0.689997783741958


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.97it/s, loss=-0.003688, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:11:10,921] Trial 698 finished with value: -0.00368789223837283 and parameters: {'learning_rate': 0.03325623654541077, 'sigma_multiplier': 0.5746149348449597, 'initialization_multiplier': 0.689997783741958}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 698 final loss: -0.00368789
Trial 699:
  Learning Rate: 0.026087718564547797
  Sigma Multiplier: 0.6541844425897748
  Initialization Multiplier: 0.7681017716864125


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.77it/s, loss=-0.003538, elapsed time=0.04, total time=6.19]
[I 2025-06-08 00:11:17,145] Trial 699 finished with value: -0.00353782545188522 and parameters: {'learning_rate': 0.026087718564547797, 'sigma_multiplier': 0.6541844425897748, 'initialization_multiplier': 0.7681017716864125}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 699 final loss: -0.00353783
Trial 700:
  Learning Rate: 0.012839252077196802
  Sigma Multiplier: 0.4767115715267415
  Initialization Multiplier: 1.1838265918300104


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.70it/s, loss=0.009964, elapsed time=0.04, total time=6.74]
[I 2025-06-08 00:11:23,924] Trial 700 finished with value: 0.00996444826350951 and parameters: {'learning_rate': 0.012839252077196802, 'sigma_multiplier': 0.4767115715267415, 'initialization_multiplier': 1.1838265918300104}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 700 final loss: 0.00996445
Trial 701:
  Learning Rate: 0.029478229421543427
  Sigma Multiplier: 0.5535581213897611
  Initialization Multiplier: 0.681670290584024


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.78it/s, loss=-0.003851, elapsed time=0.04, total time=6.45]
[I 2025-06-08 00:11:30,408] Trial 701 finished with value: -0.0038509141756986236 and parameters: {'learning_rate': 0.029478229421543427, 'sigma_multiplier': 0.5535581213897611, 'initialization_multiplier': 0.681670290584024}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 701 final loss: -0.00385091
Trial 702:
  Learning Rate: 0.02461984354443344
  Sigma Multiplier: 0.6025525369714853
  Initialization Multiplier: 0.7424331541380315


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.31it/s, loss=-0.003715, elapsed time=0.04, total time=6.31]
[I 2025-06-08 00:11:36,757] Trial 702 finished with value: -0.0037148219979894963 and parameters: {'learning_rate': 0.02461984354443344, 'sigma_multiplier': 0.6025525369714853, 'initialization_multiplier': 0.7424331541380315}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 702 final loss: -0.00371482
Trial 703:
  Learning Rate: 0.02004331480618929
  Sigma Multiplier: 0.5272405804781385
  Initialization Multiplier: 0.6458542610298362


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.60it/s, loss=-0.003597, elapsed time=0.04, total time=6.48]
[I 2025-06-08 00:11:43,278] Trial 703 finished with value: -0.003596758268094532 and parameters: {'learning_rate': 0.02004331480618929, 'sigma_multiplier': 0.5272405804781385, 'initialization_multiplier': 0.6458542610298362}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 703 final loss: -0.00359676
Trial 704:
  Learning Rate: 0.03477830779390476
  Sigma Multiplier: 0.4220412764958376
  Initialization Multiplier: 1.109925894638351


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.35it/s, loss=-0.000423, elapsed time=0.04, total time=6.84]
[I 2025-06-08 00:11:50,159] Trial 704 finished with value: -0.0004227698491985441 and parameters: {'learning_rate': 0.03477830779390476, 'sigma_multiplier': 0.4220412764958376, 'initialization_multiplier': 1.109925894638351}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 704 final loss: -0.00042277
Trial 705:
  Learning Rate: 0.02844116344818244
  Sigma Multiplier: 0.582006167598866
  Initialization Multiplier: 0.713646693764793


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.07it/s, loss=-0.003499, elapsed time=0.04, total time=6.36]
[I 2025-06-08 00:11:56,561] Trial 705 finished with value: -0.003499332746275033 and parameters: {'learning_rate': 0.02844116344818244, 'sigma_multiplier': 0.582006167598866, 'initialization_multiplier': 0.713646693764793}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 705 final loss: -0.00349933
Trial 706:
  Learning Rate: 0.022781945360451843
  Sigma Multiplier: 0.6367080828828668
  Initialization Multiplier: 0.6641757012095497


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.60it/s, loss=-0.003615, elapsed time=0.04, total time=6.24]
[I 2025-06-08 00:12:02,838] Trial 706 finished with value: -0.003615327387965201 and parameters: {'learning_rate': 0.022781945360451843, 'sigma_multiplier': 0.6367080828828668, 'initialization_multiplier': 0.6641757012095497}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 706 final loss: -0.00361533
Trial 707:
  Learning Rate: 0.033662813879794716
  Sigma Multiplier: 0.5047414166167912
  Initialization Multiplier: 0.6368960271302507


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.29it/s, loss=-0.003904, elapsed time=0.04, total time=6.58]
[I 2025-06-08 00:12:09,460] Trial 707 finished with value: -0.003904040480416854 and parameters: {'learning_rate': 0.033662813879794716, 'sigma_multiplier': 0.5047414166167912, 'initialization_multiplier': 0.6368960271302507}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 707 final loss: -0.00390404
Trial 708:
  Learning Rate: 0.03561760736992612
  Sigma Multiplier: 0.6832773000112016
  Initialization Multiplier: 0.6875490025640092


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.97it/s, loss=-0.003603, elapsed time=0.04, total time=6.14]
[I 2025-06-08 00:12:15,640] Trial 708 finished with value: -0.0036032085405515953 and parameters: {'learning_rate': 0.03561760736992612, 'sigma_multiplier': 0.6832773000112016, 'initialization_multiplier': 0.6875490025640092}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 708 final loss: -0.00360321
Trial 709:
  Learning Rate: 0.033690061478748284
  Sigma Multiplier: 0.5561191200067034
  Initialization Multiplier: 0.6490739162128217


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.85it/s, loss=-0.003462, elapsed time=0.04, total time=6.43]
[I 2025-06-08 00:12:22,103] Trial 709 finished with value: -0.00346190830206064 and parameters: {'learning_rate': 0.033690061478748284, 'sigma_multiplier': 0.5561191200067034, 'initialization_multiplier': 0.6490739162128217}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 709 final loss: -0.00346191
Trial 710:
  Learning Rate: 0.03691676294758051
  Sigma Multiplier: 0.48463386786864465
  Initialization Multiplier: 0.8135191693356123


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.20it/s, loss=-0.003461, elapsed time=0.04, total time=6.6] 
[I 2025-06-08 00:12:28,739] Trial 710 finished with value: -0.003460624157730925 and parameters: {'learning_rate': 0.03691676294758051, 'sigma_multiplier': 0.48463386786864465, 'initialization_multiplier': 0.8135191693356123}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 710 final loss: -0.00346062
Trial 711:
  Learning Rate: 0.03048660386035886
  Sigma Multiplier: 0.5979698600830526
  Initialization Multiplier: 0.8823946648341547


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.24it/s, loss=-0.003751, elapsed time=0.04, total time=6.32]
[I 2025-06-08 00:12:35,100] Trial 711 finished with value: -0.003750666911499124 and parameters: {'learning_rate': 0.03048660386035886, 'sigma_multiplier': 0.5979698600830526, 'initialization_multiplier': 0.8823946648341547}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 711 final loss: -0.00375067
Trial 712:
  Learning Rate: 0.017848483486613966
  Sigma Multiplier: 0.5321161461261231
  Initialization Multiplier: 0.7519637601742636


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.62it/s, loss=-0.003332, elapsed time=0.04, total time=6.48]
[I 2025-06-08 00:12:41,617] Trial 712 finished with value: -0.003332156307639126 and parameters: {'learning_rate': 0.017848483486613966, 'sigma_multiplier': 0.5321161461261231, 'initialization_multiplier': 0.7519637601742636}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 712 final loss: -0.00333216
Trial 713:
  Learning Rate: 0.037027982727405366
  Sigma Multiplier: 0.2133806421443537
  Initialization Multiplier: 0.7082588022235242


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.87it/s, loss=0.000040, elapsed time=0.04, total time=6.99] 
[I 2025-06-08 00:12:48,645] Trial 713 finished with value: 4.045628776435031e-05 and parameters: {'learning_rate': 0.037027982727405366, 'sigma_multiplier': 0.2133806421443537, 'initialization_multiplier': 0.7082588022235242}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 713 final loss: 0.00004046
Trial 714:
  Learning Rate: 0.02384360681154092
  Sigma Multiplier: 0.6302211493506679
  Initialization Multiplier: 0.6226034488759936


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.36it/s, loss=-0.003397, elapsed time=0.04, total time=6.29]
[I 2025-06-08 00:12:54,976] Trial 714 finished with value: -0.003397249836031762 and parameters: {'learning_rate': 0.02384360681154092, 'sigma_multiplier': 0.6302211493506679, 'initialization_multiplier': 0.6226034488759936}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 714 final loss: -0.00339725
Trial 715:
  Learning Rate: 0.031209386104762295
  Sigma Multiplier: 0.561373213971701
  Initialization Multiplier: 0.6624201221662905


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.45it/s, loss=-0.003681, elapsed time=0.04, total time=6.54]
[I 2025-06-08 00:13:01,554] Trial 715 finished with value: -0.003681183081764517 and parameters: {'learning_rate': 0.031209386104762295, 'sigma_multiplier': 0.561373213971701, 'initialization_multiplier': 0.6624201221662905}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 715 final loss: -0.00368118
Trial 716:
  Learning Rate: 0.020115401684041193
  Sigma Multiplier: 0.4392363405175162
  Initialization Multiplier: 0.6936328491719053


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.82it/s, loss=-0.002808, elapsed time=0.04, total time=6.7] 
[I 2025-06-08 00:13:08,296] Trial 716 finished with value: -0.002807558195228064 and parameters: {'learning_rate': 0.020115401684041193, 'sigma_multiplier': 0.4392363405175162, 'initialization_multiplier': 0.6936328491719053}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 716 final loss: -0.00280756
Trial 717:
  Learning Rate: 0.014971847424560803
  Sigma Multiplier: 0.5083388817286412
  Initialization Multiplier: 0.6426339834369704


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.28it/s, loss=-0.003172, elapsed time=0.04, total time=6.59]
[I 2025-06-08 00:13:14,922] Trial 717 finished with value: -0.0031716388832663 and parameters: {'learning_rate': 0.014971847424560803, 'sigma_multiplier': 0.5083388817286412, 'initialization_multiplier': 0.6426339834369704}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 717 final loss: -0.00317164
Trial 718:
  Learning Rate: 0.02615882412962476
  Sigma Multiplier: 0.5917782857978879
  Initialization Multiplier: 0.6019690914543709


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.06it/s, loss=-0.003579, elapsed time=0.04, total time=6.37]
[I 2025-06-08 00:13:21,334] Trial 718 finished with value: -0.0035786679178978612 and parameters: {'learning_rate': 0.02615882412962476, 'sigma_multiplier': 0.5917782857978879, 'initialization_multiplier': 0.6019690914543709}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 718 final loss: -0.00357867
Trial 719:
  Learning Rate: 0.03605120878401397
  Sigma Multiplier: 0.6580126453008935
  Initialization Multiplier: 0.6697304256963791


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.77it/s, loss=-0.003654, elapsed time=0.04, total time=6.2] 
[I 2025-06-08 00:13:27,570] Trial 719 finished with value: -0.0036538891584390926 and parameters: {'learning_rate': 0.03605120878401397, 'sigma_multiplier': 0.6580126453008935, 'initialization_multiplier': 0.6697304256963791}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 719 final loss: -0.00365389
Trial 720:
  Learning Rate: 0.02960246338567956
  Sigma Multiplier: 0.5489044459281054
  Initialization Multiplier: 0.7370829727176277


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.70it/s, loss=-0.003708, elapsed time=0.04, total time=6.47]
[I 2025-06-08 00:13:34,082] Trial 720 finished with value: -0.003707877652436704 and parameters: {'learning_rate': 0.02960246338567956, 'sigma_multiplier': 0.5489044459281054, 'initialization_multiplier': 0.7370829727176277}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 720 final loss: -0.00370788
Trial 721:
  Learning Rate: 0.024790243251477755
  Sigma Multiplier: 0.4694804010181161
  Initialization Multiplier: 0.6274610795377902


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.14it/s, loss=-0.003488, elapsed time=0.04, total time=6.61]
[I 2025-06-08 00:13:40,729] Trial 721 finished with value: -0.0034880375315908783 and parameters: {'learning_rate': 0.024790243251477755, 'sigma_multiplier': 0.4694804010181161, 'initialization_multiplier': 0.6274610795377902}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 721 final loss: -0.00348804
Trial 722:
  Learning Rate: 0.019812991080242343
  Sigma Multiplier: 0.6264687560852011
  Initialization Multiplier: 0.5860855161032755


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.59it/s, loss=-0.003409, elapsed time=0.04, total time=6.24]
[I 2025-06-08 00:13:47,004] Trial 722 finished with value: -0.003409183047653837 and parameters: {'learning_rate': 0.019812991080242343, 'sigma_multiplier': 0.6264687560852011, 'initialization_multiplier': 0.5860855161032755}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 722 final loss: -0.00340918
Trial 723:
  Learning Rate: 0.038049119788809656
  Sigma Multiplier: 0.5839891322752
  Initialization Multiplier: 0.7688495001152553


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.17it/s, loss=-0.003885, elapsed time=0.04, total time=6.34]
[I 2025-06-08 00:13:53,383] Trial 723 finished with value: -0.0038854525368975956 and parameters: {'learning_rate': 0.038049119788809656, 'sigma_multiplier': 0.5839891322752, 'initialization_multiplier': 0.7688495001152553}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 723 final loss: -0.00388545
Trial 724:
  Learning Rate: 0.002436626695508403
  Sigma Multiplier: 0.3332570094547925
  Initialization Multiplier: 0.7180580416335969


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.52it/s, loss=-0.000955, elapsed time=0.04, total time=6.79]
[I 2025-06-08 00:14:00,210] Trial 724 finished with value: -0.0009552531661128572 and parameters: {'learning_rate': 0.002436626695508403, 'sigma_multiplier': 0.3332570094547925, 'initialization_multiplier': 0.7180580416335969}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 724 final loss: -0.00095525
Trial 725:
  Learning Rate: 0.031221260856414957
  Sigma Multiplier: 0.511366155425762
  Initialization Multiplier: 0.6755589631924108


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.45it/s, loss=-0.003510, elapsed time=0.04, total time=6.54]
[I 2025-06-08 00:14:06,804] Trial 725 finished with value: -0.003510110693103096 and parameters: {'learning_rate': 0.031221260856414957, 'sigma_multiplier': 0.511366155425762, 'initialization_multiplier': 0.6755589631924108}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 725 final loss: -0.00351011
Trial 726:
  Learning Rate: 0.023505384142922286
  Sigma Multiplier: 0.7505354178934791
  Initialization Multiplier: 1.0509314625926096


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.66it/s, loss=-0.001738, elapsed time=0.04, total time=5.97]
[I 2025-06-08 00:14:12,815] Trial 726 finished with value: -0.001738148420051034 and parameters: {'learning_rate': 0.023505384142922286, 'sigma_multiplier': 0.7505354178934791, 'initialization_multiplier': 1.0509314625926096}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 726 final loss: -0.00173815
Trial 727:
  Learning Rate: 0.028003917397451186
  Sigma Multiplier: 0.5640911476034043
  Initialization Multiplier: 0.621732767298188


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.94it/s, loss=-0.003723, elapsed time=0.04, total time=6.41]
[I 2025-06-08 00:14:19,261] Trial 727 finished with value: -0.0037225609912079215 and parameters: {'learning_rate': 0.028003917397451186, 'sigma_multiplier': 0.5640911476034043, 'initialization_multiplier': 0.621732767298188}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 727 final loss: -0.00372256
Trial 728:
  Learning Rate: 0.017668928961636743
  Sigma Multiplier: 0.6118261708326981
  Initialization Multiplier: 0.5807197264982779


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.52it/s, loss=-0.003500, elapsed time=0.04, total time=6.26]
[I 2025-06-08 00:14:25,554] Trial 728 finished with value: -0.0034998417093828997 and parameters: {'learning_rate': 0.017668928961636743, 'sigma_multiplier': 0.6118261708326981, 'initialization_multiplier': 0.5807197264982779}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 728 final loss: -0.00349984
Trial 729:
  Learning Rate: 0.03977410174817273
  Sigma Multiplier: 0.5362677659696118
  Initialization Multiplier: 0.6489678728488139


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.80it/s, loss=-0.003597, elapsed time=0.04, total time=6.43]
[I 2025-06-08 00:14:32,022] Trial 729 finished with value: -0.0035971918110689317 and parameters: {'learning_rate': 0.03977410174817273, 'sigma_multiplier': 0.5362677659696118, 'initialization_multiplier': 0.6489678728488139}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 729 final loss: -0.00359719
Trial 730:
  Learning Rate: 0.03217945379108586
  Sigma Multiplier: 1.1123750075275884
  Initialization Multiplier: 0.7088704674916367


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.95it/s, loss=-0.002000, elapsed time=0.03, total time=5.31]
[I 2025-06-08 00:14:37,369] Trial 730 finished with value: -0.0020002922093137086 and parameters: {'learning_rate': 0.03217945379108586, 'sigma_multiplier': 1.1123750075275884, 'initialization_multiplier': 0.7088704674916367}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 730 final loss: -0.00200029
Trial 731:
  Learning Rate: 0.021302062565495678
  Sigma Multiplier: 0.6977923361833516
  Initialization Multiplier: 0.596743691782259


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.42it/s, loss=-0.003557, elapsed time=0.04, total time=6.04]
[I 2025-06-08 00:14:43,442] Trial 731 finished with value: -0.003557348252941015 and parameters: {'learning_rate': 0.021302062565495678, 'sigma_multiplier': 0.6977923361833516, 'initialization_multiplier': 0.596743691782259}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 731 final loss: -0.00355735
Trial 732:
  Learning Rate: 0.0008262134317170083
  Sigma Multiplier: 0.48516897586116964
  Initialization Multiplier: 0.5516505907296999


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.19it/s, loss=0.001673, elapsed time=0.04, total time=6.6] 
[I 2025-06-08 00:14:50,081] Trial 732 finished with value: 0.001672953226234294 and parameters: {'learning_rate': 0.0008262134317170083, 'sigma_multiplier': 0.48516897586116964, 'initialization_multiplier': 0.5516505907296999}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 732 final loss: 0.00167295
Trial 733:
  Learning Rate: 0.025787113181301613
  Sigma Multiplier: 0.809796725340398
  Initialization Multiplier: 0.6618348816514799


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.55it/s, loss=-0.003223, elapsed time=0.03, total time=5.79]
[I 2025-06-08 00:14:55,915] Trial 733 finished with value: -0.003222817999598764 and parameters: {'learning_rate': 0.025787113181301613, 'sigma_multiplier': 0.809796725340398, 'initialization_multiplier': 0.6618348816514799}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 733 final loss: -0.00322282
Trial 734:
  Learning Rate: 0.015730552151982644
  Sigma Multiplier: 0.6547802241770877
  Initialization Multiplier: 0.6286225244139828


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.87it/s, loss=-0.003210, elapsed time=0.04, total time=6.17]
[I 2025-06-08 00:15:02,128] Trial 734 finished with value: -0.0032096840398286824 and parameters: {'learning_rate': 0.015730552151982644, 'sigma_multiplier': 0.6547802241770877, 'initialization_multiplier': 0.6286225244139828}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 734 final loss: -0.00320968
Trial 735:
  Learning Rate: 0.03354464720338956
  Sigma Multiplier: 0.5870153159081295
  Initialization Multiplier: 0.6836652550849772


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.19it/s, loss=-0.003707, elapsed time=0.04, total time=6.34]
[I 2025-06-08 00:15:08,506] Trial 735 finished with value: -0.0037065914976461703 and parameters: {'learning_rate': 0.03354464720338956, 'sigma_multiplier': 0.5870153159081295, 'initialization_multiplier': 0.6836652550849772}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 735 final loss: -0.00370659
Trial 736:
  Learning Rate: 0.027640090381833713
  Sigma Multiplier: 1.8162838912353387
  Initialization Multiplier: 0.5861116644238727


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.79it/s, loss=-0.000910, elapsed time=0.03, total time=4.85]
[I 2025-06-08 00:15:13,392] Trial 736 finished with value: -0.000910158877709113 and parameters: {'learning_rate': 0.027640090381833713, 'sigma_multiplier': 1.8162838912353387, 'initialization_multiplier': 0.5861116644238727}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 736 final loss: -0.00091016
Trial 737:
  Learning Rate: 0.040439254456806684
  Sigma Multiplier: 0.5388196763394499
  Initialization Multiplier: 0.641317662594206


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.76it/s, loss=-0.003804, elapsed time=0.04, total time=6.45]
[I 2025-06-08 00:15:19,884] Trial 737 finished with value: -0.003804149938987397 and parameters: {'learning_rate': 0.040439254456806684, 'sigma_multiplier': 0.5388196763394499, 'initialization_multiplier': 0.641317662594206}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 737 final loss: -0.00380415
Trial 738:
  Learning Rate: 0.020970107635556314
  Sigma Multiplier: 1.0062578780394664
  Initialization Multiplier: 0.5566647626236653


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.42it/s, loss=-0.002247, elapsed time=0.03, total time=5.41]
[I 2025-06-08 00:15:25,330] Trial 738 finished with value: -0.002247477240513426 and parameters: {'learning_rate': 0.020970107635556314, 'sigma_multiplier': 1.0062578780394664, 'initialization_multiplier': 0.5566647626236653}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 738 final loss: -0.00224748
Trial 739:
  Learning Rate: 0.024165872396969223
  Sigma Multiplier: 0.6186609828739302
  Initialization Multiplier: 0.7559161343010934


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.57it/s, loss=-0.003548, elapsed time=0.04, total time=6.24]
[I 2025-06-08 00:15:31,609] Trial 739 finished with value: -0.003547866443201513 and parameters: {'learning_rate': 0.024165872396969223, 'sigma_multiplier': 0.6186609828739302, 'initialization_multiplier': 0.7559161343010934}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 739 final loss: -0.00354787
Trial 740:
  Learning Rate: 0.03496575818745832
  Sigma Multiplier: 0.49777856664571535
  Initialization Multiplier: 0.7049601140481605


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.47it/s, loss=-0.003497, elapsed time=0.04, total time=6.53]
[I 2025-06-08 00:15:38,172] Trial 740 finished with value: -0.0034972298942206794 and parameters: {'learning_rate': 0.03496575818745832, 'sigma_multiplier': 0.49777856664571535, 'initialization_multiplier': 0.7049601140481605}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 740 final loss: -0.00349723
Trial 741:
  Learning Rate: 0.02944643679102479
  Sigma Multiplier: 0.3887654611215272
  Initialization Multiplier: 0.6139548105512329


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.78it/s, loss=-0.002533, elapsed time=0.04, total time=6.71]
[I 2025-06-08 00:15:44,923] Trial 741 finished with value: -0.002532693798912821 and parameters: {'learning_rate': 0.02944643679102479, 'sigma_multiplier': 0.3887654611215272, 'initialization_multiplier': 0.6139548105512329}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 741 final loss: -0.00253269
Trial 742:
  Learning Rate: 0.018612638909745197
  Sigma Multiplier: 1.549950166015297
  Initialization Multiplier: 0.6627439610672378


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 31.27it/s, loss=-0.001163, elapsed time=0.03, total time=4.95]
[I 2025-06-08 00:15:49,906] Trial 742 finished with value: -0.001162704338026054 and parameters: {'learning_rate': 0.018612638909745197, 'sigma_multiplier': 1.549950166015297, 'initialization_multiplier': 0.6627439610672378}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 742 final loss: -0.00116270
Trial 743:
  Learning Rate: 0.023956029562377846
  Sigma Multiplier: 0.5675200396801632
  Initialization Multiplier: 0.5466277068530017


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.05it/s, loss=-0.003564, elapsed time=0.04, total time=6.38]
[I 2025-06-08 00:15:56,325] Trial 743 finished with value: -0.0035643840645575614 and parameters: {'learning_rate': 0.023956029562377846, 'sigma_multiplier': 0.5675200396801632, 'initialization_multiplier': 0.5466277068530017}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 743 final loss: -0.00356438
Trial 744:
  Learning Rate: 0.04125326433118499
  Sigma Multiplier: 0.4389277099054185
  Initialization Multiplier: 0.5958105887474159


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.01it/s, loss=-0.002924, elapsed time=0.04, total time=6.66]
[I 2025-06-08 00:16:03,018] Trial 744 finished with value: -0.0029237952053604967 and parameters: {'learning_rate': 0.04125326433118499, 'sigma_multiplier': 0.4389277099054185, 'initialization_multiplier': 0.5958105887474159}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 744 final loss: -0.00292380
Trial 745:
  Learning Rate: 0.013094080268687134
  Sigma Multiplier: 0.6022913518015339
  Initialization Multiplier: 0.7262772516410235


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.39it/s, loss=-0.003140, elapsed time=0.04, total time=6.28]
[I 2025-06-08 00:16:09,331] Trial 745 finished with value: -0.0031396007408849933 and parameters: {'learning_rate': 0.013094080268687134, 'sigma_multiplier': 0.6022913518015339, 'initialization_multiplier': 0.7262772516410235}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 745 final loss: -0.00313960
Trial 746:
  Learning Rate: 0.03111009430588505
  Sigma Multiplier: 0.5262881767300799
  Initialization Multiplier: 0.6347011486218535


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.60it/s, loss=-0.003529, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:16:15,858] Trial 746 finished with value: -0.003529081026796881 and parameters: {'learning_rate': 0.03111009430588505, 'sigma_multiplier': 0.5262881767300799, 'initialization_multiplier': 0.6347011486218535}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 746 final loss: -0.00352908
Trial 747:
  Learning Rate: 0.020979535117134917
  Sigma Multiplier: 0.6769511542035727
  Initialization Multiplier: 0.6776956289497696


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.28it/s, loss=-0.003658, elapsed time=0.05, total time=6.08]
[I 2025-06-08 00:16:21,975] Trial 747 finished with value: -0.0036579184846776834 and parameters: {'learning_rate': 0.020979535117134917, 'sigma_multiplier': 0.6769511542035727, 'initialization_multiplier': 0.6776956289497696}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 747 final loss: -0.00365792
Trial 748:
  Learning Rate: 0.02627867106487357
  Sigma Multiplier: 0.5639898219305179
  Initialization Multiplier: 0.7861851521425307


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.10it/s, loss=-0.004064, elapsed time=0.04, total time=6.36]
[I 2025-06-08 00:16:28,372] Trial 748 finished with value: -0.0040642736334269005 and parameters: {'learning_rate': 0.02627867106487357, 'sigma_multiplier': 0.5639898219305179, 'initialization_multiplier': 0.7861851521425307}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 748 final loss: -0.00406427
Trial 749:
  Learning Rate: 0.016373602727880965
  Sigma Multiplier: 0.47888166706305285
  Initialization Multiplier: 0.8323046293868401


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.30it/s, loss=-0.003045, elapsed time=0.04, total time=6.57]
[I 2025-06-08 00:16:34,979] Trial 749 finished with value: -0.0030448453985017004 and parameters: {'learning_rate': 0.016373602727880965, 'sigma_multiplier': 0.47888166706305285, 'initialization_multiplier': 0.8323046293868401}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 749 final loss: -0.00304485
Trial 750:
  Learning Rate: 0.025834089095666292
  Sigma Multiplier: 0.5472970979887488
  Initialization Multiplier: 0.7782580349028954


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.83it/s, loss=-0.003800, elapsed time=0.04, total time=6.43]
[I 2025-06-08 00:16:41,443] Trial 750 finished with value: -0.0037995870913312218 and parameters: {'learning_rate': 0.025834089095666292, 'sigma_multiplier': 0.5472970979887488, 'initialization_multiplier': 0.7782580349028954}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 750 final loss: -0.00379959
Trial 751:
  Learning Rate: 0.019150231084341142
  Sigma Multiplier: 0.1264036236422006
  Initialization Multiplier: 0.7812671217767513


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.04it/s, loss=-0.000072, elapsed time=0.04, total time=6.93]
[I 2025-06-08 00:16:48,413] Trial 751 finished with value: -7.167655677778038e-05 and parameters: {'learning_rate': 0.019150231084341142, 'sigma_multiplier': 0.1264036236422006, 'initialization_multiplier': 0.7812671217767513}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 751 final loss: -0.00007168
Trial 752:
  Learning Rate: 0.026914384320779864
  Sigma Multiplier: 0.516551085824496
  Initialization Multiplier: 0.7505920946597366


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.57it/s, loss=-0.003896, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:16:54,946] Trial 752 finished with value: -0.0038963532221751793 and parameters: {'learning_rate': 0.026914384320779864, 'sigma_multiplier': 0.516551085824496, 'initialization_multiplier': 0.7505920946597366}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 752 final loss: -0.00389635
Trial 753:
  Learning Rate: 0.022147736954083792
  Sigma Multiplier: 0.5718909049655362
  Initialization Multiplier: 0.7279937594613471


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.94it/s, loss=-0.003804, elapsed time=0.04, total time=6.4] 
[I 2025-06-08 00:17:01,387] Trial 753 finished with value: -0.0038038451115842264 and parameters: {'learning_rate': 0.022147736954083792, 'sigma_multiplier': 0.5718909049655362, 'initialization_multiplier': 0.7279937594613471}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 753 final loss: -0.00380385
Trial 754:
  Learning Rate: 0.0004932143932934948
  Sigma Multiplier: 0.2763431552370501
  Initialization Multiplier: 0.8117897428741961


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.15it/s, loss=0.005495, elapsed time=0.04, total time=6.92]
[I 2025-06-08 00:17:08,341] Trial 754 finished with value: 0.0054954227768657105 and parameters: {'learning_rate': 0.0004932143932934948, 'sigma_multiplier': 0.2763431552370501, 'initialization_multiplier': 0.8117897428741961}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 754 final loss: 0.00549542
Trial 755:
  Learning Rate: 0.022641341489862143
  Sigma Multiplier: 0.49583127369504726
  Initialization Multiplier: 0.7057782295615569


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.35it/s, loss=-0.003666, elapsed time=0.04, total time=6.56]
[I 2025-06-08 00:17:14,940] Trial 755 finished with value: -0.0036662944803870856 and parameters: {'learning_rate': 0.022641341489862143, 'sigma_multiplier': 0.49583127369504726, 'initialization_multiplier': 0.7057782295615569}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 755 final loss: -0.00366629
Trial 756:
  Learning Rate: 0.01769899022593581
  Sigma Multiplier: 0.6280367231347528
  Initialization Multiplier: 0.7313269804272231


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.64it/s, loss=-0.003511, elapsed time=0.04, total time=6.22]
[I 2025-06-08 00:17:21,201] Trial 756 finished with value: -0.003510606924519623 and parameters: {'learning_rate': 0.01769899022593581, 'sigma_multiplier': 0.6280367231347528, 'initialization_multiplier': 0.7313269804272231}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 756 final loss: -0.00351061
Trial 757:
  Learning Rate: 0.027979521404757998
  Sigma Multiplier: 0.45035665580067014
  Initialization Multiplier: 0.7888254557052448


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.09it/s, loss=-0.003044, elapsed time=0.04, total time=6.63]
[I 2025-06-08 00:17:27,870] Trial 757 finished with value: -0.0030438635390922296 and parameters: {'learning_rate': 0.027979521404757998, 'sigma_multiplier': 0.45035665580067014, 'initialization_multiplier': 0.7888254557052448}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 757 final loss: -0.00304386
Trial 758:
  Learning Rate: 0.024804511931876046
  Sigma Multiplier: 0.5568833973377822
  Initialization Multiplier: 0.6875915645389736


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.95it/s, loss=-0.003579, elapsed time=0.04, total time=6.4] 
[I 2025-06-08 00:17:34,310] Trial 758 finished with value: -0.003578726291076188 and parameters: {'learning_rate': 0.024804511931876046, 'sigma_multiplier': 0.5568833973377822, 'initialization_multiplier': 0.6875915645389736}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 758 final loss: -0.00357873
Trial 759:
  Learning Rate: 0.021245428486942997
  Sigma Multiplier: 0.5247901064586563
  Initialization Multiplier: 0.6914105020997013


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.68it/s, loss=-0.003583, elapsed time=0.04, total time=6.47]
[I 2025-06-08 00:17:40,818] Trial 759 finished with value: -0.0035833728205813186 and parameters: {'learning_rate': 0.021245428486942997, 'sigma_multiplier': 0.5247901064586563, 'initialization_multiplier': 0.6914105020997013}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 759 final loss: -0.00358337
Trial 760:
  Learning Rate: 0.03207340180145197
  Sigma Multiplier: 0.5927847184172189
  Initialization Multiplier: 0.750280216683621


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.23it/s, loss=-0.003960, elapsed time=0.04, total time=6.32]
[I 2025-06-08 00:17:47,174] Trial 760 finished with value: -0.003959619585960741 and parameters: {'learning_rate': 0.03207340180145197, 'sigma_multiplier': 0.5927847184172189, 'initialization_multiplier': 0.750280216683621}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 760 final loss: -0.00395962
Trial 761:
  Learning Rate: 0.014230905473664615
  Sigma Multiplier: 0.6469159241784953
  Initialization Multiplier: 0.8526559798967198


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.78it/s, loss=-0.003277, elapsed time=0.04, total time=6.18]
[I 2025-06-08 00:17:53,389] Trial 761 finished with value: -0.0032765030031276553 and parameters: {'learning_rate': 0.014230905473664615, 'sigma_multiplier': 0.6469159241784953, 'initialization_multiplier': 0.8526559798967198}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 761 final loss: -0.00327650
Trial 762:
  Learning Rate: 0.028180473818274536
  Sigma Multiplier: 0.6013725341441197
  Initialization Multiplier: 0.8000233177670544


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.42it/s, loss=-0.003367, elapsed time=0.04, total time=6.29]
[I 2025-06-08 00:17:59,722] Trial 762 finished with value: -0.0033668757352553593 and parameters: {'learning_rate': 0.028180473818274536, 'sigma_multiplier': 0.6013725341441197, 'initialization_multiplier': 0.8000233177670544}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 762 final loss: -0.00336688
Trial 763:
  Learning Rate: 0.018362679190665074
  Sigma Multiplier: 0.6272510269275938
  Initialization Multiplier: 0.8825394596757814


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.46it/s, loss=-0.003350, elapsed time=0.04, total time=6.27]
[I 2025-06-08 00:18:06,035] Trial 763 finished with value: -0.0033499107621420042 and parameters: {'learning_rate': 0.018362679190665074, 'sigma_multiplier': 0.6272510269275938, 'initialization_multiplier': 0.8825394596757814}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 763 final loss: -0.00334991
Trial 764:
  Learning Rate: 0.025001881279077487
  Sigma Multiplier: 0.582213303182247
  Initialization Multiplier: 0.8434285322395627


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.12it/s, loss=-0.003658, elapsed time=0.04, total time=6.36]
[I 2025-06-08 00:18:12,437] Trial 764 finished with value: -0.0036575057136140395 and parameters: {'learning_rate': 0.025001881279077487, 'sigma_multiplier': 0.582213303182247, 'initialization_multiplier': 0.8434285322395627}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 764 final loss: -0.00365751
Trial 765:
  Learning Rate: 0.029029643667548614
  Sigma Multiplier: 0.68793447471978
  Initialization Multiplier: 0.9748000005601507


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.22it/s, loss=-0.003285, elapsed time=0.04, total time=6.09]
[I 2025-06-08 00:18:18,565] Trial 765 finished with value: -0.003285480833481852 and parameters: {'learning_rate': 0.029029643667548614, 'sigma_multiplier': 0.68793447471978, 'initialization_multiplier': 0.9748000005601507}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 765 final loss: -0.00328548
Trial 766:
  Learning Rate: 0.021756466053720214
  Sigma Multiplier: 0.5943038354712035
  Initialization Multiplier: 0.7774634686772963


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.33it/s, loss=-0.003478, elapsed time=0.04, total time=6.3] 
[I 2025-06-08 00:18:24,904] Trial 766 finished with value: -0.0034783355236702946 and parameters: {'learning_rate': 0.021756466053720214, 'sigma_multiplier': 0.5943038354712035, 'initialization_multiplier': 0.7774634686772963}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 766 final loss: -0.00347834
Trial 767:
  Learning Rate: 0.03370964933997076
  Sigma Multiplier: 0.6485407117691888
  Initialization Multiplier: 0.762251872375312


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.91it/s, loss=-0.003703, elapsed time=0.04, total time=6.16]
[I 2025-06-08 00:18:31,104] Trial 767 finished with value: -0.003702869179252464 and parameters: {'learning_rate': 0.03370964933997076, 'sigma_multiplier': 0.6485407117691888, 'initialization_multiplier': 0.762251872375312}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 767 final loss: -0.00370287
Trial 768:
  Learning Rate: 0.024393403955172695
  Sigma Multiplier: 0.5655832653450562
  Initialization Multiplier: 0.743004374645535


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.02it/s, loss=-0.003946, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:18:37,528] Trial 768 finished with value: -0.003945940366813042 and parameters: {'learning_rate': 0.024393403955172695, 'sigma_multiplier': 0.5655832653450562, 'initialization_multiplier': 0.743004374645535}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 768 final loss: -0.00394594
Trial 769:
  Learning Rate: 0.030859046620242803
  Sigma Multiplier: 0.5537630719609258
  Initialization Multiplier: 0.7782796643450278


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.87it/s, loss=-0.003623, elapsed time=0.04, total time=6.42]
[I 2025-06-08 00:18:43,986] Trial 769 finished with value: -0.0036226144557757003 and parameters: {'learning_rate': 0.030859046620242803, 'sigma_multiplier': 0.5537630719609258, 'initialization_multiplier': 0.7782796643450278}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 769 final loss: -0.00362261
Trial 770:
  Learning Rate: 0.035256053913389394
  Sigma Multiplier: 0.564449976039251
  Initialization Multiplier: 0.7482046555501952


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.04it/s, loss=-0.003615, elapsed time=0.04, total time=6.38]
[I 2025-06-08 00:18:50,406] Trial 770 finished with value: -0.003614716813340211 and parameters: {'learning_rate': 0.035256053913389394, 'sigma_multiplier': 0.564449976039251, 'initialization_multiplier': 0.7482046555501952}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 770 final loss: -0.00361472
Trial 771:
  Learning Rate: 0.026204914432804427
  Sigma Multiplier: 0.5504685186593092
  Initialization Multiplier: 0.7497750574664057


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.15it/s, loss=-0.003579, elapsed time=0.04, total time=6.62]
[I 2025-06-08 00:18:57,067] Trial 771 finished with value: -0.003579043518184344 and parameters: {'learning_rate': 0.026204914432804427, 'sigma_multiplier': 0.5504685186593092, 'initialization_multiplier': 0.7497750574664057}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 771 final loss: -0.00357904
Trial 772:
  Learning Rate: 0.03952723110437941
  Sigma Multiplier: 0.5891944000011372
  Initialization Multiplier: 0.8180743089405399


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.09it/s, loss=-0.003638, elapsed time=0.04, total time=6.37]
[I 2025-06-08 00:19:03,481] Trial 772 finished with value: -0.003638266281812886 and parameters: {'learning_rate': 0.03952723110437941, 'sigma_multiplier': 0.5891944000011372, 'initialization_multiplier': 0.8180743089405399}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 772 final loss: -0.00363827
Trial 773:
  Learning Rate: 0.03007916785700582
  Sigma Multiplier: 0.5431493256697361
  Initialization Multiplier: 0.8121460563353076


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.80it/s, loss=-0.003993, elapsed time=0.04, total time=6.45]
[I 2025-06-08 00:19:09,965] Trial 773 finished with value: -0.003992733035500597 and parameters: {'learning_rate': 0.03007916785700582, 'sigma_multiplier': 0.5431493256697361, 'initialization_multiplier': 0.8121460563353076}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 773 final loss: -0.00399273
Trial 774:
  Learning Rate: 0.030297568107874664
  Sigma Multiplier: 0.5190364140971703
  Initialization Multiplier: 0.8065628110214749


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.37it/s, loss=-0.003547, elapsed time=0.04, total time=6.56]
[I 2025-06-08 00:19:16,561] Trial 774 finished with value: -0.0035473103945520856 and parameters: {'learning_rate': 0.030297568107874664, 'sigma_multiplier': 0.5190364140971703, 'initialization_multiplier': 0.8065628110214749}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 774 final loss: -0.00354731
Trial 775:
  Learning Rate: 0.04392480527439746
  Sigma Multiplier: 0.5335203748982562
  Initialization Multiplier: 0.9200224402445897


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.57it/s, loss=-0.003417, elapsed time=0.04, total time=6.5] 
[I 2025-06-08 00:19:23,103] Trial 775 finished with value: -0.0034174285067569894 and parameters: {'learning_rate': 0.04392480527439746, 'sigma_multiplier': 0.5335203748982562, 'initialization_multiplier': 0.9200224402445897}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 775 final loss: -0.00341743
Trial 776:
  Learning Rate: 0.006808888785334045
  Sigma Multiplier: 0.5955480655850701
  Initialization Multiplier: 0.8783451059984033


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.17it/s, loss=-0.002717, elapsed time=0.04, total time=6.34]
[I 2025-06-08 00:19:29,485] Trial 776 finished with value: -0.002716641329429622 and parameters: {'learning_rate': 0.006808888785334045, 'sigma_multiplier': 0.5955480655850701, 'initialization_multiplier': 0.8783451059984033}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 776 final loss: -0.00271664
Trial 777:
  Learning Rate: 0.03662589393846059
  Sigma Multiplier: 0.48051998917719524
  Initialization Multiplier: 0.811034611909337


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.18it/s, loss=-0.003531, elapsed time=0.06, total time=6.61]
[I 2025-06-08 00:19:36,134] Trial 777 finished with value: -0.003530665868477283 and parameters: {'learning_rate': 0.03662589393846059, 'sigma_multiplier': 0.48051998917719524, 'initialization_multiplier': 0.811034611909337}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 777 final loss: -0.00353067
Trial 778:
  Learning Rate: 0.03189732861334021
  Sigma Multiplier: 0.5760253783114251
  Initialization Multiplier: 0.8258095747832475


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.09it/s, loss=-0.003561, elapsed time=0.04, total time=6.36]
[I 2025-06-08 00:19:42,539] Trial 778 finished with value: -0.0035605076087411778 and parameters: {'learning_rate': 0.03189732861334021, 'sigma_multiplier': 0.5760253783114251, 'initialization_multiplier': 0.8258095747832475}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 778 final loss: -0.00356051
Trial 779:
  Learning Rate: 0.024225094259955445
  Sigma Multiplier: 0.6169501819496386
  Initialization Multiplier: 0.875787167321668


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.43it/s, loss=-0.003972, elapsed time=0.04, total time=6.28]
[I 2025-06-08 00:19:48,863] Trial 779 finished with value: -0.0039724145822186375 and parameters: {'learning_rate': 0.024225094259955445, 'sigma_multiplier': 0.6169501819496386, 'initialization_multiplier': 0.875787167321668}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 779 final loss: -0.00397241
Trial 780:
  Learning Rate: 0.022447524522781613
  Sigma Multiplier: 0.6260794315278784
  Initialization Multiplier: 0.8557919277397696


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.47it/s, loss=-0.003576, elapsed time=0.04, total time=6.26]
[I 2025-06-08 00:19:55,161] Trial 780 finished with value: -0.0035760530585154235 and parameters: {'learning_rate': 0.022447524522781613, 'sigma_multiplier': 0.6260794315278784, 'initialization_multiplier': 0.8557919277397696}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 780 final loss: -0.00357605
Trial 781:
  Learning Rate: 0.024323723863271834
  Sigma Multiplier: 0.6764542172843304
  Initialization Multiplier: 0.8362369164028269


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.93it/s, loss=-0.003359, elapsed time=0.04, total time=6.15]
[I 2025-06-08 00:20:01,353] Trial 781 finished with value: -0.0033591838091602943 and parameters: {'learning_rate': 0.024323723863271834, 'sigma_multiplier': 0.6764542172843304, 'initialization_multiplier': 0.8362369164028269}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 781 final loss: -0.00335918
Trial 782:
  Learning Rate: 0.019173587655346592
  Sigma Multiplier: 0.6181271574101892
  Initialization Multiplier: 0.8324293471038854


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.29it/s, loss=-0.003462, elapsed time=0.04, total time=6.31]
[I 2025-06-08 00:20:07,707] Trial 782 finished with value: -0.0034622008051277972 and parameters: {'learning_rate': 0.019173587655346592, 'sigma_multiplier': 0.6181271574101892, 'initialization_multiplier': 0.8324293471038854}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 782 final loss: -0.00346220
Trial 783:
  Learning Rate: 0.024770428308228707
  Sigma Multiplier: 0.5302369079669687
  Initialization Multiplier: 0.7909226112719392


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.42it/s, loss=-0.003719, elapsed time=0.04, total time=6.54]
[I 2025-06-08 00:20:14,289] Trial 783 finished with value: -0.00371925436709589 and parameters: {'learning_rate': 0.024770428308228707, 'sigma_multiplier': 0.5302369079669687, 'initialization_multiplier': 0.7909226112719392}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 783 final loss: -0.00371925
Trial 784:
  Learning Rate: 0.03661682654073433
  Sigma Multiplier: 0.8620585322364134
  Initialization Multiplier: 0.9276660816531956


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.87it/s, loss=-0.002797, elapsed time=0.03, total time=5.72]
[I 2025-06-08 00:20:20,045] Trial 784 finished with value: -0.00279681647516523 and parameters: {'learning_rate': 0.03661682654073433, 'sigma_multiplier': 0.8620585322364134, 'initialization_multiplier': 0.9276660816531956}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 784 final loss: -0.00279682
Trial 785:
  Learning Rate: 0.04597516613990552
  Sigma Multiplier: 0.6574915760324395
  Initialization Multiplier: 0.839598046610732


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.78it/s, loss=-0.003188, elapsed time=0.04, total time=6.19]
[I 2025-06-08 00:20:26,279] Trial 785 finished with value: -0.0031882110352449908 and parameters: {'learning_rate': 0.04597516613990552, 'sigma_multiplier': 0.6574915760324395, 'initialization_multiplier': 0.839598046610732}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 785 final loss: -0.00318821
Trial 786:
  Learning Rate: 0.016307139223754347
  Sigma Multiplier: 0.5546336479616965
  Initialization Multiplier: 0.8710668031820351


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.69it/s, loss=-0.003429, elapsed time=0.04, total time=6.46]
[I 2025-06-08 00:20:32,780] Trial 786 finished with value: -0.0034286025634812506 and parameters: {'learning_rate': 0.016307139223754347, 'sigma_multiplier': 0.5546336479616965, 'initialization_multiplier': 0.8710668031820351}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 786 final loss: -0.00342860
Trial 787:
  Learning Rate: 0.020619870173298385
  Sigma Multiplier: 0.7251118879393106
  Initialization Multiplier: 0.8073459615579419


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.61it/s, loss=-0.003233, elapsed time=0.04, total time=5.99]
[I 2025-06-08 00:20:38,806] Trial 787 finished with value: -0.00323335698000245 and parameters: {'learning_rate': 0.020619870173298385, 'sigma_multiplier': 0.7251118879393106, 'initialization_multiplier': 0.8073459615579419}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 787 final loss: -0.00323336
Trial 788:
  Learning Rate: 0.03160513525153884
  Sigma Multiplier: 0.48915128095286886
  Initialization Multiplier: 0.8822784444619758


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.22it/s, loss=-0.003416, elapsed time=0.04, total time=6.6] 
[I 2025-06-08 00:20:45,443] Trial 788 finished with value: -0.0034155372081864726 and parameters: {'learning_rate': 0.03160513525153884, 'sigma_multiplier': 0.48915128095286886, 'initialization_multiplier': 0.8822784444619758}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 788 final loss: -0.00341554
Trial 789:
  Learning Rate: 0.024420537793435136
  Sigma Multiplier: 0.6084014432366457
  Initialization Multiplier: 0.9429421695074156


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.34it/s, loss=-0.003388, elapsed time=0.04, total time=6.3] 
[I 2025-06-08 00:20:51,780] Trial 789 finished with value: -0.003387926449374378 and parameters: {'learning_rate': 0.024420537793435136, 'sigma_multiplier': 0.6084014432366457, 'initialization_multiplier': 0.9429421695074156}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 789 final loss: -0.00338793
Trial 790:
  Learning Rate: 0.0002661892567844277
  Sigma Multiplier: 0.5261875187126892
  Initialization Multiplier: 0.7808876513114755


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.15it/s, loss=0.058077, elapsed time=0.04, total time=6.61]
[I 2025-06-08 00:20:58,429] Trial 790 finished with value: 0.05807725087355746 and parameters: {'learning_rate': 0.0002661892567844277, 'sigma_multiplier': 0.5261875187126892, 'initialization_multiplier': 0.7808876513114755}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 790 final loss: 0.05807725
Trial 791:
  Learning Rate: 0.028284104984369982
  Sigma Multiplier: 0.5663050427264869
  Initialization Multiplier: 0.8879892793053877


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.65it/s, loss=-0.003676, elapsed time=0.04, total time=6.47]
[I 2025-06-08 00:21:04,942] Trial 791 finished with value: -0.0036756661335359464 and parameters: {'learning_rate': 0.028284104984369982, 'sigma_multiplier': 0.5663050427264869, 'initialization_multiplier': 0.8879892793053877}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 791 final loss: -0.00367567
Trial 792:
  Learning Rate: 0.039921112245059606
  Sigma Multiplier: 0.6289549807643028
  Initialization Multiplier: 0.7430894127347889


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.65it/s, loss=-0.003747, elapsed time=0.04, total time=6.23]
[I 2025-06-08 00:21:11,209] Trial 792 finished with value: -0.003746873795158442 and parameters: {'learning_rate': 0.039921112245059606, 'sigma_multiplier': 0.6289549807643028, 'initialization_multiplier': 0.7430894127347889}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 792 final loss: -0.00374687
Trial 793:
  Learning Rate: 0.020682559328513938
  Sigma Multiplier: 0.47302855674009736
  Initialization Multiplier: 0.7612791828130533


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.09it/s, loss=-0.002949, elapsed time=0.04, total time=6.64]
[I 2025-06-08 00:21:17,885] Trial 793 finished with value: -0.0029493837530125714 and parameters: {'learning_rate': 0.020682559328513938, 'sigma_multiplier': 0.47302855674009736, 'initialization_multiplier': 0.7612791828130533}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 793 final loss: -0.00294938
Trial 794:
  Learning Rate: 0.032880634406743464
  Sigma Multiplier: 0.5876907930842569
  Initialization Multiplier: 0.7768863670778485


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.15it/s, loss=-0.003503, elapsed time=0.04, total time=6.35]
[I 2025-06-08 00:21:24,268] Trial 794 finished with value: -0.0035029063428708663 and parameters: {'learning_rate': 0.032880634406743464, 'sigma_multiplier': 0.5876907930842569, 'initialization_multiplier': 0.7768863670778485}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 794 final loss: -0.00350291
Trial 795:
  Learning Rate: 0.027500024150357124
  Sigma Multiplier: 0.5152716111142203
  Initialization Multiplier: 0.7745832588659551


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.48it/s, loss=-0.003960, elapsed time=0.04, total time=6.53]
[I 2025-06-08 00:21:30,832] Trial 795 finished with value: -0.003959819079876305 and parameters: {'learning_rate': 0.027500024150357124, 'sigma_multiplier': 0.5152716111142203, 'initialization_multiplier': 0.7745832588659551}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 795 final loss: -0.00395982
Trial 796:
  Learning Rate: 0.0366444549334681
  Sigma Multiplier: 0.47425849948476084
  Initialization Multiplier: 0.8351120611974845


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.18it/s, loss=-0.003589, elapsed time=0.04, total time=6.61]
[I 2025-06-08 00:21:37,477] Trial 796 finished with value: -0.0035886621388515013 and parameters: {'learning_rate': 0.0366444549334681, 'sigma_multiplier': 0.47425849948476084, 'initialization_multiplier': 0.8351120611974845}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 796 final loss: -0.00358866
Trial 797:
  Learning Rate: 0.030120486321266446
  Sigma Multiplier: 0.4169523397117073
  Initialization Multiplier: 0.8202458185789162


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.85it/s, loss=-0.003159, elapsed time=0.04, total time=6.71]
[I 2025-06-08 00:21:44,223] Trial 797 finished with value: -0.0031589495300326613 and parameters: {'learning_rate': 0.030120486321266446, 'sigma_multiplier': 0.4169523397117073, 'initialization_multiplier': 0.8202458185789162}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 797 final loss: -0.00315895
Trial 798:
  Learning Rate: 0.04232676382867813
  Sigma Multiplier: 0.44424205977290077
  Initialization Multiplier: 0.791349242264328


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.04it/s, loss=-0.003281, elapsed time=0.04, total time=6.64]
[I 2025-06-08 00:21:50,905] Trial 798 finished with value: -0.003281371616004356 and parameters: {'learning_rate': 0.04232676382867813, 'sigma_multiplier': 0.44424205977290077, 'initialization_multiplier': 0.791349242264328}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 798 final loss: -0.00328137
Trial 799:
  Learning Rate: 0.027394547024582203
  Sigma Multiplier: 0.5114181764904663
  Initialization Multiplier: 0.8792729516643233


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.48it/s, loss=-0.004014, elapsed time=0.04, total time=6.51]
[I 2025-06-08 00:21:57,457] Trial 799 finished with value: -0.004014353069161924 and parameters: {'learning_rate': 0.027394547024582203, 'sigma_multiplier': 0.5114181764904663, 'initialization_multiplier': 0.8792729516643233}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 799 final loss: -0.00401435
Trial 800:
  Learning Rate: 0.048701341385846586
  Sigma Multiplier: 0.5131084205757172
  Initialization Multiplier: 0.9211159013994076


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.28it/s, loss=-0.003509, elapsed time=0.04, total time=6.58]
[I 2025-06-08 00:22:04,073] Trial 800 finished with value: -0.003508795076105393 and parameters: {'learning_rate': 0.048701341385846586, 'sigma_multiplier': 0.5131084205757172, 'initialization_multiplier': 0.9211159013994076}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 800 final loss: -0.00350880
Trial 801:
  Learning Rate: 0.03604482126703407
  Sigma Multiplier: 0.45788665189296707
  Initialization Multiplier: 0.9084844525081903


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.00it/s, loss=-0.002900, elapsed time=0.04, total time=6.65]
[I 2025-06-08 00:22:10,761] Trial 801 finished with value: -0.002899988957205406 and parameters: {'learning_rate': 0.03604482126703407, 'sigma_multiplier': 0.45788665189296707, 'initialization_multiplier': 0.9084844525081903}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 801 final loss: -0.00289999
Trial 802:
  Learning Rate: 0.029833105923336656
  Sigma Multiplier: 0.49959355640915115
  Initialization Multiplier: 0.8971919079423833


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.27it/s, loss=-0.003195, elapsed time=0.04, total time=6.58]
[I 2025-06-08 00:22:17,384] Trial 802 finished with value: -0.003194973634136304 and parameters: {'learning_rate': 0.029833105923336656, 'sigma_multiplier': 0.49959355640915115, 'initialization_multiplier': 0.8971919079423833}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 802 final loss: -0.00319497
Trial 803:
  Learning Rate: 0.0339275811994077
  Sigma Multiplier: 0.5356372431308505
  Initialization Multiplier: 1.0018682480871948


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.33it/s, loss=-0.002773, elapsed time=0.04, total time=6.56]
[I 2025-06-08 00:22:23,981] Trial 803 finished with value: -0.0027733951252519023 and parameters: {'learning_rate': 0.0339275811994077, 'sigma_multiplier': 0.5356372431308505, 'initialization_multiplier': 1.0018682480871948}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 803 final loss: -0.00277340
Trial 804:
  Learning Rate: 0.027857234262891935
  Sigma Multiplier: 0.5455652618967387
  Initialization Multiplier: 0.959004547631218


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.55it/s, loss=-0.002990, elapsed time=0.04, total time=6.51]
[I 2025-06-08 00:22:30,528] Trial 804 finished with value: -0.0029895910781956533 and parameters: {'learning_rate': 0.027857234262891935, 'sigma_multiplier': 0.5455652618967387, 'initialization_multiplier': 0.959004547631218}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 804 final loss: -0.00298959
Trial 805:
  Learning Rate: 0.04260388828064424
  Sigma Multiplier: 0.4979774234573662
  Initialization Multiplier: 0.8507206734219644


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.38it/s, loss=-0.003579, elapsed time=0.04, total time=6.55]
[I 2025-06-08 00:22:37,118] Trial 805 finished with value: -0.003578921402377212 and parameters: {'learning_rate': 0.04260388828064424, 'sigma_multiplier': 0.4979774234573662, 'initialization_multiplier': 0.8507206734219644}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 805 final loss: -0.00357892
Trial 806:
  Learning Rate: 0.031122878949094508
  Sigma Multiplier: 0.5961667865694049
  Initialization Multiplier: 0.811365578987481


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.35it/s, loss=-0.003733, elapsed time=0.04, total time=6.3] 
[I 2025-06-08 00:22:43,456] Trial 806 finished with value: -0.0037329906270283514 and parameters: {'learning_rate': 0.031122878949094508, 'sigma_multiplier': 0.5961667865694049, 'initialization_multiplier': 0.811365578987481}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 806 final loss: -0.00373299
Trial 807:
  Learning Rate: 0.04002831277945746
  Sigma Multiplier: 0.6513172096494856
  Initialization Multiplier: 0.8908632149877245


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.85it/s, loss=-0.003709, elapsed time=0.04, total time=6.16]
[I 2025-06-08 00:22:49,657] Trial 807 finished with value: -0.003709202083386794 and parameters: {'learning_rate': 0.04002831277945746, 'sigma_multiplier': 0.6513172096494856, 'initialization_multiplier': 0.8908632149877245}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 807 final loss: -0.00370920
Trial 808:
  Learning Rate: 0.026599413390078045
  Sigma Multiplier: 0.39838109288591034
  Initialization Multiplier: 0.8197448158437015


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.88it/s, loss=-0.002498, elapsed time=0.04, total time=6.7] 
[I 2025-06-08 00:22:56,391] Trial 808 finished with value: -0.0024984035907947557 and parameters: {'learning_rate': 0.026599413390078045, 'sigma_multiplier': 0.39838109288591034, 'initialization_multiplier': 0.8197448158437015}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 808 final loss: -0.00249840
Trial 809:
  Learning Rate: 0.03545188668143435
  Sigma Multiplier: 0.534914849860587
  Initialization Multiplier: 0.8366138868152767


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.61it/s, loss=-0.003789, elapsed time=0.04, total time=6.48]
[I 2025-06-08 00:23:02,913] Trial 809 finished with value: -0.003788981140946421 and parameters: {'learning_rate': 0.03545188668143435, 'sigma_multiplier': 0.534914849860587, 'initialization_multiplier': 0.8366138868152767}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 809 final loss: -0.00378898
Trial 810:
  Learning Rate: 0.05021616728592695
  Sigma Multiplier: 0.5675541486948801
  Initialization Multiplier: 0.8742767812355028


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.97it/s, loss=-0.003533, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:23:09,341] Trial 810 finished with value: -0.003532711365418417 and parameters: {'learning_rate': 0.05021616728592695, 'sigma_multiplier': 0.5675541486948801, 'initialization_multiplier': 0.8742767812355028}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 810 final loss: -0.00353271
Trial 811:
  Learning Rate: 0.02679309277029925
  Sigma Multiplier: 0.45679853093124845
  Initialization Multiplier: 0.9751877463793495


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.79it/s, loss=-0.002688, elapsed time=0.04, total time=6.72]
[I 2025-06-08 00:23:16,098] Trial 811 finished with value: -0.0026878163464831035 and parameters: {'learning_rate': 0.02679309277029925, 'sigma_multiplier': 0.45679853093124845, 'initialization_multiplier': 0.9751877463793495}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 811 final loss: -0.00268782
Trial 812:
  Learning Rate: 0.029948076170548562
  Sigma Multiplier: 0.6023174509591989
  Initialization Multiplier: 0.8561444667420399


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 18.15it/s, loss=-0.003609, elapsed time=0.04, total time=8.4] 
[I 2025-06-08 00:23:24,535] Trial 812 finished with value: -0.003608973460200932 and parameters: {'learning_rate': 0.029948076170548562, 'sigma_multiplier': 0.6023174509591989, 'initialization_multiplier': 0.8561444667420399}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 812 final loss: -0.00360897
Trial 813:
  Learning Rate: 0.03728513198617526
  Sigma Multiplier: 0.6314859719630299
  Initialization Multiplier: 0.7766388390613244


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.74it/s, loss=-0.003774, elapsed time=0.04, total time=6.21]
[I 2025-06-08 00:23:30,779] Trial 813 finished with value: -0.003774271423358187 and parameters: {'learning_rate': 0.03728513198617526, 'sigma_multiplier': 0.6314859719630299, 'initialization_multiplier': 0.7766388390613244}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 813 final loss: -0.00377427
Trial 814:
  Learning Rate: 0.023554076238545947
  Sigma Multiplier: 0.5068761250663961
  Initialization Multiplier: 0.7759210219094327


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.53it/s, loss=-0.003549, elapsed time=0.04, total time=6.51]
[I 2025-06-08 00:23:37,324] Trial 814 finished with value: -0.0035486278558840584 and parameters: {'learning_rate': 0.023554076238545947, 'sigma_multiplier': 0.5068761250663961, 'initialization_multiplier': 0.7759210219094327}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 814 final loss: -0.00354863
Trial 815:
  Learning Rate: 0.03379404331069104
  Sigma Multiplier: 0.5495341351988062
  Initialization Multiplier: 0.8564023746160949


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.98it/s, loss=-0.003675, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:23:43,751] Trial 815 finished with value: -0.00367467959838939 and parameters: {'learning_rate': 0.03379404331069104, 'sigma_multiplier': 0.5495341351988062, 'initialization_multiplier': 0.8564023746160949}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 815 final loss: -0.00367468
Trial 816:
  Learning Rate: 0.04490112088903967
  Sigma Multiplier: 0.5756485574597114
  Initialization Multiplier: 0.7511398875399757


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.22it/s, loss=-0.003467, elapsed time=0.04, total time=6.32]
[I 2025-06-08 00:23:50,112] Trial 816 finished with value: -0.0034668784957248818 and parameters: {'learning_rate': 0.04490112088903967, 'sigma_multiplier': 0.5756485574597114, 'initialization_multiplier': 0.7511398875399757}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 816 final loss: -0.00346688
Trial 817:
  Learning Rate: 0.02734228428125977
  Sigma Multiplier: 0.6877693638207538
  Initialization Multiplier: 0.8015618352363144


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.28it/s, loss=-0.003620, elapsed time=0.04, total time=6.07]
[I 2025-06-08 00:23:56,222] Trial 817 finished with value: -0.003620139814595053 and parameters: {'learning_rate': 0.02734228428125977, 'sigma_multiplier': 0.6877693638207538, 'initialization_multiplier': 0.8015618352363144}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 817 final loss: -0.00362014
Trial 818:
  Learning Rate: 0.024033138578552788
  Sigma Multiplier: 0.6163234065820734
  Initialization Multiplier: 0.9331503396730948


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.41it/s, loss=-0.003693, elapsed time=0.04, total time=6.28]
[I 2025-06-08 00:24:02,539] Trial 818 finished with value: -0.0036925705599666703 and parameters: {'learning_rate': 0.024033138578552788, 'sigma_multiplier': 0.6163234065820734, 'initialization_multiplier': 0.9331503396730948}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 818 final loss: -0.00369257
Trial 819:
  Learning Rate: 0.03019356764552061
  Sigma Multiplier: 0.9059631377987254
  Initialization Multiplier: 0.73800356497704


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 27.32it/s, loss=-0.002747, elapsed time=0.03, total time=5.62]
[I 2025-06-08 00:24:08,202] Trial 819 finished with value: -0.002747025121888043 and parameters: {'learning_rate': 0.03019356764552061, 'sigma_multiplier': 0.9059631377987254, 'initialization_multiplier': 0.73800356497704}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 819 final loss: -0.00274703
Trial 820:
  Learning Rate: 0.03798305981172327
  Sigma Multiplier: 0.4857178544255148
  Initialization Multiplier: 0.7919214668983716


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.36it/s, loss=-0.003671, elapsed time=0.04, total time=6.56]
[I 2025-06-08 00:24:14,807] Trial 820 finished with value: -0.0036713621050980666 and parameters: {'learning_rate': 0.03798305981172327, 'sigma_multiplier': 0.4857178544255148, 'initialization_multiplier': 0.7919214668983716}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 820 final loss: -0.00367136
Trial 821:
  Learning Rate: 0.020201010098967027
  Sigma Multiplier: 0.5327147974010549
  Initialization Multiplier: 0.8683916688758908


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.77it/s, loss=-0.003578, elapsed time=0.04, total time=6.46]
[I 2025-06-08 00:24:21,303] Trial 821 finished with value: -0.003578376662847848 and parameters: {'learning_rate': 0.020201010098967027, 'sigma_multiplier': 0.5327147974010549, 'initialization_multiplier': 0.8683916688758908}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 821 final loss: -0.00357838
Trial 822:
  Learning Rate: 0.03252172380005105
  Sigma Multiplier: 0.4243661475011318
  Initialization Multiplier: 0.9067560476161054


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.01it/s, loss=-0.002716, elapsed time=0.04, total time=6.66]
[I 2025-06-08 00:24:27,999] Trial 822 finished with value: -0.0027163165599274402 and parameters: {'learning_rate': 0.03252172380005105, 'sigma_multiplier': 0.4243661475011318, 'initialization_multiplier': 0.9067560476161054}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 822 final loss: -0.00271632
Trial 823:
  Learning Rate: 0.025322683004171034
  Sigma Multiplier: 0.574314191518761
  Initialization Multiplier: 1.8605075080980455


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.99it/s, loss=0.082753, elapsed time=0.04, total time=6.66]
[I 2025-06-08 00:24:34,699] Trial 823 finished with value: 0.08275250955187619 and parameters: {'learning_rate': 0.025322683004171034, 'sigma_multiplier': 0.574314191518761, 'initialization_multiplier': 1.8605075080980455}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 823 final loss: 0.08275251
Trial 824:
  Learning Rate: 0.01816296753866415
  Sigma Multiplier: 0.6510407059692758
  Initialization Multiplier: 0.7546017706282152


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.76it/s, loss=-0.003512, elapsed time=0.04, total time=6.2] 
[I 2025-06-08 00:24:40,944] Trial 824 finished with value: -0.0035123068078712657 and parameters: {'learning_rate': 0.01816296753866415, 'sigma_multiplier': 0.6510407059692758, 'initialization_multiplier': 0.7546017706282152}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 824 final loss: -0.00351231
Trial 825:
  Learning Rate: 0.044263954969550455
  Sigma Multiplier: 0.5145608163645226
  Initialization Multiplier: 0.7233679394923354


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.67it/s, loss=-0.003677, elapsed time=0.04, total time=6.48]
[I 2025-06-08 00:24:47,464] Trial 825 finished with value: -0.003676651909839265 and parameters: {'learning_rate': 0.044263954969550455, 'sigma_multiplier': 0.5145608163645226, 'initialization_multiplier': 0.7233679394923354}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 825 final loss: -0.00367665
Trial 826:
  Learning Rate: 0.022259511924264105
  Sigma Multiplier: 0.6052481910313924
  Initialization Multiplier: 0.7195570450064851


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.57it/s, loss=-0.003440, elapsed time=0.04, total time=6.24]
[I 2025-06-08 00:24:53,746] Trial 826 finished with value: -0.003440357227670908 and parameters: {'learning_rate': 0.022259511924264105, 'sigma_multiplier': 0.6052481910313924, 'initialization_multiplier': 0.7195570450064851}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 826 final loss: -0.00344036
Trial 827:
  Learning Rate: 0.028548074288413484
  Sigma Multiplier: 0.5473095035267986
  Initialization Multiplier: 0.777635418291901


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.99it/s, loss=-0.003589, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:25:00,176] Trial 827 finished with value: -0.0035885297465054543 and parameters: {'learning_rate': 0.028548074288413484, 'sigma_multiplier': 0.5473095035267986, 'initialization_multiplier': 0.777635418291901}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 827 final loss: -0.00358853
Trial 828:
  Learning Rate: 0.03618122850562784
  Sigma Multiplier: 0.4684839546820608
  Initialization Multiplier: 0.8148484409564799


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.69it/s, loss=-0.003411, elapsed time=0.04, total time=6.76]
[I 2025-06-08 00:25:06,973] Trial 828 finished with value: -0.0034109032837874964 and parameters: {'learning_rate': 0.03618122850562784, 'sigma_multiplier': 0.4684839546820608, 'initialization_multiplier': 0.8148484409564799}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 828 final loss: -0.00341090
Trial 829:
  Learning Rate: 0.0313797727857074
  Sigma Multiplier: 0.5739077053474887
  Initialization Multiplier: 0.7402572713211476


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.87it/s, loss=-0.003648, elapsed time=0.04, total time=6.41]
[I 2025-06-08 00:25:13,424] Trial 829 finished with value: -0.0036477864478249486 and parameters: {'learning_rate': 0.0313797727857074, 'sigma_multiplier': 0.5739077053474887, 'initialization_multiplier': 0.7402572713211476}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 829 final loss: -0.00364779
Trial 830:
  Learning Rate: 0.050656923893594816
  Sigma Multiplier: 0.6444123926677455
  Initialization Multiplier: 0.7102852767825942


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.68it/s, loss=-0.003431, elapsed time=0.04, total time=6.21]
[I 2025-06-08 00:25:19,670] Trial 830 finished with value: -0.0034313187992598 and parameters: {'learning_rate': 0.050656923893594816, 'sigma_multiplier': 0.6444123926677455, 'initialization_multiplier': 0.7102852767825942}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 830 final loss: -0.00343132
Trial 831:
  Learning Rate: 0.025482518052874076
  Sigma Multiplier: 1.1915514421684865
  Initialization Multiplier: 0.848034613467514


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.54it/s, loss=-0.001777, elapsed time=0.03, total time=5.21]
[I 2025-06-08 00:25:24,915] Trial 831 finished with value: -0.0017773496913431463 and parameters: {'learning_rate': 0.025482518052874076, 'sigma_multiplier': 1.1915514421684865, 'initialization_multiplier': 0.848034613467514}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 831 final loss: -0.00177735
Trial 832:
  Learning Rate: 0.021505100178253918
  Sigma Multiplier: 0.501553790743859
  Initialization Multiplier: 1.5929644523837296


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.82it/s, loss=0.056945, elapsed time=0.04, total time=6.71]
[I 2025-06-08 00:25:31,662] Trial 832 finished with value: 0.056944910660970335 and parameters: {'learning_rate': 0.021505100178253918, 'sigma_multiplier': 0.501553790743859, 'initialization_multiplier': 1.5929644523837296}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 832 final loss: 0.05694491
Trial 833:
  Learning Rate: 0.00011236504492141057
  Sigma Multiplier: 0.5969527988641805
  Initialization Multiplier: 0.7632902363359122


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.33it/s, loss=0.072836, elapsed time=0.04, total time=6.3] 
[I 2025-06-08 00:25:38,003] Trial 833 finished with value: 0.07283632293617737 and parameters: {'learning_rate': 0.00011236504492141057, 'sigma_multiplier': 0.5969527988641805, 'initialization_multiplier': 0.7632902363359122}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 833 final loss: 0.07283632
Trial 834:
  Learning Rate: 0.04028537621290199
  Sigma Multiplier: 0.5415729498656047
  Initialization Multiplier: 0.71719857843816


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.86it/s, loss=-0.003594, elapsed time=0.04, total time=6.43]
[I 2025-06-08 00:25:44,466] Trial 834 finished with value: -0.0035937677106643163 and parameters: {'learning_rate': 0.04028537621290199, 'sigma_multiplier': 0.5415729498656047, 'initialization_multiplier': 0.71719857843816}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 834 final loss: -0.00359377
Trial 835:
  Learning Rate: 0.001655469982600821
  Sigma Multiplier: 0.6698784697955389
  Initialization Multiplier: 0.7869313177813837


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.95it/s, loss=0.000085, elapsed time=0.04, total time=6.15] 
[I 2025-06-08 00:25:50,657] Trial 835 finished with value: 8.468093935661586e-05 and parameters: {'learning_rate': 0.001655469982600821, 'sigma_multiplier': 0.6698784697955389, 'initialization_multiplier': 0.7869313177813837}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 835 final loss: 0.00008468
Trial 836:
  Learning Rate: 0.01649562036874656
  Sigma Multiplier: 0.5642540383504744
  Initialization Multiplier: 0.7009646129869064


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.99it/s, loss=-0.003567, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:25:57,083] Trial 836 finished with value: -0.003566719741008312 and parameters: {'learning_rate': 0.01649562036874656, 'sigma_multiplier': 0.5642540383504744, 'initialization_multiplier': 0.7009646129869064}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 836 final loss: -0.00356672
Trial 837:
  Learning Rate: 0.032358304783321495
  Sigma Multiplier: 0.628045290674989
  Initialization Multiplier: 1.0210786329434811


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.05it/s, loss=-0.003052, elapsed time=0.04, total time=6.37]
[I 2025-06-08 00:26:03,491] Trial 837 finished with value: -0.0030524766565099446 and parameters: {'learning_rate': 0.032358304783321495, 'sigma_multiplier': 0.628045290674989, 'initialization_multiplier': 1.0210786329434811}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 837 final loss: -0.00305248
Trial 838:
  Learning Rate: 0.027079795037745942
  Sigma Multiplier: 0.715350718145634
  Initialization Multiplier: 0.8180318030867672


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.46it/s, loss=-0.003503, elapsed time=0.04, total time=6.02]
[I 2025-06-08 00:26:09,553] Trial 838 finished with value: -0.0035034216795657605 and parameters: {'learning_rate': 0.027079795037745942, 'sigma_multiplier': 0.715350718145634, 'initialization_multiplier': 0.8180318030867672}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 838 final loss: -0.00350342
Trial 839:
  Learning Rate: 0.01984891255880698
  Sigma Multiplier: 0.4500516768872992
  Initialization Multiplier: 0.7554776288190901


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.73it/s, loss=-0.003033, elapsed time=0.04, total time=6.73]
[I 2025-06-08 00:26:16,324] Trial 839 finished with value: -0.0030330719866888457 and parameters: {'learning_rate': 0.01984891255880698, 'sigma_multiplier': 0.4500516768872992, 'initialization_multiplier': 0.7554776288190901}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 839 final loss: -0.00303307
Trial 840:
  Learning Rate: 0.02382748947227738
  Sigma Multiplier: 0.5160268862547955
  Initialization Multiplier: 0.9411680475478583


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.28it/s, loss=-0.003446, elapsed time=0.04, total time=6.58]
[I 2025-06-08 00:26:22,947] Trial 840 finished with value: -0.0034464162367260675 and parameters: {'learning_rate': 0.02382748947227738, 'sigma_multiplier': 0.5160268862547955, 'initialization_multiplier': 0.9411680475478583}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 840 final loss: -0.00344642
Trial 841:
  Learning Rate: 0.035548612271534184
  Sigma Multiplier: 0.6053549763240226
  Initialization Multiplier: 0.8629095360656732


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.28it/s, loss=-0.003788, elapsed time=0.04, total time=6.31]
[I 2025-06-08 00:26:29,301] Trial 841 finished with value: -0.00378820013878497 and parameters: {'learning_rate': 0.035548612271534184, 'sigma_multiplier': 0.6053549763240226, 'initialization_multiplier': 0.8629095360656732}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 841 final loss: -0.00378820
Trial 842:
  Learning Rate: 0.028846559159963148
  Sigma Multiplier: 0.5512970399741117
  Initialization Multiplier: 0.6912851852509961


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.73it/s, loss=-0.004004, elapsed time=0.04, total time=6.46]
[I 2025-06-08 00:26:35,794] Trial 842 finished with value: -0.00400352205209691 and parameters: {'learning_rate': 0.028846559159963148, 'sigma_multiplier': 0.5512970399741117, 'initialization_multiplier': 0.6912851852509961}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 842 final loss: -0.00400352
Trial 843:
  Learning Rate: 0.022646456688942163
  Sigma Multiplier: 0.4878869966528212
  Initialization Multiplier: 0.738034673504257


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.10it/s, loss=-0.003496, elapsed time=0.04, total time=6.63]
[I 2025-06-08 00:26:42,461] Trial 843 finished with value: -0.0034962827945156745 and parameters: {'learning_rate': 0.022646456688942163, 'sigma_multiplier': 0.4878869966528212, 'initialization_multiplier': 0.738034673504257}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 843 final loss: -0.00349628
Trial 844:
  Learning Rate: 0.027662338767230732
  Sigma Multiplier: 0.5359787132363109
  Initialization Multiplier: 0.7019694004180709


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.31it/s, loss=-0.003018, elapsed time=0.04, total time=6.57]
[I 2025-06-08 00:26:49,069] Trial 844 finished with value: -0.003017694326450264 and parameters: {'learning_rate': 0.027662338767230732, 'sigma_multiplier': 0.5359787132363109, 'initialization_multiplier': 0.7019694004180709}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 844 final loss: -0.00301769
Trial 845:
  Learning Rate: 0.01797518746029694
  Sigma Multiplier: 0.4727976595211009
  Initialization Multiplier: 0.8056879863532767


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.99it/s, loss=-0.002887, elapsed time=0.04, total time=6.65]
[I 2025-06-08 00:26:55,766] Trial 845 finished with value: -0.0028872496958062947 and parameters: {'learning_rate': 0.01797518746029694, 'sigma_multiplier': 0.4727976595211009, 'initialization_multiplier': 0.8056879863532767}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 845 final loss: -0.00288725
Trial 846:
  Learning Rate: 0.011064097779110268
  Sigma Multiplier: 0.5231799534567004
  Initialization Multiplier: 0.7357392323567735


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.23it/s, loss=-0.003014, elapsed time=0.04, total time=6.6] 
[I 2025-06-08 00:27:02,408] Trial 846 finished with value: -0.003013626031743246 and parameters: {'learning_rate': 0.011064097779110268, 'sigma_multiplier': 0.5231799534567004, 'initialization_multiplier': 0.7357392323567735}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 846 final loss: -0.00301363
Trial 847:
  Learning Rate: 0.02514642961684369
  Sigma Multiplier: 0.5577488562747238
  Initialization Multiplier: 0.7028536808676008


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.77it/s, loss=-0.003712, elapsed time=0.04, total time=6.44]
[I 2025-06-08 00:27:08,892] Trial 847 finished with value: -0.0037120962051520358 and parameters: {'learning_rate': 0.02514642961684369, 'sigma_multiplier': 0.5577488562747238, 'initialization_multiplier': 0.7028536808676008}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 847 final loss: -0.00371210
Trial 848:
  Learning Rate: 0.02967112508429562
  Sigma Multiplier: 0.40500900753736535
  Initialization Multiplier: 0.7518702462056768


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.63it/s, loss=-0.002803, elapsed time=0.04, total time=6.76]
[I 2025-06-08 00:27:15,693] Trial 848 finished with value: -0.0028025491882885875 and parameters: {'learning_rate': 0.02967112508429562, 'sigma_multiplier': 0.40500900753736535, 'initialization_multiplier': 0.7518702462056768}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 848 final loss: -0.00280255
Trial 849:
  Learning Rate: 0.015205771464793627
  Sigma Multiplier: 0.5039207095819349
  Initialization Multiplier: 0.6874034589247846


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.34it/s, loss=-0.002539, elapsed time=0.04, total time=6.57]
[I 2025-06-08 00:27:22,304] Trial 849 finished with value: -0.002538822158295134 and parameters: {'learning_rate': 0.015205771464793627, 'sigma_multiplier': 0.5039207095819349, 'initialization_multiplier': 0.6874034589247846}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 849 final loss: -0.00253882
Trial 850:
  Learning Rate: 0.020068585534662106
  Sigma Multiplier: 0.57038131919422
  Initialization Multiplier: 0.7889071319162607


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.89it/s, loss=-0.003589, elapsed time=0.04, total time=6.41]
[I 2025-06-08 00:27:28,751] Trial 850 finished with value: -0.0035889611892738075 and parameters: {'learning_rate': 0.020068585534662106, 'sigma_multiplier': 0.57038131919422, 'initialization_multiplier': 0.7889071319162607}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 850 final loss: -0.00358896
Trial 851:
  Learning Rate: 0.04166038014969597
  Sigma Multiplier: 0.5377308576740782
  Initialization Multiplier: 0.6797684370638334


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.17it/s, loss=-0.003814, elapsed time=0.04, total time=6.61]
[I 2025-06-08 00:27:35,399] Trial 851 finished with value: -0.0038139253391807747 and parameters: {'learning_rate': 0.04166038014969597, 'sigma_multiplier': 0.5377308576740782, 'initialization_multiplier': 0.6797684370638334}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 851 final loss: -0.00381393
Trial 852:
  Learning Rate: 0.033252450317757684
  Sigma Multiplier: 0.4725010136848201
  Initialization Multiplier: 0.9009136402541854


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.92it/s, loss=-0.003011, elapsed time=0.04, total time=6.68]
[I 2025-06-08 00:27:42,118] Trial 852 finished with value: -0.0030105305924861847 and parameters: {'learning_rate': 0.033252450317757684, 'sigma_multiplier': 0.4725010136848201, 'initialization_multiplier': 0.9009136402541854}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 852 final loss: -0.00301053
Trial 853:
  Learning Rate: 0.049413020745960816
  Sigma Multiplier: 0.4413325552391649
  Initialization Multiplier: 0.7236428749238735


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.72it/s, loss=-0.003079, elapsed time=0.04, total time=6.74]
[I 2025-06-08 00:27:48,894] Trial 853 finished with value: -0.003079075846603269 and parameters: {'learning_rate': 0.049413020745960816, 'sigma_multiplier': 0.4413325552391649, 'initialization_multiplier': 0.7236428749238735}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 853 final loss: -0.00307908
Trial 854:
  Learning Rate: 0.02433011235881822
  Sigma Multiplier: 0.5829775249762076
  Initialization Multiplier: 0.7717835068997656


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.68it/s, loss=-0.003880, elapsed time=0.04, total time=6.76]
[I 2025-06-08 00:27:55,692] Trial 854 finished with value: -0.00387985129942222 and parameters: {'learning_rate': 0.02433011235881822, 'sigma_multiplier': 0.5829775249762076, 'initialization_multiplier': 0.7717835068997656}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 854 final loss: -0.00387985
Trial 855:
  Learning Rate: 0.028364166251672256
  Sigma Multiplier: 0.5108263259876088
  Initialization Multiplier: 0.6895354407384608


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.97it/s, loss=-0.003471, elapsed time=0.04, total time=6.68]
[I 2025-06-08 00:28:02,412] Trial 855 finished with value: -0.003471377209025297 and parameters: {'learning_rate': 0.028364166251672256, 'sigma_multiplier': 0.5108263259876088, 'initialization_multiplier': 0.6895354407384608}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 855 final loss: -0.00347138
Trial 856:
  Learning Rate: 0.03902926494221854
  Sigma Multiplier: 0.5541802829745359
  Initialization Multiplier: 0.8070959965478486


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.44it/s, loss=-0.003654, elapsed time=0.04, total time=6.55]
[I 2025-06-08 00:28:08,999] Trial 856 finished with value: -0.0036540603553720298 and parameters: {'learning_rate': 0.03902926494221854, 'sigma_multiplier': 0.5541802829745359, 'initialization_multiplier': 0.8070959965478486}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 856 final loss: -0.00365406
Trial 857:
  Learning Rate: 0.02051515991774553
  Sigma Multiplier: 0.613763204398417
  Initialization Multiplier: 0.8430133111407002


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.51it/s, loss=-0.003475, elapsed time=0.04, total time=6.52]
[I 2025-06-08 00:28:15,561] Trial 857 finished with value: -0.0034751992515274165 and parameters: {'learning_rate': 0.02051515991774553, 'sigma_multiplier': 0.613763204398417, 'initialization_multiplier': 0.8430133111407002}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 857 final loss: -0.00347520
Trial 858:
  Learning Rate: 0.03230611820500866
  Sigma Multiplier: 1.6880913912368514
  Initialization Multiplier: 0.7256120701575168


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.04it/s, loss=-0.001053, elapsed time=0.04, total time=5.31]
[I 2025-06-08 00:28:20,917] Trial 858 finished with value: -0.0010534460375420261 and parameters: {'learning_rate': 0.03230611820500866, 'sigma_multiplier': 1.6880913912368514, 'initialization_multiplier': 0.7256120701575168}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 858 final loss: -0.00105345
Trial 859:
  Learning Rate: 0.022764338628847365
  Sigma Multiplier: 0.5793938117288528
  Initialization Multiplier: 0.6799788114694395


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.56it/s, loss=-0.003862, elapsed time=0.04, total time=6.53]
[I 2025-06-08 00:28:27,491] Trial 859 finished with value: -0.003862442281847495 and parameters: {'learning_rate': 0.022764338628847365, 'sigma_multiplier': 0.5793938117288528, 'initialization_multiplier': 0.6799788114694395}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 859 final loss: -0.00386244
Trial 860:
  Learning Rate: 0.005011156344002316
  Sigma Multiplier: 0.5042873371662461
  Initialization Multiplier: 0.747078451212218


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.32it/s, loss=-0.002616, elapsed time=0.04, total time=6.58]
[I 2025-06-08 00:28:34,106] Trial 860 finished with value: -0.00261566315155405 and parameters: {'learning_rate': 0.005011156344002316, 'sigma_multiplier': 0.5042873371662461, 'initialization_multiplier': 0.747078451212218}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 860 final loss: -0.00261566
Trial 861:
  Learning Rate: 0.028416435977846377
  Sigma Multiplier: 0.6388812016708004
  Initialization Multiplier: 0.6660530146051047


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.51it/s, loss=-0.003641, elapsed time=0.04, total time=6.25]
[I 2025-06-08 00:28:40,394] Trial 861 finished with value: -0.0036405114617266486 and parameters: {'learning_rate': 0.028416435977846377, 'sigma_multiplier': 0.6388812016708004, 'initialization_multiplier': 0.6660530146051047}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 861 final loss: -0.00364051
Trial 862:
  Learning Rate: 0.03748230725032764
  Sigma Multiplier: 0.5475121418379184
  Initialization Multiplier: 0.7086914283884082


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.81it/s, loss=-0.003712, elapsed time=0.04, total time=6.44]
[I 2025-06-08 00:28:46,877] Trial 862 finished with value: -0.0037120420068338105 and parameters: {'learning_rate': 0.03748230725032764, 'sigma_multiplier': 0.5475121418379184, 'initialization_multiplier': 0.7086914283884082}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 862 final loss: -0.00371204
Trial 863:
  Learning Rate: 0.017616129029805815
  Sigma Multiplier: 1.3195477170179646
  Initialization Multiplier: 0.7771640415708272


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.97it/s, loss=-0.001568, elapsed time=0.04, total time=5.14]
[I 2025-06-08 00:28:52,059] Trial 863 finished with value: -0.001567836822073256 and parameters: {'learning_rate': 0.017616129029805815, 'sigma_multiplier': 1.3195477170179646, 'initialization_multiplier': 0.7771640415708272}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 863 final loss: -0.00156784
Trial 864:
  Learning Rate: 0.025989057373900747
  Sigma Multiplier: 0.5996547890847452
  Initialization Multiplier: 0.6606920148569332


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.39it/s, loss=-0.003924, elapsed time=0.04, total time=6.29]
[I 2025-06-08 00:28:58,401] Trial 864 finished with value: -0.003924100739194999 and parameters: {'learning_rate': 0.025989057373900747, 'sigma_multiplier': 0.5996547890847452, 'initialization_multiplier': 0.6606920148569332}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 864 final loss: -0.00392410
Trial 865:
  Learning Rate: 0.047272264117995476
  Sigma Multiplier: 0.4637811021475855
  Initialization Multiplier: 0.835201527826516


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.96it/s, loss=-0.003277, elapsed time=0.04, total time=6.66]
[I 2025-06-08 00:29:05,105] Trial 865 finished with value: -0.003276510040844686 and parameters: {'learning_rate': 0.047272264117995476, 'sigma_multiplier': 0.4637811021475855, 'initialization_multiplier': 0.835201527826516}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 865 final loss: -0.00327651
Trial 866:
  Learning Rate: 0.03110268826159759
  Sigma Multiplier: 0.5337595026146159
  Initialization Multiplier: 0.7250304129761135


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.85it/s, loss=-0.003619, elapsed time=0.04, total time=6.7] 
[I 2025-06-08 00:29:11,845] Trial 866 finished with value: -0.003619068379632042 and parameters: {'learning_rate': 0.03110268826159759, 'sigma_multiplier': 0.5337595026146159, 'initialization_multiplier': 0.7250304129761135}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 866 final loss: -0.00361907
Trial 867:
  Learning Rate: 0.02036484200059506
  Sigma Multiplier: 0.6656679288297483
  Initialization Multiplier: 0.7639853199764247


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.77it/s, loss=-0.003529, elapsed time=0.04, total time=6.19]
[I 2025-06-08 00:29:18,074] Trial 867 finished with value: -0.0035290776942367673 and parameters: {'learning_rate': 0.02036484200059506, 'sigma_multiplier': 0.6656679288297483, 'initialization_multiplier': 0.7639853199764247}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 867 final loss: -0.00352908
Trial 868:
  Learning Rate: 0.02534325810697039
  Sigma Multiplier: 0.5730484343445256
  Initialization Multiplier: 0.6707310355100952


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.98it/s, loss=-0.003789, elapsed time=0.04, total time=6.4] 
[I 2025-06-08 00:29:24,511] Trial 868 finished with value: -0.003789342736752004 and parameters: {'learning_rate': 0.02534325810697039, 'sigma_multiplier': 0.5730484343445256, 'initialization_multiplier': 0.6707310355100952}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 868 final loss: -0.00378934
Trial 869:
  Learning Rate: 0.056149013758531166
  Sigma Multiplier: 0.35383965243414534
  Initialization Multiplier: 0.8711241499748421


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.38it/s, loss=-0.001862, elapsed time=0.04, total time=6.85]
[I 2025-06-08 00:29:31,395] Trial 869 finished with value: -0.0018617850827084678 and parameters: {'learning_rate': 0.056149013758531166, 'sigma_multiplier': 0.35383965243414534, 'initialization_multiplier': 0.8711241499748421}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 869 final loss: -0.00186179
Trial 870:
  Learning Rate: 0.03505385865002976
  Sigma Multiplier: 0.6219345927485104
  Initialization Multiplier: 0.7032776880702584


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.49it/s, loss=-0.003699, elapsed time=0.04, total time=6.26]
[I 2025-06-08 00:29:37,698] Trial 870 finished with value: -0.003699285760164588 and parameters: {'learning_rate': 0.03505385865002976, 'sigma_multiplier': 0.6219345927485104, 'initialization_multiplier': 0.7032776880702584}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 870 final loss: -0.00369929
Trial 871:
  Learning Rate: 0.042129212694455645
  Sigma Multiplier: 0.49732169462617876
  Initialization Multiplier: 0.8130021083747077


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.30it/s, loss=-0.003589, elapsed time=0.04, total time=6.58]
[I 2025-06-08 00:29:44,318] Trial 871 finished with value: -0.0035889229043843426 and parameters: {'learning_rate': 0.042129212694455645, 'sigma_multiplier': 0.49732169462617876, 'initialization_multiplier': 0.8130021083747077}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 871 final loss: -0.00358892
Trial 872:
  Learning Rate: 0.02336607980654257
  Sigma Multiplier: 0.7682431038572797
  Initialization Multiplier: 0.6546053545434865


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 26.00it/s, loss=-0.003096, elapsed time=0.04, total time=5.9] 
[I 2025-06-08 00:29:50,254] Trial 872 finished with value: -0.0030964891365395327 and parameters: {'learning_rate': 0.02336607980654257, 'sigma_multiplier': 0.7682431038572797, 'initialization_multiplier': 0.6546053545434865}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 872 final loss: -0.00309649
Trial 873:
  Learning Rate: 0.028482089309792413
  Sigma Multiplier: 0.5359050569990951
  Initialization Multiplier: 0.1210577669957168


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.52it/s, loss=-0.002030, elapsed time=0.04, total time=6.52]
[I 2025-06-08 00:29:56,815] Trial 873 finished with value: -0.0020300032694121986 and parameters: {'learning_rate': 0.028482089309792413, 'sigma_multiplier': 0.5359050569990951, 'initialization_multiplier': 0.1210577669957168}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 873 final loss: -0.00203000
Trial 874:
  Learning Rate: 0.018138058791714852
  Sigma Multiplier: 0.5833321441800245
  Initialization Multiplier: 0.903823762451265


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.84it/s, loss=-0.003674, elapsed time=0.04, total time=6.43]
[I 2025-06-08 00:30:03,284] Trial 874 finished with value: -0.003673774937384696 and parameters: {'learning_rate': 0.018138058791714852, 'sigma_multiplier': 0.5833321441800245, 'initialization_multiplier': 0.903823762451265}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 874 final loss: -0.00367377
Trial 875:
  Learning Rate: 0.03358441211614568
  Sigma Multiplier: 0.6166166413921332
  Initialization Multiplier: 0.9677820872647955


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.21it/s, loss=-0.003219, elapsed time=0.04, total time=6.33]
[I 2025-06-08 00:30:09,655] Trial 875 finished with value: -0.0032192093008649733 and parameters: {'learning_rate': 0.03358441211614568, 'sigma_multiplier': 0.6166166413921332, 'initialization_multiplier': 0.9677820872647955}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 875 final loss: -0.00321921
Trial 876:
  Learning Rate: 0.014902939478885897
  Sigma Multiplier: 0.4277410823656247
  Initialization Multiplier: 0.7260555359744244


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.62it/s, loss=-0.002776, elapsed time=0.04, total time=6.76]
[I 2025-06-08 00:30:16,455] Trial 876 finished with value: -0.0027763920835407988 and parameters: {'learning_rate': 0.014902939478885897, 'sigma_multiplier': 0.4277410823656247, 'initialization_multiplier': 0.7260555359744244}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 876 final loss: -0.00277639
Trial 877:
  Learning Rate: 0.0012295424128078818
  Sigma Multiplier: 0.5519729378945322
  Initialization Multiplier: 0.757299268989489


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.49it/s, loss=0.007160, elapsed time=0.04, total time=6.52]
[I 2025-06-08 00:30:23,019] Trial 877 finished with value: 0.007160172636817843 and parameters: {'learning_rate': 0.0012295424128078818, 'sigma_multiplier': 0.5519729378945322, 'initialization_multiplier': 0.757299268989489}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 877 final loss: 0.00716017
Trial 878:
  Learning Rate: 0.021932665037978492
  Sigma Multiplier: 0.6975614510061192
  Initialization Multiplier: 0.6845174453516565


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.22it/s, loss=-0.003415, elapsed time=0.04, total time=6.09]
[I 2025-06-08 00:30:29,147] Trial 878 finished with value: -0.003414695503648021 and parameters: {'learning_rate': 0.021932665037978492, 'sigma_multiplier': 0.6975614510061192, 'initialization_multiplier': 0.6845174453516565}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 878 final loss: -0.00341470
Trial 879:
  Learning Rate: 0.04027363497454865
  Sigma Multiplier: 0.47424057302564726
  Initialization Multiplier: 0.6575056357936072


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.01it/s, loss=-0.003155, elapsed time=0.04, total time=6.65]
[I 2025-06-08 00:30:35,833] Trial 879 finished with value: -0.0031551546569825676 and parameters: {'learning_rate': 0.04027363497454865, 'sigma_multiplier': 0.47424057302564726, 'initialization_multiplier': 0.6575056357936072}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 879 final loss: -0.00315515
Trial 880:
  Learning Rate: 0.02628206760195002
  Sigma Multiplier: 0.5202551110436526
  Initialization Multiplier: 0.7839261823214609


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.32it/s, loss=-0.003365, elapsed time=0.04, total time=6.57]
[I 2025-06-08 00:30:42,442] Trial 880 finished with value: -0.0033652493193983197 and parameters: {'learning_rate': 0.02628206760195002, 'sigma_multiplier': 0.5202551110436526, 'initialization_multiplier': 0.7839261823214609}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 880 final loss: -0.00336525
Trial 881:
  Learning Rate: 0.030793765765416477
  Sigma Multiplier: 0.5851173788958992
  Initialization Multiplier: 0.7079725288150371


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.00it/s, loss=-0.003595, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:30:48,867] Trial 881 finished with value: -0.003595368056256587 and parameters: {'learning_rate': 0.030793765765416477, 'sigma_multiplier': 0.5851173788958992, 'initialization_multiplier': 0.7079725288150371}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 881 final loss: -0.00359537
Trial 882:
  Learning Rate: 0.022279470595536192
  Sigma Multiplier: 0.6493987091517072
  Initialization Multiplier: 0.6457176277493237


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.65it/s, loss=-0.003426, elapsed time=0.04, total time=6.22]
[I 2025-06-08 00:30:55,127] Trial 882 finished with value: -0.003425709021053345 and parameters: {'learning_rate': 0.022279470595536192, 'sigma_multiplier': 0.6493987091517072, 'initialization_multiplier': 0.6457176277493237}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 882 final loss: -0.00342571
Trial 883:
  Learning Rate: 0.012236256061471587
  Sigma Multiplier: 0.5001680117560509
  Initialization Multiplier: 0.7457163322426087


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.69it/s, loss=-0.002836, elapsed time=0.04, total time=6.75]
[I 2025-06-08 00:31:01,914] Trial 883 finished with value: -0.0028359663131306352 and parameters: {'learning_rate': 0.012236256061471587, 'sigma_multiplier': 0.5001680117560509, 'initialization_multiplier': 0.7457163322426087}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 883 final loss: -0.00283597
Trial 884:
  Learning Rate: 4.224403674128384e-05
  Sigma Multiplier: 0.5577988257581545
  Initialization Multiplier: 0.6873175366497217


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.45it/s, loss=0.061361, elapsed time=0.04, total time=6.53]
[I 2025-06-08 00:31:08,493] Trial 884 finished with value: 0.06136115942328672 and parameters: {'learning_rate': 4.224403674128384e-05, 'sigma_multiplier': 0.5577988257581545, 'initialization_multiplier': 0.6873175366497217}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 884 final loss: 0.06136116
Trial 885:
  Learning Rate: 0.036468334013543116
  Sigma Multiplier: 0.626026464068183
  Initialization Multiplier: 0.6243912823523333


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.39it/s, loss=-0.003663, elapsed time=0.04, total time=6.28]
[I 2025-06-08 00:31:14,811] Trial 885 finished with value: -0.003662504989956311 and parameters: {'learning_rate': 0.036468334013543116, 'sigma_multiplier': 0.626026464068183, 'initialization_multiplier': 0.6243912823523333}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 885 final loss: -0.00366250
Trial 886:
  Learning Rate: 0.028280902867793146
  Sigma Multiplier: 0.5934105986123018
  Initialization Multiplier: 0.8717770528107129


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.93it/s, loss=-0.003765, elapsed time=0.04, total time=6.41]
[I 2025-06-08 00:31:21,263] Trial 886 finished with value: -0.0037653028176153113 and parameters: {'learning_rate': 0.028280902867793146, 'sigma_multiplier': 0.5934105986123018, 'initialization_multiplier': 0.8717770528107129}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 886 final loss: -0.00376530
Trial 887:
  Learning Rate: 0.019216156756947638
  Sigma Multiplier: 0.5334353471483382
  Initialization Multiplier: 1.0782881641693167


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.08it/s, loss=-0.002370, elapsed time=0.04, total time=6.63]
[I 2025-06-08 00:31:27,935] Trial 887 finished with value: -0.002370007752265526 and parameters: {'learning_rate': 0.019216156756947638, 'sigma_multiplier': 0.5334353471483382, 'initialization_multiplier': 1.0782881641693167}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 887 final loss: -0.00237001
Trial 888:
  Learning Rate: 0.04695855274356985
  Sigma Multiplier: 0.4484751132002816
  Initialization Multiplier: 0.7950022044785585


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.70it/s, loss=-0.003174, elapsed time=0.04, total time=6.75]
[I 2025-06-08 00:31:34,722] Trial 888 finished with value: -0.0031741311595497347 and parameters: {'learning_rate': 0.04695855274356985, 'sigma_multiplier': 0.4484751132002816, 'initialization_multiplier': 0.7950022044785585}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 888 final loss: -0.00317413
Trial 889:
  Learning Rate: 0.03248117833201339
  Sigma Multiplier: 0.5576098149182721
  Initialization Multiplier: 0.6588967751492888


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.58it/s, loss=-0.003402, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:31:41,248] Trial 889 finished with value: -0.003402170510797639 and parameters: {'learning_rate': 0.03248117833201339, 'sigma_multiplier': 0.5576098149182721, 'initialization_multiplier': 0.6588967751492888}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 889 final loss: -0.00340217
Trial 890:
  Learning Rate: 0.026199693899003736
  Sigma Multiplier: 0.4999370030937177
  Initialization Multiplier: 0.8264340143762346


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.96it/s, loss=-0.003131, elapsed time=0.04, total time=6.67]
[I 2025-06-08 00:31:47,958] Trial 890 finished with value: -0.003131196667366519 and parameters: {'learning_rate': 0.026199693899003736, 'sigma_multiplier': 0.4999370030937177, 'initialization_multiplier': 0.8264340143762346}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 890 final loss: -0.00313120
Trial 891:
  Learning Rate: 0.023472194922310834
  Sigma Multiplier: 0.670469337848421
  Initialization Multiplier: 1.990631999498771


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.81it/s, loss=0.093975, elapsed time=0.04, total time=6.43]
[I 2025-06-08 00:31:54,428] Trial 891 finished with value: 0.09397495595379775 and parameters: {'learning_rate': 0.023472194922310834, 'sigma_multiplier': 0.670469337848421, 'initialization_multiplier': 1.990631999498771}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 891 final loss: 0.09397496
Trial 892:
  Learning Rate: 0.039752499946624484
  Sigma Multiplier: 0.5886590941455687
  Initialization Multiplier: 0.5958117807167023


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.76it/s, loss=-0.003307, elapsed time=0.04, total time=6.44]
[I 2025-06-08 00:32:00,910] Trial 892 finished with value: -0.003307348140129871 and parameters: {'learning_rate': 0.039752499946624484, 'sigma_multiplier': 0.5886590941455687, 'initialization_multiplier': 0.5958117807167023}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 892 final loss: -0.00330735
Trial 893:
  Learning Rate: 0.01717053144797317
  Sigma Multiplier: 0.6167246215353095
  Initialization Multiplier: 0.719414390578142


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.08it/s, loss=-0.003338, elapsed time=0.04, total time=6.36]
[I 2025-06-08 00:32:07,307] Trial 893 finished with value: -0.003338023720347799 and parameters: {'learning_rate': 0.01717053144797317, 'sigma_multiplier': 0.6167246215353095, 'initialization_multiplier': 0.719414390578142}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 893 final loss: -0.00333802
Trial 894:
  Learning Rate: 0.030073305926446938
  Sigma Multiplier: 0.5172073795097187
  Initialization Multiplier: 0.6370428703578027


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.21it/s, loss=-0.003576, elapsed time=0.04, total time=6.59]
[I 2025-06-08 00:32:13,943] Trial 894 finished with value: -0.0035757619661118674 and parameters: {'learning_rate': 0.030073305926446938, 'sigma_multiplier': 0.5172073795097187, 'initialization_multiplier': 0.6370428703578027}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 894 final loss: -0.00357576
Trial 895:
  Learning Rate: 0.056372682290444676
  Sigma Multiplier: 0.5609373527327125
  Initialization Multiplier: 0.6954492432949826


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.59it/s, loss=-0.003747, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:32:20,475] Trial 895 finished with value: -0.003747259332595416 and parameters: {'learning_rate': 0.056372682290444676, 'sigma_multiplier': 0.5609373527327125, 'initialization_multiplier': 0.6954492432949826}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 895 final loss: -0.00374726
Trial 896:
  Learning Rate: 0.034859887590386614
  Sigma Multiplier: 0.47732319503958776
  Initialization Multiplier: 0.7539769934643462


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.04it/s, loss=-0.003561, elapsed time=0.04, total time=6.65]
[I 2025-06-08 00:32:27,163] Trial 896 finished with value: -0.0035610403776606923 and parameters: {'learning_rate': 0.034859887590386614, 'sigma_multiplier': 0.47732319503958776, 'initialization_multiplier': 0.7539769934643462}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 896 final loss: -0.00356104
Trial 897:
  Learning Rate: 0.02120983033555165
  Sigma Multiplier: 0.6540212927093507
  Initialization Multiplier: 0.5802832497737332


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.61it/s, loss=-0.003502, elapsed time=0.04, total time=6.24]
[I 2025-06-08 00:32:33,441] Trial 897 finished with value: -0.0035016103990217757 and parameters: {'learning_rate': 0.02120983033555165, 'sigma_multiplier': 0.6540212927093507, 'initialization_multiplier': 0.5802832497737332}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 897 final loss: -0.00350161
Trial 898:
  Learning Rate: 0.04675174787645606
  Sigma Multiplier: 0.5392147317452737
  Initialization Multiplier: 0.6674385837351072


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.40it/s, loss=-0.003755, elapsed time=0.04, total time=6.55]
[I 2025-06-08 00:32:40,035] Trial 898 finished with value: -0.003755052728577689 and parameters: {'learning_rate': 0.04675174787645606, 'sigma_multiplier': 0.5392147317452737, 'initialization_multiplier': 0.6674385837351072}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 898 final loss: -0.00375505
Trial 899:
  Learning Rate: 0.025886648969564215
  Sigma Multiplier: 0.602811307489659
  Initialization Multiplier: 0.6189032601893697


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.10it/s, loss=-0.003698, elapsed time=0.04, total time=6.36]
[I 2025-06-08 00:32:46,431] Trial 899 finished with value: -0.003697808872110585 and parameters: {'learning_rate': 0.025886648969564215, 'sigma_multiplier': 0.602811307489659, 'initialization_multiplier': 0.6189032601893697}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 899 final loss: -0.00369781
Trial 900:
  Learning Rate: 0.029993699394543768
  Sigma Multiplier: 0.5743325978452049
  Initialization Multiplier: 0.724913355381742


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.78it/s, loss=-0.003765, elapsed time=0.04, total time=6.45]
[I 2025-06-08 00:32:52,916] Trial 900 finished with value: -0.0037648195083452613 and parameters: {'learning_rate': 0.029993699394543768, 'sigma_multiplier': 0.5743325978452049, 'initialization_multiplier': 0.724913355381742}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 900 final loss: -0.00376482
Trial 901:
  Learning Rate: 0.03810006754178402
  Sigma Multiplier: 0.3989850426777687
  Initialization Multiplier: 0.923148647319608


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.55it/s, loss=-0.002672, elapsed time=0.04, total time=6.78]
[I 2025-06-08 00:32:59,739] Trial 901 finished with value: -0.0026720590215547546 and parameters: {'learning_rate': 0.03810006754178402, 'sigma_multiplier': 0.3989850426777687, 'initialization_multiplier': 0.923148647319608}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 901 final loss: -0.00267206
Trial 902:
  Learning Rate: 0.023983724555455034
  Sigma Multiplier: 0.5223895263457875
  Initialization Multiplier: 1.4531658910462315


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.52it/s, loss=0.064373, elapsed time=0.04, total time=6.8] 
[I 2025-06-08 00:33:06,577] Trial 902 finished with value: 0.06437266006746081 and parameters: {'learning_rate': 0.023983724555455034, 'sigma_multiplier': 0.5223895263457875, 'initialization_multiplier': 1.4531658910462315}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 902 final loss: 0.06437266
Trial 903:
  Learning Rate: 0.019751876848281145
  Sigma Multiplier: 0.6211798820660408
  Initialization Multiplier: 1.7058502979744865


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.42it/s, loss=0.091647, elapsed time=0.04, total time=6.54]
[I 2025-06-08 00:33:13,159] Trial 903 finished with value: 0.09164744217926077 and parameters: {'learning_rate': 0.019751876848281145, 'sigma_multiplier': 0.6211798820660408, 'initialization_multiplier': 1.7058502979744865}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 903 final loss: 0.09164744
Trial 904:
  Learning Rate: 0.03389927798508815
  Sigma Multiplier: 0.698917441167216
  Initialization Multiplier: 0.554705281493047


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.11it/s, loss=-0.003619, elapsed time=0.04, total time=6.1] 
[I 2025-06-08 00:33:19,303] Trial 904 finished with value: -0.0036191914431733707 and parameters: {'learning_rate': 0.03389927798508815, 'sigma_multiplier': 0.698917441167216, 'initialization_multiplier': 0.554705281493047}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 904 final loss: -0.00361919
Trial 905:
  Learning Rate: 0.014591529915503136
  Sigma Multiplier: 0.4734303019134881
  Initialization Multiplier: 0.7841537971544843


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.92it/s, loss=-0.003283, elapsed time=0.04, total time=6.67]
[I 2025-06-08 00:33:26,015] Trial 905 finished with value: -0.00328335273601424 and parameters: {'learning_rate': 0.014591529915503136, 'sigma_multiplier': 0.4734303019134881, 'initialization_multiplier': 0.7841537971544843}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 905 final loss: -0.00328335
Trial 906:
  Learning Rate: 0.06624532330003588
  Sigma Multiplier: 0.5594337445032287
  Initialization Multiplier: 0.656295612006223


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.72it/s, loss=-0.003500, elapsed time=0.04, total time=6.46]
[I 2025-06-08 00:33:32,515] Trial 906 finished with value: -0.0034995613000590596 and parameters: {'learning_rate': 0.06624532330003588, 'sigma_multiplier': 0.5594337445032287, 'initialization_multiplier': 0.656295612006223}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 906 final loss: -0.00349956
Trial 907:
  Learning Rate: 0.027996042821730762
  Sigma Multiplier: 0.5115734436765524
  Initialization Multiplier: 0.8446754079330636


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.23it/s, loss=-0.003341, elapsed time=0.05, total time=6.59]
[I 2025-06-08 00:33:39,142] Trial 907 finished with value: -0.0033411247480550966 and parameters: {'learning_rate': 0.027996042821730762, 'sigma_multiplier': 0.5115734436765524, 'initialization_multiplier': 0.8446754079330636}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 907 final loss: -0.00334112
Trial 908:
  Learning Rate: 0.04317477377876097
  Sigma Multiplier: 0.5939475568664511
  Initialization Multiplier: 0.6928497699306235


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.92it/s, loss=-0.003766, elapsed time=0.04, total time=6.42]
[I 2025-06-08 00:33:45,607] Trial 908 finished with value: -0.00376649216980982 and parameters: {'learning_rate': 0.04317477377876097, 'sigma_multiplier': 0.5939475568664511, 'initialization_multiplier': 0.6928497699306235}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 908 final loss: -0.00376649
Trial 909:
  Learning Rate: 0.022858357516934196
  Sigma Multiplier: 0.6489038478210751
  Initialization Multiplier: 0.6130132659236535


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.29it/s, loss=-0.003710, elapsed time=0.04, total time=6.32]
[I 2025-06-08 00:33:51,964] Trial 909 finished with value: -0.003709669494179384 and parameters: {'learning_rate': 0.022858357516934196, 'sigma_multiplier': 0.6489038478210751, 'initialization_multiplier': 0.6130132659236535}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 909 final loss: -0.00370967
Trial 910:
  Learning Rate: 0.03274097153309899
  Sigma Multiplier: 0.4347588366505819
  Initialization Multiplier: 0.5833662800699844


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.51it/s, loss=-0.003354, elapsed time=0.04, total time=6.8] 
[I 2025-06-08 00:33:58,804] Trial 910 finished with value: -0.0033539352394694644 and parameters: {'learning_rate': 0.03274097153309899, 'sigma_multiplier': 0.4347588366505819, 'initialization_multiplier': 0.5833662800699844}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 910 final loss: -0.00335394
Trial 911:
  Learning Rate: 0.01777241259946149
  Sigma Multiplier: 0.55253851357456
  Initialization Multiplier: 0.7357253886836017


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.55it/s, loss=-0.003331, elapsed time=0.04, total time=6.51]
[I 2025-06-08 00:34:05,350] Trial 911 finished with value: -0.0033308555771828126 and parameters: {'learning_rate': 0.01777241259946149, 'sigma_multiplier': 0.55253851357456, 'initialization_multiplier': 0.7357253886836017}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 911 final loss: -0.00333086
Trial 912:
  Learning Rate: 0.026457527246997608
  Sigma Multiplier: 0.6166657581681005
  Initialization Multiplier: 0.648799212971675


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.41it/s, loss=-0.003693, elapsed time=0.04, total time=6.28]
[I 2025-06-08 00:34:11,669] Trial 912 finished with value: -0.0036925555922425017 and parameters: {'learning_rate': 0.026457527246997608, 'sigma_multiplier': 0.6166657581681005, 'initialization_multiplier': 0.648799212971675}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 912 final loss: -0.00369256
Trial 913:
  Learning Rate: 0.035867324253150826
  Sigma Multiplier: 0.49706854452066596
  Initialization Multiplier: 0.7658040329916842


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.09it/s, loss=-0.003302, elapsed time=0.04, total time=6.62]
[I 2025-06-08 00:34:18,333] Trial 913 finished with value: -0.0033018487755714855 and parameters: {'learning_rate': 0.035867324253150826, 'sigma_multiplier': 0.49706854452066596, 'initialization_multiplier': 0.7658040329916842}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 913 final loss: -0.00330185
Trial 914:
  Learning Rate: 0.020956229972104775
  Sigma Multiplier: 1.155316697458085
  Initialization Multiplier: 0.6902192417262175


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 29.21it/s, loss=-0.001906, elapsed time=0.03, total time=5.28]
[I 2025-06-08 00:34:23,654] Trial 914 finished with value: -0.0019062248269850483 and parameters: {'learning_rate': 0.020956229972104775, 'sigma_multiplier': 1.155316697458085, 'initialization_multiplier': 0.6902192417262175}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 914 final loss: -0.00190622
Trial 915:
  Learning Rate: 0.00779873858779148
  Sigma Multiplier: 0.5694306234307787
  Initialization Multiplier: 0.6210859895903927


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.70it/s, loss=-0.003100, elapsed time=0.04, total time=6.47]
[I 2025-06-08 00:34:30,163] Trial 915 finished with value: -0.003100388522442644 and parameters: {'learning_rate': 0.00779873858779148, 'sigma_multiplier': 0.5694306234307787, 'initialization_multiplier': 0.6210859895903927}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 915 final loss: -0.00310039
Trial 916:
  Learning Rate: 0.027424115589192655
  Sigma Multiplier: 0.5312604978939062
  Initialization Multiplier: 0.5319085627430005


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.12it/s, loss=-0.003699, elapsed time=0.04, total time=6.62]
[I 2025-06-08 00:34:36,825] Trial 916 finished with value: -0.003699278467309344 and parameters: {'learning_rate': 0.027424115589192655, 'sigma_multiplier': 0.5312604978939062, 'initialization_multiplier': 0.5319085627430005}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 916 final loss: -0.00369928
Trial 917:
  Learning Rate: 0.040097185552374535
  Sigma Multiplier: 0.5922475179247327
  Initialization Multiplier: 0.8241321205458305


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.81it/s, loss=-0.003377, elapsed time=0.04, total time=6.44]
[I 2025-06-08 00:34:43,304] Trial 917 finished with value: -0.003376979017538157 and parameters: {'learning_rate': 0.040097185552374535, 'sigma_multiplier': 0.5922475179247327, 'initialization_multiplier': 0.8241321205458305}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 917 final loss: -0.00337698
Trial 918:
  Learning Rate: 0.04998636883642308
  Sigma Multiplier: 0.6518168868442804
  Initialization Multiplier: 0.7111608086745024


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.64it/s, loss=-0.003445, elapsed time=0.04, total time=6.23]
[I 2025-06-08 00:34:49,574] Trial 918 finished with value: -0.003445459141823361 and parameters: {'learning_rate': 0.04998636883642308, 'sigma_multiplier': 0.6518168868442804, 'initialization_multiplier': 0.7111608086745024}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 918 final loss: -0.00344546
Trial 919:
  Learning Rate: 0.03106879539915334
  Sigma Multiplier: 1.4047845316709358
  Initialization Multiplier: 0.5943412218019287


Training Progress: 100%|██████████| 150/150 [00:04<00:00, 30.17it/s, loss=-0.001508, elapsed time=0.03, total time=5.11]
[I 2025-06-08 00:34:54,721] Trial 919 finished with value: -0.0015077181516489136 and parameters: {'learning_rate': 0.03106879539915334, 'sigma_multiplier': 1.4047845316709358, 'initialization_multiplier': 0.5943412218019287}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 919 final loss: -0.00150772
Trial 920:
  Learning Rate: 0.01601716476768677
  Sigma Multiplier: 0.4883020180803007
  Initialization Multiplier: 0.6805187084710389


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.00it/s, loss=-0.003591, elapsed time=0.04, total time=6.66]
[I 2025-06-08 00:35:01,418] Trial 920 finished with value: -0.0035909446349871683 and parameters: {'learning_rate': 0.01601716476768677, 'sigma_multiplier': 0.4883020180803007, 'initialization_multiplier': 0.6805187084710389}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 920 final loss: -0.00359094
Trial 921:
  Learning Rate: 0.023578182196396645
  Sigma Multiplier: 0.5319584456855927
  Initialization Multiplier: 0.6459606718122395


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.06it/s, loss=-0.003788, elapsed time=0.04, total time=6.64]
[I 2025-06-08 00:35:08,096] Trial 921 finished with value: -0.0037881229702662464 and parameters: {'learning_rate': 0.023578182196396645, 'sigma_multiplier': 0.5319584456855927, 'initialization_multiplier': 0.6459606718122395}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 921 final loss: -0.00378812
Trial 922:
  Learning Rate: 0.028852528981550128
  Sigma Multiplier: 0.5762029857357626
  Initialization Multiplier: 0.7963770844446897


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.40it/s, loss=-0.003602, elapsed time=0.04, total time=6.54]
[I 2025-06-08 00:35:14,681] Trial 922 finished with value: -0.0036021899816882676 and parameters: {'learning_rate': 0.028852528981550128, 'sigma_multiplier': 0.5762029857357626, 'initialization_multiplier': 0.7963770844446897}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 922 final loss: -0.00360219
Trial 923:
  Learning Rate: 0.018926298429318315
  Sigma Multiplier: 0.7308412596767548
  Initialization Multiplier: 0.5645788667374231


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.20it/s, loss=-0.003219, elapsed time=0.04, total time=6.09]
[I 2025-06-08 00:35:20,809] Trial 923 finished with value: -0.0032189837416149577 and parameters: {'learning_rate': 0.018926298429318315, 'sigma_multiplier': 0.7308412596767548, 'initialization_multiplier': 0.5645788667374231}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 923 final loss: -0.00321898
Trial 924:
  Learning Rate: 0.037356137279390154
  Sigma Multiplier: 0.6193795218752004
  Initialization Multiplier: 0.7472698978589184


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.22it/s, loss=-0.003746, elapsed time=0.04, total time=6.34]
[I 2025-06-08 00:35:27,190] Trial 924 finished with value: -0.003745623421477487 and parameters: {'learning_rate': 0.037356137279390154, 'sigma_multiplier': 0.6193795218752004, 'initialization_multiplier': 0.7472698978589184}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 924 final loss: -0.00374562
Trial 925:
  Learning Rate: 0.024492211314471483
  Sigma Multiplier: 0.44766046752320937
  Initialization Multiplier: 0.6096672022868566


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.80it/s, loss=-0.003257, elapsed time=0.04, total time=6.71]
[I 2025-06-08 00:35:33,941] Trial 925 finished with value: -0.0032574226313111295 and parameters: {'learning_rate': 0.024492211314471483, 'sigma_multiplier': 0.44766046752320937, 'initialization_multiplier': 0.6096672022868566}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 925 final loss: -0.00325742
Trial 926:
  Learning Rate: 0.03230845572859256
  Sigma Multiplier: 0.5553667623100773
  Initialization Multiplier: 0.6647640432073347


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.43it/s, loss=-0.003718, elapsed time=0.04, total time=6.55]
[I 2025-06-08 00:35:40,535] Trial 926 finished with value: -0.0037177835941887747 and parameters: {'learning_rate': 0.03230845572859256, 'sigma_multiplier': 0.5553667623100773, 'initialization_multiplier': 0.6647640432073347}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 926 final loss: -0.00371778
Trial 927:
  Learning Rate: 0.021477414289823187
  Sigma Multiplier: 0.5261263826176064
  Initialization Multiplier: 0.8947982796059127


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.28it/s, loss=-0.003582, elapsed time=0.04, total time=6.58]
[I 2025-06-08 00:35:47,154] Trial 927 finished with value: -0.0035822348558021324 and parameters: {'learning_rate': 0.021477414289823187, 'sigma_multiplier': 0.5261263826176064, 'initialization_multiplier': 0.8947982796059127}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 927 final loss: -0.00358223
Trial 928:
  Learning Rate: 0.04281090347529998
  Sigma Multiplier: 0.6884821379892709
  Initialization Multiplier: 0.7126001644603573


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.10it/s, loss=-0.003571, elapsed time=0.04, total time=6.11]
[I 2025-06-08 00:35:53,299] Trial 928 finished with value: -0.0035714801177564506 and parameters: {'learning_rate': 0.04281090347529998, 'sigma_multiplier': 0.6884821379892709, 'initialization_multiplier': 0.7126001644603573}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 928 final loss: -0.00357148
Trial 929:
  Learning Rate: 0.026096156014488774
  Sigma Multiplier: 0.5866190284588794
  Initialization Multiplier: 0.5399350186066536


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.90it/s, loss=-0.003121, elapsed time=0.04, total time=6.69]
[I 2025-06-08 00:36:00,027] Trial 929 finished with value: -0.003120695236354677 and parameters: {'learning_rate': 0.026096156014488774, 'sigma_multiplier': 0.5866190284588794, 'initialization_multiplier': 0.5399350186066536}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 929 final loss: -0.00312070
Trial 930:
  Learning Rate: 0.05513776547102145
  Sigma Multiplier: 0.48185516223702707
  Initialization Multiplier: 0.7561281164478271


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.02it/s, loss=-0.003192, elapsed time=0.04, total time=6.66]
[I 2025-06-08 00:36:06,730] Trial 930 finished with value: -0.0031918088922094343 and parameters: {'learning_rate': 0.05513776547102145, 'sigma_multiplier': 0.48185516223702707, 'initialization_multiplier': 0.7561281164478271}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 930 final loss: -0.00319181
Trial 931:
  Learning Rate: 0.03057074074691487
  Sigma Multiplier: 0.6358137884753661
  Initialization Multiplier: 0.6391674288616366


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.56it/s, loss=-0.003742, elapsed time=0.04, total time=6.24]
[I 2025-06-08 00:36:13,007] Trial 931 finished with value: -0.003742385769548836 and parameters: {'learning_rate': 0.03057074074691487, 'sigma_multiplier': 0.6358137884753661, 'initialization_multiplier': 0.6391674288616366}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 931 final loss: -0.00374239
Trial 932:
  Learning Rate: 0.034769732322947854
  Sigma Multiplier: 0.5526200255212633
  Initialization Multiplier: 0.8662340341445218


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.52it/s, loss=-0.003429, elapsed time=0.04, total time=6.52]
[I 2025-06-08 00:36:19,568] Trial 932 finished with value: -0.0034288983450427935 and parameters: {'learning_rate': 0.034769732322947854, 'sigma_multiplier': 0.5526200255212633, 'initialization_multiplier': 0.8662340341445218}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 932 final loss: -0.00342890
Trial 933:
  Learning Rate: 0.01961692253697229
  Sigma Multiplier: 0.5159316944092719
  Initialization Multiplier: 0.5872584533841203


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.34it/s, loss=-0.003596, elapsed time=0.04, total time=6.56]
[I 2025-06-08 00:36:26,166] Trial 933 finished with value: -0.003595591278692734 and parameters: {'learning_rate': 0.01961692253697229, 'sigma_multiplier': 0.5159316944092719, 'initialization_multiplier': 0.5872584533841203}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 933 final loss: -0.00359559
Trial 934:
  Learning Rate: 0.009681722822716928
  Sigma Multiplier: 0.5974697331012132
  Initialization Multiplier: 0.6953808789467547


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.69it/s, loss=-0.002974, elapsed time=0.04, total time=6.47]
[I 2025-06-08 00:36:32,680] Trial 934 finished with value: -0.002974150003467138 and parameters: {'learning_rate': 0.009681722822716928, 'sigma_multiplier': 0.5974697331012132, 'initialization_multiplier': 0.6953808789467547}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 934 final loss: -0.00297415
Trial 935:
  Learning Rate: 0.0444846416868503
  Sigma Multiplier: 0.6357468469810883
  Initialization Multiplier: 0.8156162605170532


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.05it/s, loss=-0.003525, elapsed time=0.04, total time=6.64]
[I 2025-06-08 00:36:39,359] Trial 935 finished with value: -0.003525389082927173 and parameters: {'learning_rate': 0.0444846416868503, 'sigma_multiplier': 0.6357468469810883, 'initialization_multiplier': 0.8156162605170532}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 935 final loss: -0.00352539
Trial 936:
  Learning Rate: 0.024963036974536876
  Sigma Multiplier: 0.5702455367463009
  Initialization Multiplier: 0.6317872716956958


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.64it/s, loss=-0.003292, elapsed time=0.04, total time=6.49]
[I 2025-06-08 00:36:45,892] Trial 936 finished with value: -0.003292232562322412 and parameters: {'learning_rate': 0.024963036974536876, 'sigma_multiplier': 0.5702455367463009, 'initialization_multiplier': 0.6317872716956958}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 936 final loss: -0.00329223
Trial 937:
  Learning Rate: 0.02932629555971471
  Sigma Multiplier: 0.4547123713852027
  Initialization Multiplier: 0.6712278802531253


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.56it/s, loss=-0.003510, elapsed time=0.04, total time=6.78]
[I 2025-06-08 00:36:52,717] Trial 937 finished with value: -0.0035104081482111573 and parameters: {'learning_rate': 0.02932629555971471, 'sigma_multiplier': 0.4547123713852027, 'initialization_multiplier': 0.6712278802531253}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 937 final loss: -0.00351041
Trial 938:
  Learning Rate: 0.022118528448164053
  Sigma Multiplier: 0.510124179955464
  Initialization Multiplier: 0.5284328230808374


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.30it/s, loss=-0.003540, elapsed time=0.04, total time=6.58]
[I 2025-06-08 00:36:59,337] Trial 938 finished with value: -0.0035404930939870825 and parameters: {'learning_rate': 0.022118528448164053, 'sigma_multiplier': 0.510124179955464, 'initialization_multiplier': 0.5284328230808374}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 938 final loss: -0.00354049
Trial 939:
  Learning Rate: 0.037205592267973236
  Sigma Multiplier: 0.6061691604390671
  Initialization Multiplier: 0.7739436211566881


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.99it/s, loss=-0.003305, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:37:05,764] Trial 939 finished with value: -0.0033050394633200543 and parameters: {'learning_rate': 0.037205592267973236, 'sigma_multiplier': 0.6061691604390671, 'initialization_multiplier': 0.7739436211566881}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 939 final loss: -0.00330504
Trial 940:
  Learning Rate: 0.0036521753451758394
  Sigma Multiplier: 0.5454803144534749
  Initialization Multiplier: 0.7200890575940627


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.57it/s, loss=-0.002208, elapsed time=0.04, total time=6.5] 
[I 2025-06-08 00:37:12,300] Trial 940 finished with value: -0.0022076029610512775 and parameters: {'learning_rate': 0.0036521753451758394, 'sigma_multiplier': 0.5454803144534749, 'initialization_multiplier': 0.7200890575940627}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 940 final loss: -0.00220760
Trial 941:
  Learning Rate: 0.016343452241574106
  Sigma Multiplier: 0.37926992772573564
  Initialization Multiplier: 0.5668651518071987


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.33it/s, loss=-0.002443, elapsed time=0.04, total time=6.85]
[I 2025-06-08 00:37:19,188] Trial 941 finished with value: -0.002442726018618388 and parameters: {'learning_rate': 0.016343452241574106, 'sigma_multiplier': 0.37926992772573564, 'initialization_multiplier': 0.5668651518071987}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 941 final loss: -0.00244273
Trial 942:
  Learning Rate: 0.026737217999046027
  Sigma Multiplier: 0.6846279942795079
  Initialization Multiplier: 0.975167359165672


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.82it/s, loss=-0.003405, elapsed time=0.04, total time=6.19]
[I 2025-06-08 00:37:25,417] Trial 942 finished with value: -0.003404990364802198 and parameters: {'learning_rate': 0.026737217999046027, 'sigma_multiplier': 0.6846279942795079, 'initialization_multiplier': 0.975167359165672}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 942 final loss: -0.00340499
Trial 943:
  Learning Rate: 0.03378221123256269
  Sigma Multiplier: 0.48627226756528286
  Initialization Multiplier: 0.6184143981030641


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.07it/s, loss=-0.002895, elapsed time=0.04, total time=6.64]
[I 2025-06-08 00:37:32,095] Trial 943 finished with value: -0.0028954353233375525 and parameters: {'learning_rate': 0.03378221123256269, 'sigma_multiplier': 0.48627226756528286, 'initialization_multiplier': 0.6184143981030641}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 943 final loss: -0.00289544
Trial 944:
  Learning Rate: 0.041575392595089244
  Sigma Multiplier: 0.5824173326878386
  Initialization Multiplier: 0.6655799200831198


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.87it/s, loss=-0.003651, elapsed time=0.04, total time=6.42]
[I 2025-06-08 00:37:38,555] Trial 944 finished with value: -0.0036513132418255793 and parameters: {'learning_rate': 0.041575392595089244, 'sigma_multiplier': 0.5824173326878386, 'initialization_multiplier': 0.6655799200831198}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 944 final loss: -0.00365131
Trial 945:
  Learning Rate: 0.01298668443881346
  Sigma Multiplier: 0.65173767484148
  Initialization Multiplier: 0.7312657592139011


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.69it/s, loss=-0.003005, elapsed time=0.04, total time=6.21]
[I 2025-06-08 00:37:44,801] Trial 945 finished with value: -0.003004821765685474 and parameters: {'learning_rate': 0.01298668443881346, 'sigma_multiplier': 0.65173767484148, 'initialization_multiplier': 0.7312657592139011}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 945 final loss: -0.00300482
Trial 946:
  Learning Rate: 0.022605934454469755
  Sigma Multiplier: 0.5371558934439425
  Initialization Multiplier: 0.7995884914398591


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.55it/s, loss=-0.003177, elapsed time=0.04, total time=6.51]
[I 2025-06-08 00:37:51,355] Trial 946 finished with value: -0.0031769598258163468 and parameters: {'learning_rate': 0.022605934454469755, 'sigma_multiplier': 0.5371558934439425, 'initialization_multiplier': 0.7995884914398591}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 946 final loss: -0.00317696
Trial 947:
  Learning Rate: 0.0289631289325899
  Sigma Multiplier: 0.6157634251164315
  Initialization Multiplier: 0.6091122321357658


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.39it/s, loss=-0.003668, elapsed time=0.04, total time=6.29]
[I 2025-06-08 00:37:57,682] Trial 947 finished with value: -0.0036679083370639125 and parameters: {'learning_rate': 0.0289631289325899, 'sigma_multiplier': 0.6157634251164315, 'initialization_multiplier': 0.6091122321357658}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 947 final loss: -0.00366791
Trial 948:
  Learning Rate: 0.018879795785642656
  Sigma Multiplier: 0.5593025715951367
  Initialization Multiplier: 0.6875926157917803


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.65it/s, loss=-0.003867, elapsed time=0.04, total time=6.47]
[I 2025-06-08 00:38:04,192] Trial 948 finished with value: -0.0038671387728046983 and parameters: {'learning_rate': 0.018879795785642656, 'sigma_multiplier': 0.5593025715951367, 'initialization_multiplier': 0.6875926157917803}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 948 final loss: -0.00386714
Trial 949:
  Learning Rate: 0.025044054450152533
  Sigma Multiplier: 1.0779915604519237
  Initialization Multiplier: 0.5836116250868401


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 28.51it/s, loss=-0.002247, elapsed time=0.05, total time=5.4] 
[I 2025-06-08 00:38:09,630] Trial 949 finished with value: -0.002246670382778727 and parameters: {'learning_rate': 0.025044054450152533, 'sigma_multiplier': 1.0779915604519237, 'initialization_multiplier': 0.5836116250868401}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 949 final loss: -0.00224667
Trial 950:
  Learning Rate: 0.047506538534903726
  Sigma Multiplier: 0.5169445122502702
  Initialization Multiplier: 0.8546665860593237


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.30it/s, loss=-0.003328, elapsed time=0.04, total time=6.57]
[I 2025-06-08 00:38:16,248] Trial 950 finished with value: -0.0033279135667109256 and parameters: {'learning_rate': 0.047506538534903726, 'sigma_multiplier': 0.5169445122502702, 'initialization_multiplier': 0.8546665860593237}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 950 final loss: -0.00332791
Trial 951:
  Learning Rate: 0.03223193213604712
  Sigma Multiplier: 0.5892355181806728
  Initialization Multiplier: 0.6438409321983564


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.01it/s, loss=-0.003549, elapsed time=0.04, total time=6.39]
[I 2025-06-08 00:38:22,679] Trial 951 finished with value: -0.003548723702721925 and parameters: {'learning_rate': 0.03223193213604712, 'sigma_multiplier': 0.5892355181806728, 'initialization_multiplier': 0.6438409321983564}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 951 final loss: -0.00354872
Trial 952:
  Learning Rate: 0.03769351760615295
  Sigma Multiplier: 0.30983452453300186
  Initialization Multiplier: 0.7447469113341506


Training Progress: 100%|██████████| 150/150 [00:11<00:00, 13.16it/s, loss=-0.001183, elapsed time=0.05, total time=11.5]
[I 2025-06-08 00:38:34,257] Trial 952 finished with value: -0.0011826240008132368 and parameters: {'learning_rate': 0.03769351760615295, 'sigma_multiplier': 0.30983452453300186, 'initialization_multiplier': 0.7447469113341506}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 952 final loss: -0.00118262
Trial 953:
  Learning Rate: 0.02130347227711069
  Sigma Multiplier: 0.4675457342831623
  Initialization Multiplier: 0.5213851921575532


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.90it/s, loss=-0.002769, elapsed time=0.05, total time=7.72]
[I 2025-06-08 00:38:42,034] Trial 953 finished with value: -0.0027694378262679066 and parameters: {'learning_rate': 0.02130347227711069, 'sigma_multiplier': 0.4675457342831623, 'initialization_multiplier': 0.5213851921575532}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 953 final loss: -0.00276944
Trial 954:
  Learning Rate: 0.026874745372047388
  Sigma Multiplier: 0.6281176782909516
  Initialization Multiplier: 1.2869788984534782


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.94it/s, loss=0.056735, elapsed time=0.05, total time=7.34]
[I 2025-06-08 00:38:49,422] Trial 954 finished with value: 0.05673508913345658 and parameters: {'learning_rate': 0.026874745372047388, 'sigma_multiplier': 0.6281176782909516, 'initialization_multiplier': 1.2869788984534782}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 954 final loss: 0.05673509
Trial 955:
  Learning Rate: 0.06036118698985347
  Sigma Multiplier: 0.41997961188396804
  Initialization Multiplier: 0.6875803570957992


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 18.94it/s, loss=-0.003054, elapsed time=0.04, total time=8.09]
[I 2025-06-08 00:38:57,562] Trial 955 finished with value: -0.0030544275015229245 and parameters: {'learning_rate': 0.06036118698985347, 'sigma_multiplier': 0.41997961188396804, 'initialization_multiplier': 0.6875803570957992}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 955 final loss: -0.00305443
Trial 956:
  Learning Rate: 0.03162094768362261
  Sigma Multiplier: 0.5494226935656138
  Initialization Multiplier: 0.5814281882046622


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.93it/s, loss=-0.003528, elapsed time=0.04, total time=7.32]
[I 2025-06-08 00:39:04,933] Trial 956 finished with value: -0.0035277501925834323 and parameters: {'learning_rate': 0.03162094768362261, 'sigma_multiplier': 0.5494226935656138, 'initialization_multiplier': 0.5814281882046622}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 956 final loss: -0.00352775
Trial 957:
  Learning Rate: 0.01731021231531724
  Sigma Multiplier: 0.49800337344953693
  Initialization Multiplier: 0.7744448379596407


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.96it/s, loss=-0.003265, elapsed time=0.05, total time=7.67] 
[I 2025-06-08 00:39:12,644] Trial 957 finished with value: -0.0032652973820342392 and parameters: {'learning_rate': 0.01731021231531724, 'sigma_multiplier': 0.49800337344953693, 'initialization_multiplier': 0.7744448379596407}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 957 final loss: -0.00326530
Trial 958:
  Learning Rate: 0.023633134578128788
  Sigma Multiplier: 0.6692394952183478
  Initialization Multiplier: 0.6309092434188535


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.03it/s, loss=-0.003628, elapsed time=0.04, total time=8.03]
[I 2025-06-08 00:39:20,728] Trial 958 finished with value: -0.003628184064345538 and parameters: {'learning_rate': 0.023633134578128788, 'sigma_multiplier': 0.6692394952183478, 'initialization_multiplier': 0.6309092434188535}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 958 final loss: -0.00362818
Trial 959:
  Learning Rate: 0.00033034620234039056
  Sigma Multiplier: 0.5750252540337395
  Initialization Multiplier: 0.9364726009618788


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.99it/s, loss=0.079125, elapsed time=0.05, total time=7.28]
[I 2025-06-08 00:39:28,054] Trial 959 finished with value: 0.07912491402485258 and parameters: {'learning_rate': 0.00033034620234039056, 'sigma_multiplier': 0.5750252540337395, 'initialization_multiplier': 0.9364726009618788}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 959 final loss: 0.07912491
Trial 960:
  Learning Rate: 0.03968746938793326
  Sigma Multiplier: 0.6243736346364773
  Initialization Multiplier: 0.7148481437994815


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.38it/s, loss=-0.003659, elapsed time=0.06, total time=7.89]
[I 2025-06-08 00:39:35,993] Trial 960 finished with value: -0.003659022660405163 and parameters: {'learning_rate': 0.03968746938793326, 'sigma_multiplier': 0.6243736346364773, 'initialization_multiplier': 0.7148481437994815}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 960 final loss: -0.00365902
Trial 961:
  Learning Rate: 0.028218280965645343
  Sigma Multiplier: 0.5253634766401643
  Initialization Multiplier: 0.6557921177296515


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 18.77it/s, loss=-0.003682, elapsed time=0.04, total time=8.15]
[I 2025-06-08 00:39:44,193] Trial 961 finished with value: -0.003681733832202183 and parameters: {'learning_rate': 0.028218280965645343, 'sigma_multiplier': 0.5253634766401643, 'initialization_multiplier': 0.6557921177296515}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 961 final loss: -0.00368173
Trial 962:
  Learning Rate: 0.0518340167226046
  Sigma Multiplier: 0.5856229400735504
  Initialization Multiplier: 0.5609174638651764


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 21.00it/s, loss=-0.003528, elapsed time=0.05, total time=7.27]
[I 2025-06-08 00:39:51,511] Trial 962 finished with value: -0.0035277502814574503 and parameters: {'learning_rate': 0.0518340167226046, 'sigma_multiplier': 0.5856229400735504, 'initialization_multiplier': 0.5609174638651764}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 962 final loss: -0.00352775
Trial 963:
  Learning Rate: 0.01964301566499491
  Sigma Multiplier: 0.48850149921402736
  Initialization Multiplier: 0.8171633887011359


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.42it/s, loss=-0.003410, elapsed time=0.05, total time=7.9] 
[I 2025-06-08 00:39:59,459] Trial 963 finished with value: -0.003410382335823897 and parameters: {'learning_rate': 0.01964301566499491, 'sigma_multiplier': 0.48850149921402736, 'initialization_multiplier': 0.8171633887011359}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 963 final loss: -0.00341038
Trial 964:
  Learning Rate: 0.03495354913670013
  Sigma Multiplier: 1.4587267547593155
  Initialization Multiplier: 0.6122747035526761


Training Progress: 100%|██████████| 150/150 [00:05<00:00, 25.13it/s, loss=-0.001355, elapsed time=0.04, total time=6.12]
[I 2025-06-08 00:40:05,635] Trial 964 finished with value: -0.001354724898010285 and parameters: {'learning_rate': 0.03495354913670013, 'sigma_multiplier': 1.4587267547593155, 'initialization_multiplier': 0.6122747035526761}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 964 final loss: -0.00135472
Trial 965:
  Learning Rate: 0.01476456606802731
  Sigma Multiplier: 0.5520577630706954
  Initialization Multiplier: 0.7547731972432034


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.34it/s, loss=-0.003222, elapsed time=0.04, total time=7.58]
[I 2025-06-08 00:40:13,272] Trial 965 finished with value: -0.00322176179402741 and parameters: {'learning_rate': 0.01476456606802731, 'sigma_multiplier': 0.5520577630706954, 'initialization_multiplier': 0.7547731972432034}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 965 final loss: -0.00322176
Trial 966:
  Learning Rate: 0.023641241541846637
  Sigma Multiplier: 0.6043226925392621
  Initialization Multiplier: 0.6986081421248109


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.46it/s, loss=-0.003946, elapsed time=0.04, total time=7.49]
[I 2025-06-08 00:40:20,802] Trial 966 finished with value: -0.003945795035852925 and parameters: {'learning_rate': 0.023641241541846637, 'sigma_multiplier': 0.6043226925392621, 'initialization_multiplier': 0.6986081421248109}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 966 final loss: -0.00394580
Trial 967:
  Learning Rate: 0.029637019969095086
  Sigma Multiplier: 0.7002333248580707
  Initialization Multiplier: 0.7804013055409946


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.32it/s, loss=-0.003784, elapsed time=0.05, total time=7.54]
[I 2025-06-08 00:40:28,389] Trial 967 finished with value: -0.0037838971621405766 and parameters: {'learning_rate': 0.029637019969095086, 'sigma_multiplier': 0.7002333248580707, 'initialization_multiplier': 0.7804013055409946}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 967 final loss: -0.00378390
Trial 968:
  Learning Rate: 0.025521904284352945
  Sigma Multiplier: 0.6586204352445595
  Initialization Multiplier: 0.7161227208164335


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.54it/s, loss=-0.003489, elapsed time=0.04, total time=7.84]
[I 2025-06-08 00:40:36,278] Trial 968 finished with value: -0.00348936641739486 and parameters: {'learning_rate': 0.025521904284352945, 'sigma_multiplier': 0.6586204352445595, 'initialization_multiplier': 0.7161227208164335}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 968 final loss: -0.00348937
Trial 969:
  Learning Rate: 0.03648916503642097
  Sigma Multiplier: 0.7601189728236065
  Initialization Multiplier: 0.8436213207362713


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.38it/s, loss=-0.003387, elapsed time=0.05, total time=7.51]
[I 2025-06-08 00:40:43,835] Trial 969 finished with value: -0.003387364574281549 and parameters: {'learning_rate': 0.03648916503642097, 'sigma_multiplier': 0.7601189728236065, 'initialization_multiplier': 0.8436213207362713}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 969 final loss: -0.00338736
Trial 970:
  Learning Rate: 0.031732702783785253
  Sigma Multiplier: 0.24957484877358876
  Initialization Multiplier: 0.7428872100648917


Training Progress: 100%|██████████| 150/150 [00:09<00:00, 15.89it/s, loss=-0.000452, elapsed time=0.06, total time=9.69]
[I 2025-06-08 00:40:53,572] Trial 970 finished with value: -0.00045234309161572224 and parameters: {'learning_rate': 0.031732702783785253, 'sigma_multiplier': 0.24957484877358876, 'initialization_multiplier': 0.7428872100648917}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 970 final loss: -0.00045234
Trial 971:
  Learning Rate: 0.04516273576927441
  Sigma Multiplier: 0.6363646076330342
  Initialization Multiplier: 0.7032211502985909


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.90it/s, loss=-0.003518, elapsed time=0.04, total time=7.7] 
[I 2025-06-08 00:41:01,322] Trial 971 finished with value: -0.0035177153636327 and parameters: {'learning_rate': 0.04516273576927441, 'sigma_multiplier': 0.6363646076330342, 'initialization_multiplier': 0.7032211502985909}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 971 final loss: -0.00351772
Trial 972:
  Learning Rate: 0.025153135248338045
  Sigma Multiplier: 0.6751199263416006
  Initialization Multiplier: 0.890196950784343


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.03it/s, loss=-0.003205, elapsed time=0.05, total time=8.98]
[I 2025-06-08 00:41:10,348] Trial 972 finished with value: -0.003205262890520019 and parameters: {'learning_rate': 0.025153135248338045, 'sigma_multiplier': 0.6751199263416006, 'initialization_multiplier': 0.890196950784343}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 972 final loss: -0.00320526
Trial 973:
  Learning Rate: 0.028676334015144727
  Sigma Multiplier: 0.6104702162980546
  Initialization Multiplier: 0.8009205674975646


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.01it/s, loss=-0.003352, elapsed time=0.05, total time=8.13]
[I 2025-06-08 00:41:18,533] Trial 973 finished with value: -0.003352319092523785 and parameters: {'learning_rate': 0.028676334015144727, 'sigma_multiplier': 0.6104702162980546, 'initialization_multiplier': 0.8009205674975646}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 973 final loss: -0.00335232
Trial 974:
  Learning Rate: 0.0398968282103411
  Sigma Multiplier: 0.6027881931231726
  Initialization Multiplier: 0.6883670077640941


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.37it/s, loss=-0.003543, elapsed time=0.05, total time=7.98]
[I 2025-06-08 00:41:26,567] Trial 974 finished with value: -0.003542726255267907 and parameters: {'learning_rate': 0.0398968282103411, 'sigma_multiplier': 0.6027881931231726, 'initialization_multiplier': 0.6883670077640941}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 974 final loss: -0.00354273
Trial 975:
  Learning Rate: 0.021436047561989663
  Sigma Multiplier: 0.7331710574676852
  Initialization Multiplier: 0.741947031626062


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 21.39it/s, loss=-0.003155, elapsed time=0.04, total time=7.17]
[I 2025-06-08 00:41:33,780] Trial 975 finished with value: -0.0031554031855157536 and parameters: {'learning_rate': 0.021436047561989663, 'sigma_multiplier': 0.7331710574676852, 'initialization_multiplier': 0.741947031626062}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 975 final loss: -0.00315540
Trial 976:
  Learning Rate: 0.033433507816233596
  Sigma Multiplier: 0.6490146737526726
  Initialization Multiplier: 0.6769078257043041


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 16.81it/s, loss=-0.003768, elapsed time=0.04, total time=9.17]
[I 2025-06-08 00:41:43,000] Trial 976 finished with value: -0.0037675239262787512 and parameters: {'learning_rate': 0.033433507816233596, 'sigma_multiplier': 0.6490146737526726, 'initialization_multiplier': 0.6769078257043041}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 976 final loss: -0.00376752
Trial 977:
  Learning Rate: 0.023874318157495654
  Sigma Multiplier: 0.5911913538588032
  Initialization Multiplier: 0.6509416705369593


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.90it/s, loss=-0.003671, elapsed time=0.04, total time=7.32]
[I 2025-06-08 00:41:50,366] Trial 977 finished with value: -0.003670888358900091 and parameters: {'learning_rate': 0.023874318157495654, 'sigma_multiplier': 0.5911913538588032, 'initialization_multiplier': 0.6509416705369593}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 977 final loss: -0.00367089
Trial 978:
  Learning Rate: 0.00020719115609189593
  Sigma Multiplier: 0.609861638221689
  Initialization Multiplier: 0.5299630960283574


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.56it/s, loss=0.025509, elapsed time=0.04, total time=7.44]
[I 2025-06-08 00:41:57,855] Trial 978 finished with value: 0.025508731984762858 and parameters: {'learning_rate': 0.00020719115609189593, 'sigma_multiplier': 0.609861638221689, 'initialization_multiplier': 0.5299630960283574}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 978 final loss: 0.02550873
Trial 979:
  Learning Rate: 0.028985278694626603
  Sigma Multiplier: 0.5726467964003646
  Initialization Multiplier: 0.5954612079826156


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.89it/s, loss=-0.003881, elapsed time=0.04, total time=7.69]
[I 2025-06-08 00:42:05,594] Trial 979 finished with value: -0.003881290901358833 and parameters: {'learning_rate': 0.028985278694626603, 'sigma_multiplier': 0.5726467964003646, 'initialization_multiplier': 0.5954612079826156}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 979 final loss: -0.00388129
Trial 980:
  Learning Rate: 0.049270049385909225
  Sigma Multiplier: 0.6768936112611171
  Initialization Multiplier: 0.7810091099410333


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.31it/s, loss=-0.003665, elapsed time=0.04, total time=7.54]
[I 2025-06-08 00:42:13,184] Trial 980 finished with value: -0.0036654180170466078 and parameters: {'learning_rate': 0.049270049385909225, 'sigma_multiplier': 0.6768936112611171, 'initialization_multiplier': 0.7810091099410333}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 980 final loss: -0.00366542
Trial 981:
  Learning Rate: 0.03449890829341078
  Sigma Multiplier: 0.524797203024768
  Initialization Multiplier: 0.7200368823183175


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.43it/s, loss=-0.003265, elapsed time=0.04, total time=8.78]
[I 2025-06-08 00:42:22,011] Trial 981 finished with value: -0.003264919469286152 and parameters: {'learning_rate': 0.03449890829341078, 'sigma_multiplier': 0.524797203024768, 'initialization_multiplier': 0.7200368823183175}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 981 final loss: -0.00326492
Trial 982:
  Learning Rate: 0.025766606586463143
  Sigma Multiplier: 0.6225946205743034
  Initialization Multiplier: 0.6355330809409111


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.48it/s, loss=-0.003939, elapsed time=0.04, total time=7.84]
[I 2025-06-08 00:42:29,897] Trial 982 finished with value: -0.003938512081971082 and parameters: {'learning_rate': 0.025766606586463143, 'sigma_multiplier': 0.6225946205743034, 'initialization_multiplier': 0.6355330809409111}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 982 final loss: -0.00393851
Trial 983:
  Learning Rate: 0.09103148464879761
  Sigma Multiplier: 0.7205919633018187
  Initialization Multiplier: 0.49163298769977737


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.45it/s, loss=-0.003101, elapsed time=0.05, total time=8.64]
[I 2025-06-08 00:42:38,582] Trial 983 finished with value: -0.0031008249860361306 and parameters: {'learning_rate': 0.09103148464879761, 'sigma_multiplier': 0.7205919633018187, 'initialization_multiplier': 0.49163298769977737}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 983 final loss: -0.00310082
Trial 984:
  Learning Rate: 0.020283586223513256
  Sigma Multiplier: 0.6785728105650749
  Initialization Multiplier: 0.5611724691677371


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.03it/s, loss=-0.003318, elapsed time=0.04, total time=7.66]
[I 2025-06-08 00:42:46,292] Trial 984 finished with value: -0.0033175350227997763 and parameters: {'learning_rate': 0.020283586223513256, 'sigma_multiplier': 0.6785728105650749, 'initialization_multiplier': 0.5611724691677371}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 984 final loss: -0.00331754
Trial 985:
  Learning Rate: 0.023867057361217296
  Sigma Multiplier: 0.6461771718431752
  Initialization Multiplier: 0.6088942850427383


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 21.23it/s, loss=-0.003496, elapsed time=0.04, total time=7.24]
[I 2025-06-08 00:42:53,576] Trial 985 finished with value: -0.0034964284848487096 and parameters: {'learning_rate': 0.023867057361217296, 'sigma_multiplier': 0.6461771718431752, 'initialization_multiplier': 0.6088942850427383}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 985 final loss: -0.00349643
Trial 986:
  Learning Rate: 0.018725104894304734
  Sigma Multiplier: 0.7105455204614837
  Initialization Multiplier: 0.6322910210289636


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.31it/s, loss=-0.003269, elapsed time=0.05, total time=7.53]
[I 2025-06-08 00:43:01,151] Trial 986 finished with value: -0.0032690540104192084 and parameters: {'learning_rate': 0.018725104894304734, 'sigma_multiplier': 0.7105455204614837, 'initialization_multiplier': 0.6322910210289636}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 986 final loss: -0.00326905
Trial 987:
  Learning Rate: 0.022981907085038805
  Sigma Multiplier: 0.6310162717200225
  Initialization Multiplier: 0.8120063152510439


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 20.37it/s, loss=-0.003208, elapsed time=0.04, total time=7.65]
[I 2025-06-08 00:43:08,855] Trial 987 finished with value: -0.0032078515101202627 and parameters: {'learning_rate': 0.022981907085038805, 'sigma_multiplier': 0.6310162717200225, 'initialization_multiplier': 0.8120063152510439}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 987 final loss: -0.00320785
Trial 988:
  Learning Rate: 0.02728468453352537
  Sigma Multiplier: 0.19251555323887182
  Initialization Multiplier: 1.0304823040058066


Training Progress: 100%|██████████| 150/150 [00:08<00:00, 17.97it/s, loss=0.000410, elapsed time=0.06, total time=8.5]  
[I 2025-06-08 00:43:17,397] Trial 988 finished with value: 0.0004097881235074853 and parameters: {'learning_rate': 0.02728468453352537, 'sigma_multiplier': 0.19251555323887182, 'initialization_multiplier': 1.0304823040058066}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 988 final loss: 0.00040979
Trial 989:
  Learning Rate: 0.06651050161135975
  Sigma Multiplier: 1.8091533927325751
  Initialization Multiplier: 0.5517541832505007


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 24.89it/s, loss=-0.000806, elapsed time=0.03, total time=6.33]
[I 2025-06-08 00:43:23,785] Trial 989 finished with value: -0.0008057167694883989 and parameters: {'learning_rate': 0.06651050161135975, 'sigma_multiplier': 1.8091533927325751, 'initialization_multiplier': 0.5517541832505007}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 989 final loss: -0.00080572
Trial 990:
  Learning Rate: 0.042401862312626076
  Sigma Multiplier: 0.6556269927099126
  Initialization Multiplier: 0.6753856055032571


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.76it/s, loss=-0.003610, elapsed time=0.04, total time=6.74]
[I 2025-06-08 00:43:30,571] Trial 990 finished with value: -0.0036103432414312767 and parameters: {'learning_rate': 0.042401862312626076, 'sigma_multiplier': 0.6556269927099126, 'initialization_multiplier': 0.6753856055032571}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 990 final loss: -0.00361034
Trial 991:
  Learning Rate: 0.016365179105293153
  Sigma Multiplier: 0.6222228096200653
  Initialization Multiplier: 0.8534815760017503


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.42it/s, loss=-0.003194, elapsed time=0.04, total time=6.83]
[I 2025-06-08 00:43:37,444] Trial 991 finished with value: -0.003194018649546709 and parameters: {'learning_rate': 0.016365179105293153, 'sigma_multiplier': 0.6222228096200653, 'initialization_multiplier': 0.8534815760017503}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 991 final loss: -0.00319402
Trial 992:
  Learning Rate: 0.021925735532081596
  Sigma Multiplier: 0.6052679600343225
  Initialization Multiplier: 0.7608712914043448


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.00it/s, loss=-0.003801, elapsed time=0.04, total time=6.67]
[I 2025-06-08 00:43:44,160] Trial 992 finished with value: -0.003801408115848795 and parameters: {'learning_rate': 0.021925735532081596, 'sigma_multiplier': 0.6052679600343225, 'initialization_multiplier': 0.7608712914043448}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 992 final loss: -0.00380141
Trial 993:
  Learning Rate: 0.0252668457571483
  Sigma Multiplier: 0.6696004487018684
  Initialization Multiplier: 0.6996499848510901


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.52it/s, loss=-0.003448, elapsed time=0.04, total time=6.52]
[I 2025-06-08 00:43:50,725] Trial 993 finished with value: -0.003448370843666295 and parameters: {'learning_rate': 0.0252668457571483, 'sigma_multiplier': 0.6696004487018684, 'initialization_multiplier': 0.6996499848510901}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 993 final loss: -0.00344837
Trial 994:
  Learning Rate: 0.03148270735277186
  Sigma Multiplier: 0.5892412053106434
  Initialization Multiplier: 0.5077443225800145


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.11it/s, loss=-0.003963, elapsed time=0.05, total time=6.92]
[I 2025-06-08 00:43:57,684] Trial 994 finished with value: -0.0039628155967464 and parameters: {'learning_rate': 0.03148270735277186, 'sigma_multiplier': 0.5892412053106434, 'initialization_multiplier': 0.5077443225800145}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 994 final loss: -0.00396282
Trial 995:
  Learning Rate: 0.03620040047462166
  Sigma Multiplier: 0.642215271900317
  Initialization Multiplier: 0.4881797194757416


Training Progress: 100%|██████████| 150/150 [00:07<00:00, 19.60it/s, loss=-0.003165, elapsed time=0.04, total time=8.04]
[I 2025-06-08 00:44:05,777] Trial 995 finished with value: -0.003164976644580301 and parameters: {'learning_rate': 0.03620040047462166, 'sigma_multiplier': 0.642215271900317, 'initialization_multiplier': 0.4881797194757416}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 995 final loss: -0.00316498
Trial 996:
  Learning Rate: 0.040904713398591745
  Sigma Multiplier: 0.600354886034346
  Initialization Multiplier: 0.5445117208665786


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 21.91it/s, loss=-0.003768, elapsed time=0.04, total time=7]   
[I 2025-06-08 00:44:12,820] Trial 996 finished with value: -0.003768387245678351 and parameters: {'learning_rate': 0.040904713398591745, 'sigma_multiplier': 0.600354886034346, 'initialization_multiplier': 0.5445117208665786}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 996 final loss: -0.00376839
Trial 997:
  Learning Rate: 0.03263495724161644
  Sigma Multiplier: 0.7995465420343926
  Initialization Multiplier: 0.45954067005768173


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.90it/s, loss=-0.003093, elapsed time=0.04, total time=6.68]
[I 2025-06-08 00:44:19,545] Trial 997 finished with value: -0.003093178233873647 and parameters: {'learning_rate': 0.03263495724161644, 'sigma_multiplier': 0.7995465420343926, 'initialization_multiplier': 0.45954067005768173}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 997 final loss: -0.00309318
Trial 998:
  Learning Rate: 0.04991010798772537
  Sigma Multiplier: 0.671307153425639
  Initialization Multiplier: 0.498094608518577


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 22.96it/s, loss=-0.002916, elapsed time=0.04, total time=6.68]
[I 2025-06-08 00:44:26,273] Trial 998 finished with value: -0.0029157130203282416 and parameters: {'learning_rate': 0.04991010798772537, 'sigma_multiplier': 0.671307153425639, 'initialization_multiplier': 0.498094608518577}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 998 final loss: -0.00291571
Trial 999:
  Learning Rate: 0.03239457774886417
  Sigma Multiplier: 0.6246712946428904
  Initialization Multiplier: 0.5023670601103138


Training Progress: 100%|██████████| 150/150 [00:06<00:00, 23.19it/s, loss=-0.003330, elapsed time=0.04, total time=6.62]
[I 2025-06-08 00:44:32,936] Trial 999 finished with value: -0.0033296215755960618 and parameters: {'learning_rate': 0.03239457774886417, 'sigma_multiplier': 0.6246712946428904, 'initialization_multiplier': 0.5023670601103138}. Best is trial 166 with value: -0.00410615678882426.


Training has not converged after 150 steps
Trial 999 final loss: -0.00332962


In [8]:
best_hyperparams = study.best_params
best_loss_value = study.best_value

print("\nOptimization Finished!")
print(f"Best hyperparameters found: {best_hyperparams}")
print(f"Best loss value: {best_loss_value}")


Optimization Finished!
Best hyperparameters found: {'learning_rate': 0.03297501232362791, 'sigma_multiplier': 0.5931140851110797, 'initialization_multiplier': 0.6042046367376791}
Best loss value: -0.00410615678882426


In [9]:
LR = best_hyperparams['learning_rate']
SIGMA_M = best_hyperparams['sigma_multiplier']
NUM_LAYERS = best_hyperparams['num_layers']
INIT_M = best_hyperparams['initialization_multiplier']

KeyError: 'num_layers'

In [10]:
def train_on_dataset(dataset=train_ds):
    grid_conn= aachen_connectivity()
    num_qubits = NODES * (NODES - 1) // 2
    gates = efficient_connectivity_gates(grid_conn, num_qubits, NUM_LAYERS)
    
    circuit = iqp.IqpSimulator(num_qubits, gates, device="lightning.qubit")
    
    initial_params = initialize_from_data(gates, dataset) * INIT_M
    loss = iqp.gen_qml.mmd_loss_iqp
    learning_rate = LR
    sigma = median_heuristic(dataset) * SIGMA_M
    
    loss_kwarg = {
        "params": initial_params,
        "iqp_circuit": circuit,
        "ground_truth": dataset,
        "sigma": [sigma],
        "n_ops": 2000,
        "n_samples": 2000,
        "key": jax.random.PRNGKey(42),
    }
    
    trainer = iqp.Trainer("Adam", loss, stepsize=learning_rate)
    trainer.train(n_iters= 2000,loss_kwargs=loss_kwarg, turbo=1)
    
    return trainer.final_params

In [None]:
params = train_on_dataset(train_ds)

Training Progress: 100%|██████████| 2000/2000 [01:05<00:00, 30.60it/s, loss=-0.001050, elapsed time=0.03, total time=65.5]


Training has not converged after 2000 steps


In [12]:
import numpy as np
np.save(f'./results/params/params_{NODES}N_{TYPE}_{CONN}_LR{LR}_SIGMA{SIGMA_M}_INIT{INIT_M}_MAXWEIGHT.npy', params)