In [3]:
import random,os

import optuna
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras as keras
import torch
from NBEATS import NeuralBeats

from sklearn.metrics import mean_squared_error
from keras import layers
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from keras.models import Sequential, Model
from keras.layers import LSTM, Activation, Input, Dense, Dropout, Flatten, Conv1D, MaxPooling1D, Reshape, Concatenate, concatenate

In [4]:
def set_seed():
    seed = 3407
    random.seed(seed)
    np.random.seed(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = str(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.random.set_seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)


In [5]:
def mlp(x, hidden_units, dropout_rate):
    for units in hidden_units:
        x = layers.Dense(units, activation=tf.nn.gelu)(x)
        x = layers.Dropout(dropout_rate)(x)
    return x

In [6]:
def ADNNet_model(x, hidden_1, hidden_2, dropout_rate=0.1, attention_dim=11, num_heads=4):
    x_input = Input(shape=(x.shape[1], x.shape[2]))
    AQI, P = x_input[:, :, 0], x_input[:, :, 1:]
    feature_time_lag = mlp(AQI, [hidden_1, hidden_2], dropout_rate)
    feature_pol_con = mlp(P, [hidden_1, hidden_2], dropout_rate)
    feature_seasonal = mlp(x_input, [hidden_1, hidden_2], dropout_rate)
    x_input1 = layers.Dense(hidden_2)(x_input)
    attention_output = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=attention_dim, dropout=0.0799
    )(x_input1, x_input1)
    x2 = layers.Add()([attention_output, feature_time_lag, feature_pol_con, feature_seasonal])
    x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
    x3 = mlp(x3, hidden_units=[hidden_2], dropout_rate=dropout_rate)
    encoded_patches = layers.Add()([x3, x2])
    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(dropout_rate)(representation)
    logits = layers.Dense(1)(representation)
    logits = layers.Dropout(dropout_rate)(logits)
    return Model(inputs=x_input, outputs=logits)

In [7]:

class Trainer():
    step_size = 7
    in_dim = 7

    def __init__(self, args=None):
        self.args = args
        self.load_data()

    def load_data(self):
        data = pd.read_csv('BJ.csv').iloc[:, [1,3,4,5,6,7,8]]
        self.labels = data['AQI'].values.reshape(-1,1)

        data = data.values
        self.ss = StandardScaler()
        self.data = self.ss.fit_transform(data)
        ts_x, ts_y = self.create_dataset(self.data, self.step_size)
        n = round(len(ts_x) * 0.8)
        self.ts_x_train, self.ts_x_valid = ts_x[:n], ts_x[n:]
        self.ts_y_train, self.ts_y_valid = ts_y[:n], ts_y[n:]

        # n = round(len(self.labels) * 0.8)
        # self.train_data = self.labels[:n]
        # self.valid_data = self.labels[n:]


    @staticmethod
    def create_dataset(data, step_size):
        res_x, res_y = [], []
        for i in range(len(data) - step_size):
            res_x.append(data[i:i + step_size])
            res_y.append(data[i + step_size, 0])
        return np.array(res_x), np.array(res_y)

    def objective(self, trial):
        lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
        hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
        hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
        dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
        attention_dim = trial.suggest_int('attention_dim',32,256,32)
        num_heads = trial.suggest_int('num_heads',2,8,2)
        batch_size = trial.suggest_int('batch_size', 64, 256,32)

        model = transformer_model(self.ts_x_train,hidden_1, hidden_2, dropout_rate, attention_dim, num_heads)
        model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam(learning_rate=lr))
        model.fit(
            self.ts_x_train,self.ts_y_train,
            validation_data=(self.ts_x_valid, self.ts_y_valid),
            batch_size=batch_size,
            epochs=20,
            verbose=0
        )

        predict = model.predict(self.ts_x_valid)
        mse = mean_squared_error(self.ts_y_valid, predict)
        return mse

    def optimizer_optuna(self):
        algo = optuna.samplers.TPESampler()
        study = optuna.create_study(
            sampler=algo, direction='minimize'
        )
        study.optimize(
            self.objective,
            n_trials=200,
            show_progress_bar=True
        )
        self.history = study


In [8]:
T = Trainer()
T.optimizer_optuna()
T.history.trials_dataframe().to_csv('ADNNet.csv',index=False,index_label=False)

[I 2024-01-19 18:58:35,367] A new study created in memory with name: no-name-531f39dd-5ea9-4de2-8379-2bae85c3ca7e


  0%|          | 0/200 [00:00<?, ?it/s]

  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 18:58:44,154] Trial 0 finished with value: 0.4781522676947728 and parameters: {'lr': 0.0027376322597026594, 'hidden_1': 64, 'hidden_2': 128, 'dropout': 0.5677112971618625, 'attention_dim': 64, 'num_heads': 4, 'batch_size': 256}. Best is trial 0 with value: 0.4781522676947728.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 18:58:50,468] Trial 1 finished with value: 0.5763723548041151 and parameters: {'lr': 1.883129396519963e-05, 'hidden_1': 256, 'hidden_2': 192, 'dropout': 0.22767952534735222, 'attention_dim': 96, 'num_heads': 8, 'batch_size': 192}. Best is trial 0 with value: 0.4781522676947728.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 18:59:02,650] Trial 2 finished with value: 0.5125268001660107 and parameters: {'lr': 0.003441810802988298, 'hidden_1': 160, 'hidden_2': 64, 'dropout': 0.6112062113145315, 'attention_dim': 224, 'num_heads': 8, 'batch_size': 64}. Best is trial 0 with value: 0.4781522676947728.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 18:59:15,212] Trial 3 finished with value: 0.6091716737813004 and parameters: {'lr': 1.4442141359763972e-05, 'hidden_1': 96, 'hidden_2': 128, 'dropout': 0.33776944900497474, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 64}. Best is trial 0 with value: 0.4781522676947728.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 18:59:21,271] Trial 4 finished with value: 0.4338392142146306 and parameters: {'lr': 0.0014996504932736521, 'hidden_1': 64, 'hidden_2': 160, 'dropout': 0.38492523432986697, 'attention_dim': 128, 'num_heads': 4, 'batch_size': 224}. Best is trial 4 with value: 0.4338392142146306.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 18:59:26,724] Trial 5 finished with value: 0.4736512704528439 and parameters: {'lr': 0.0006689504261188999, 'hidden_1': 192, 'hidden_2': 224, 'dropout': 0.07078502842816424, 'attention_dim': 64, 'num_heads': 8, 'batch_size': 256}. Best is trial 4 with value: 0.4338392142146306.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 18:59:35,952] Trial 6 finished with value: 0.4711283245127485 and parameters: {'lr': 0.00011912990247780449, 'hidden_1': 192, 'hidden_2': 128, 'dropout': 0.050520222752167415, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 96}. Best is trial 4 with value: 0.4338392142146306.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 18:59:45,159] Trial 7 finished with value: 0.6660264901458214 and parameters: {'lr': 0.009270515224048299, 'hidden_1': 128, 'hidden_2': 64, 'dropout': 0.7423586593995415, 'attention_dim': 160, 'num_heads': 4, 'batch_size': 96}. Best is trial 4 with value: 0.4338392142146306.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 18:59:51,610] Trial 8 finished with value: 0.5038054679903278 and parameters: {'lr': 7.055854348931048e-05, 'hidden_1': 160, 'hidden_2': 128, 'dropout': 0.2288049229536344, 'attention_dim': 128, 'num_heads': 2, 'batch_size': 192}. Best is trial 4 with value: 0.4338392142146306.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:00:00,870] Trial 9 finished with value: 0.47936122066546444 and parameters: {'lr': 4.445474274402713e-05, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.06473821767643831, 'attention_dim': 64, 'num_heads': 4, 'batch_size': 96}. Best is trial 4 with value: 0.4338392142146306.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:00:07,481] Trial 10 finished with value: 0.4611357528871135 and parameters: {'lr': 0.0006200407088545092, 'hidden_1': 64, 'hidden_2': 256, 'dropout': 0.48263252515515037, 'attention_dim': 256, 'num_heads': 6, 'batch_size': 192}. Best is trial 4 with value: 0.4338392142146306.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:00:14,102] Trial 11 finished with value: 0.4692271294072916 and parameters: {'lr': 0.0004679091072256136, 'hidden_1': 64, 'hidden_2': 256, 'dropout': 0.4661936525914378, 'attention_dim': 256, 'num_heads': 6, 'batch_size': 192}. Best is trial 4 with value: 0.4338392142146306.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:00:20,332] Trial 12 finished with value: 0.4339425270668643 and parameters: {'lr': 0.0011233343535786752, 'hidden_1': 64, 'hidden_2': 256, 'dropout': 0.4678439356824924, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 224}. Best is trial 4 with value: 0.4338392142146306.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:00:26,396] Trial 13 finished with value: 0.407800051321861 and parameters: {'lr': 0.0017699828404467855, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.32475719834691485, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 224}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:00:33,276] Trial 14 finished with value: 0.4348121937037442 and parameters: {'lr': 0.0021159952923208093, 'hidden_1': 128, 'hidden_2': 192, 'dropout': 0.329464468169191, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:00:39,338] Trial 15 finished with value: 0.44525016292020586 and parameters: {'lr': 0.00018872463243668443, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.23454508206977487, 'attention_dim': 32, 'num_heads': 4, 'batch_size': 224}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:00:46,166] Trial 16 finished with value: 0.5299959173255118 and parameters: {'lr': 0.009507034515029754, 'hidden_1': 128, 'hidden_2': 160, 'dropout': 0.3492150356979333, 'attention_dim': 96, 'num_heads': 6, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:00:52,204] Trial 17 finished with value: 0.49295189589910604 and parameters: {'lr': 0.0012773013782050738, 'hidden_1': 256, 'hidden_2': 224, 'dropout': 0.6282393949284095, 'attention_dim': 96, 'num_heads': 4, 'batch_size': 224}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:00:58,210] Trial 18 finished with value: 0.43575052998685715 and parameters: {'lr': 0.0002767512626399733, 'hidden_1': 96, 'hidden_2': 96, 'dropout': 0.15527186742233567, 'attention_dim': 192, 'num_heads': 6, 'batch_size': 256}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:01:05,860] Trial 19 finished with value: 0.549782035911461 and parameters: {'lr': 0.005081009488763179, 'hidden_1': 192, 'hidden_2': 224, 'dropout': 0.3960833513671326, 'attention_dim': 128, 'num_heads': 4, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:01:11,781] Trial 20 finished with value: 0.45720910109601925 and parameters: {'lr': 0.0016950104444378715, 'hidden_1': 128, 'hidden_2': 160, 'dropout': 0.2882254265669658, 'attention_dim': 160, 'num_heads': 2, 'batch_size': 224}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:01:17,942] Trial 21 finished with value: 0.43426935565987923 and parameters: {'lr': 0.0010344679417595485, 'hidden_1': 64, 'hidden_2': 256, 'dropout': 0.4682732366431402, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 224}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:01:23,885] Trial 22 finished with value: 0.47317186498143804 and parameters: {'lr': 0.0008285945772450346, 'hidden_1': 64, 'hidden_2': 192, 'dropout': 0.5400802214955621, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 224}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:01:29,360] Trial 23 finished with value: 0.44435247671489386 and parameters: {'lr': 0.004307473356671644, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.41853799793271523, 'attention_dim': 96, 'num_heads': 8, 'batch_size': 256}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:01:36,266] Trial 24 finished with value: 0.5536509431533337 and parameters: {'lr': 0.00044105095506079403, 'hidden_1': 64, 'hidden_2': 160, 'dropout': 0.7123266530092105, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 192}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:01:42,161] Trial 25 finished with value: 0.448290229330367 and parameters: {'lr': 0.001310013933299073, 'hidden_1': 96, 'hidden_2': 96, 'dropout': 0.4075201167518459, 'attention_dim': 128, 'num_heads': 4, 'batch_size': 224}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:01:49,011] Trial 26 finished with value: 0.4383512771384233 and parameters: {'lr': 0.00029460607014876477, 'hidden_1': 64, 'hidden_2': 192, 'dropout': 0.14510277704368074, 'attention_dim': 96, 'num_heads': 6, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:01:54,865] Trial 27 finished with value: 0.5104621604691448 and parameters: {'lr': 0.0020620021260288755, 'hidden_1': 96, 'hidden_2': 256, 'dropout': 0.5338895431484689, 'attention_dim': 32, 'num_heads': 8, 'batch_size': 256}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:02:01,841] Trial 28 finished with value: 0.4904299181301814 and parameters: {'lr': 0.005276172359811917, 'hidden_1': 128, 'hidden_2': 224, 'dropout': 0.285239913862561, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:02:07,388] Trial 29 finished with value: 0.5721018696302046 and parameters: {'lr': 0.002764400430111438, 'hidden_1': 64, 'hidden_2': 96, 'dropout': 0.6199431964197255, 'attention_dim': 64, 'num_heads': 6, 'batch_size': 256}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:02:15,176] Trial 30 finished with value: 0.4270498549701438 and parameters: {'lr': 0.0008969145993348152, 'hidden_1': 64, 'hidden_2': 192, 'dropout': 0.3828731273257094, 'attention_dim': 160, 'num_heads': 4, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:02:22,930] Trial 31 finished with value: 0.43222209276570295 and parameters: {'lr': 0.0010173683713527147, 'hidden_1': 64, 'hidden_2': 192, 'dropout': 0.378611646161787, 'attention_dim': 160, 'num_heads': 4, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:02:30,700] Trial 32 finished with value: 0.4118954353252616 and parameters: {'lr': 0.0018170653524140087, 'hidden_1': 64, 'hidden_2': 160, 'dropout': 0.3845011299501751, 'attention_dim': 160, 'num_heads': 4, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:02:38,245] Trial 33 finished with value: 0.42051675916908277 and parameters: {'lr': 0.002520857071020457, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.2985328853400012, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:02:46,087] Trial 34 finished with value: 0.46161004763093394 and parameters: {'lr': 0.002806430626158667, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.27162811367519524, 'attention_dim': 224, 'num_heads': 2, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:02:53,649] Trial 35 finished with value: 0.42243319923040956 and parameters: {'lr': 0.003611800881492032, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.16951178601176337, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:03:02,849] Trial 36 finished with value: 0.4704874996977822 and parameters: {'lr': 0.0066697404779154194, 'hidden_1': 160, 'hidden_2': 192, 'dropout': 0.18811400133265208, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 96}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:03:10,726] Trial 37 finished with value: 0.4686766914999318 and parameters: {'lr': 0.0035516358012027145, 'hidden_1': 128, 'hidden_2': 128, 'dropout': 0.11775998854568112, 'attention_dim': 224, 'num_heads': 2, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:03:21,816] Trial 38 finished with value: 0.44728498784077475 and parameters: {'lr': 0.0020775012681909094, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.2014814050784543, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 64}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:03:26,606] Trial 39 finished with value: 0.49130200242869293 and parameters: {'lr': 0.006454238496455757, 'hidden_1': 224, 'hidden_2': 128, 'dropout': 0.3338803970338385, 'attention_dim': 256, 'num_heads': 2, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:03:33,652] Trial 40 finished with value: 0.5097745182482525 and parameters: {'lr': 0.003839867379421873, 'hidden_1': 160, 'hidden_2': 160, 'dropout': 0.02037870311722667, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 96}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:03:39,183] Trial 41 finished with value: 0.4436788497853682 and parameters: {'lr': 0.0017414124962371242, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.2920272608010053, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:03:44,717] Trial 42 finished with value: 0.4143750894483818 and parameters: {'lr': 0.0007792467875062437, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.25305042732211797, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:03:51,803] Trial 43 finished with value: 0.436890091079425 and parameters: {'lr': 0.0028521129768975953, 'hidden_1': 128, 'hidden_2': 224, 'dropout': 0.25510609266603484, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 96}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:03:57,292] Trial 44 finished with value: 0.425371589708323 and parameters: {'lr': 0.0006603302537264287, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.10034549988302588, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:04:02,884] Trial 45 finished with value: 0.48622482876844025 and parameters: {'lr': 1.984129179697753e-05, 'hidden_1': 128, 'hidden_2': 160, 'dropout': 0.2069248725299267, 'attention_dim': 256, 'num_heads': 4, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:04:07,807] Trial 46 finished with value: 0.42180036312202246 and parameters: {'lr': 0.0014039053049020834, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.31302087363878184, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:04:12,570] Trial 47 finished with value: 0.4225313025980391 and parameters: {'lr': 0.000541436436987748, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.34468398790835386, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:04:17,070] Trial 48 finished with value: 0.4379735785995632 and parameters: {'lr': 0.00042087798734144954, 'hidden_1': 224, 'hidden_2': 224, 'dropout': 0.4406377134742063, 'attention_dim': 160, 'num_heads': 2, 'batch_size': 192}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:04:21,818] Trial 49 finished with value: 0.4325689365635672 and parameters: {'lr': 0.0014958548563385244, 'hidden_1': 128, 'hidden_2': 224, 'dropout': 0.31238940879820104, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:04:31,387] Trial 50 finished with value: 0.43323718015641577 and parameters: {'lr': 0.000761505343416709, 'hidden_1': 64, 'hidden_2': 256, 'dropout': 0.25039113295946214, 'attention_dim': 160, 'num_heads': 8, 'batch_size': 64}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:04:38,307] Trial 51 finished with value: 0.4161668839425032 and parameters: {'lr': 0.00241829611079325, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.17082411625601182, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 96}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:04:45,263] Trial 52 finished with value: 0.48693063532212133 and parameters: {'lr': 0.0023832618647901542, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.3577038229225419, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 96}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:04:52,410] Trial 53 finished with value: 0.44353381534338054 and parameters: {'lr': 0.0018581043411402425, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.30687305217765143, 'attention_dim': 256, 'num_heads': 4, 'batch_size': 96}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:01,807] Trial 54 finished with value: 0.42707671351932774 and parameters: {'lr': 0.0012914080564399824, 'hidden_1': 128, 'hidden_2': 192, 'dropout': 0.23223946919808353, 'attention_dim': 192, 'num_heads': 6, 'batch_size': 64}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:08,378] Trial 55 finished with value: 0.46355368224268084 and parameters: {'lr': 0.00019552257936609167, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.4342906880433437, 'attention_dim': 224, 'num_heads': 2, 'batch_size': 96}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:13,260] Trial 56 finished with value: 0.4330894794734153 and parameters: {'lr': 0.0010583384802443877, 'hidden_1': 64, 'hidden_2': 192, 'dropout': 0.3618216426255048, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:18,796] Trial 57 finished with value: 0.4279758177317907 and parameters: {'lr': 0.0027142672118044588, 'hidden_1': 128, 'hidden_2': 224, 'dropout': 0.2614531150800089, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:23,225] Trial 58 finished with value: 0.4122688076101225 and parameters: {'lr': 0.0016045481129905797, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.3205056681732499, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 192}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:27,757] Trial 59 finished with value: 0.48457717765812147 and parameters: {'lr': 0.005230624207909361, 'hidden_1': 64, 'hidden_2': 160, 'dropout': 0.22154754776024854, 'attention_dim': 128, 'num_heads': 4, 'batch_size': 192}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:32,364] Trial 60 finished with value: 0.4360920413724374 and parameters: {'lr': 0.0016431470487233224, 'hidden_1': 160, 'hidden_2': 128, 'dropout': 0.49634032223717844, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 192}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:36,704] Trial 61 finished with value: 0.43411205128472885 and parameters: {'lr': 0.0014097554427247774, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.31118103766789834, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 192}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:41,595] Trial 62 finished with value: 0.47125185448095736 and parameters: {'lr': 0.0022614933609903663, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.3176772363551493, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:46,297] Trial 63 finished with value: 0.4385746576468369 and parameters: {'lr': 0.0008447567151583982, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.1312953618648997, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 160}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:50,300] Trial 64 finished with value: 0.4480001226924443 and parameters: {'lr': 0.001179835866251974, 'hidden_1': 96, 'hidden_2': 128, 'dropout': 0.1822865667555248, 'attention_dim': 128, 'num_heads': 4, 'batch_size': 224}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:05:57,116] Trial 65 finished with value: 0.43323198509646466 and parameters: {'lr': 0.00036585488670312676, 'hidden_1': 64, 'hidden_2': 256, 'dropout': 0.3737012825360286, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 96}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:02,721] Trial 66 finished with value: 0.41359795690979484 and parameters: {'lr': 0.0005960407874482491, 'hidden_1': 128, 'hidden_2': 160, 'dropout': 0.40578709195487617, 'attention_dim': 192, 'num_heads': 6, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:08,360] Trial 67 finished with value: 0.4684145672861247 and parameters: {'lr': 0.0005249720095045333, 'hidden_1': 128, 'hidden_2': 160, 'dropout': 0.40883102579701547, 'attention_dim': 256, 'num_heads': 6, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:13,874] Trial 68 finished with value: 0.500690809438852 and parameters: {'lr': 9.06086437568365e-05, 'hidden_1': 128, 'hidden_2': 160, 'dropout': 0.4406304142345139, 'attention_dim': 96, 'num_heads': 6, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:19,558] Trial 69 finished with value: 0.459561048591665 and parameters: {'lr': 0.0006194732881402741, 'hidden_1': 64, 'hidden_2': 160, 'dropout': 0.4974771926410102, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 128}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:26,156] Trial 70 finished with value: 0.46576783932321364 and parameters: {'lr': 0.00024287233875315984, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.34228725507275826, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 96}. Best is trial 13 with value: 0.407800051321861.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:31,010] Trial 71 finished with value: 0.4055255094729398 and parameters: {'lr': 0.0009407393722781288, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.29193254425214576, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 160}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:36,447] Trial 72 finished with value: 0.4248697879937203 and parameters: {'lr': 0.0009922722690144474, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.28444742797039696, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:40,905] Trial 73 finished with value: 0.4468294261399929 and parameters: {'lr': 0.0007239344000382332, 'hidden_1': 128, 'hidden_2': 192, 'dropout': 0.3922597200170046, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 192}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:45,743] Trial 74 finished with value: 0.43814728281235826 and parameters: {'lr': 0.003025719778211547, 'hidden_1': 64, 'hidden_2': 192, 'dropout': 0.2360448642061885, 'attention_dim': 160, 'num_heads': 4, 'batch_size': 160}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:51,290] Trial 75 finished with value: 0.4344812046188521 and parameters: {'lr': 0.004310413929995195, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.28471726773203904, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:06:58,129] Trial 76 finished with value: 0.49427636468142994 and parameters: {'lr': 0.0019560621470556608, 'hidden_1': 128, 'hidden_2': 192, 'dropout': 0.26675196077077856, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 96}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:02,184] Trial 77 finished with value: 0.5935706030425596 and parameters: {'lr': 0.002384233602710677, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.7887759331584742, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 224}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:07,746] Trial 78 finished with value: 0.4132685912143812 and parameters: {'lr': 0.0008640233924903172, 'hidden_1': 64, 'hidden_2': 128, 'dropout': 0.3674350570247843, 'attention_dim': 192, 'num_heads': 6, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:13,439] Trial 79 finished with value: 0.4809589959914625 and parameters: {'lr': 0.0003486021612164366, 'hidden_1': 64, 'hidden_2': 96, 'dropout': 0.4196959957168994, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:18,140] Trial 80 finished with value: 0.44306214802588945 and parameters: {'lr': 0.00056858691838906, 'hidden_1': 64, 'hidden_2': 128, 'dropout': 0.36671150915694534, 'attention_dim': 192, 'num_heads': 8, 'batch_size': 192}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:23,666] Trial 81 finished with value: 0.4305394777041304 and parameters: {'lr': 0.0009054550148489083, 'hidden_1': 96, 'hidden_2': 64, 'dropout': 0.32816120453826575, 'attention_dim': 192, 'num_heads': 6, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:29,211] Trial 82 finished with value: 0.4382314205503208 and parameters: {'lr': 0.0011688052439204318, 'hidden_1': 64, 'hidden_2': 96, 'dropout': 0.33587756990523104, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:33,971] Trial 83 finished with value: 0.45877090032640044 and parameters: {'lr': 0.0016087624725223233, 'hidden_1': 96, 'hidden_2': 128, 'dropout': 0.46226812925782745, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 160}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:39,582] Trial 84 finished with value: 0.44504076890750743 and parameters: {'lr': 0.0007941781122182569, 'hidden_1': 160, 'hidden_2': 160, 'dropout': 0.20828086796222184, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:46,641] Trial 85 finished with value: 0.4516901335765775 and parameters: {'lr': 0.00044448422665697507, 'hidden_1': 64, 'hidden_2': 128, 'dropout': 0.3936138001677596, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 96}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:51,335] Trial 86 finished with value: 0.44925731797866214 and parameters: {'lr': 0.00310550416981292, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.29490770171513747, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 160}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:07:56,703] Trial 87 finished with value: 0.4318567856741791 and parameters: {'lr': 0.0018323827074812815, 'hidden_1': 64, 'hidden_2': 160, 'dropout': 0.16716692940028433, 'attention_dim': 128, 'num_heads': 4, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:00,505] Trial 88 finished with value: 0.4407751574312186 and parameters: {'lr': 0.0006754217169833719, 'hidden_1': 192, 'hidden_2': 192, 'dropout': 0.09810637140337448, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 256}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:09,298] Trial 89 finished with value: 0.4435673056879485 and parameters: {'lr': 0.0012141299920154541, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.24305975039491548, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 64}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:15,971] Trial 90 finished with value: 0.6122041249776623 and parameters: {'lr': 1.0979747413002858e-05, 'hidden_1': 128, 'hidden_2': 224, 'dropout': 0.27369294362816254, 'attention_dim': 96, 'num_heads': 4, 'batch_size': 96}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:20,597] Trial 91 finished with value: 0.4209618398435878 and parameters: {'lr': 0.0014843098694273134, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.35430888436906094, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 160}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:25,367] Trial 92 finished with value: 0.4817216944206823 and parameters: {'lr': 0.0014882009070885513, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.3820183193763967, 'attention_dim': 160, 'num_heads': 2, 'batch_size': 160}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:30,294] Trial 93 finished with value: 0.4385606937433138 and parameters: {'lr': 0.0009207617918982232, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.3523426771592017, 'attention_dim': 192, 'num_heads': 6, 'batch_size': 160}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:35,630] Trial 94 finished with value: 0.4328438291130947 and parameters: {'lr': 0.0023055213860417546, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.42131831436752265, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:40,177] Trial 95 finished with value: 0.49047424374713217 and parameters: {'lr': 0.0010933172237896944, 'hidden_1': 64, 'hidden_2': 224, 'dropout': 0.36238446295075977, 'attention_dim': 256, 'num_heads': 6, 'batch_size': 192}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:44,922] Trial 96 finished with value: 0.5659142364960456 and parameters: {'lr': 0.0041245930851767434, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.3022685975316254, 'attention_dim': 224, 'num_heads': 2, 'batch_size': 160}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:50,454] Trial 97 finished with value: 0.4609153896036736 and parameters: {'lr': 0.0025462198897759376, 'hidden_1': 96, 'hidden_2': 256, 'dropout': 0.45318805087029335, 'attention_dim': 192, 'num_heads': 4, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:08:54,583] Trial 98 finished with value: 0.42902546700591765 and parameters: {'lr': 0.0033048028829433867, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.32463218395126153, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 224}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:00,349] Trial 99 finished with value: 0.4502287695413867 and parameters: {'lr': 0.0020059896075483707, 'hidden_1': 128, 'hidden_2': 224, 'dropout': 0.4059283448602452, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:05,187] Trial 100 finished with value: 0.4396630863652362 and parameters: {'lr': 0.0016963226054737283, 'hidden_1': 64, 'hidden_2': 160, 'dropout': 0.34572012663428203, 'attention_dim': 192, 'num_heads': 8, 'batch_size': 160}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:09,831] Trial 101 finished with value: 0.4567340841801168 and parameters: {'lr': 0.001329812117385288, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.3237711095168177, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 160}. Best is trial 71 with value: 0.4055255094729398.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:14,689] Trial 102 finished with value: 0.4012485291441349 and parameters: {'lr': 0.0012911542744037815, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.3003030358118872, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 160}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:19,400] Trial 103 finished with value: 0.4847527838777184 and parameters: {'lr': 0.0010317105730389682, 'hidden_1': 96, 'hidden_2': 128, 'dropout': 0.21771794405401956, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 160}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:24,097] Trial 104 finished with value: 0.5193574896917978 and parameters: {'lr': 0.0007490944682505169, 'hidden_1': 128, 'hidden_2': 256, 'dropout': 0.6553066038473387, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 160}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:28,478] Trial 105 finished with value: 0.4115789592174945 and parameters: {'lr': 0.0004980833460628573, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.2566882571590548, 'attention_dim': 128, 'num_heads': 2, 'batch_size': 192}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:32,883] Trial 106 finished with value: 0.42517112919024874 and parameters: {'lr': 0.0005131624954047437, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.25469679536642187, 'attention_dim': 128, 'num_heads': 4, 'batch_size': 192}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:36,709] Trial 107 finished with value: 0.42883161258113245 and parameters: {'lr': 0.0004009443031208494, 'hidden_1': 224, 'hidden_2': 192, 'dropout': 0.2763524040477566, 'attention_dim': 128, 'num_heads': 2, 'batch_size': 224}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:41,261] Trial 108 finished with value: 0.5730924166761686 and parameters: {'lr': 3.583513572665543e-05, 'hidden_1': 64, 'hidden_2': 224, 'dropout': 0.1895310879835205, 'attention_dim': 128, 'num_heads': 4, 'batch_size': 192}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:46,560] Trial 109 finished with value: 0.4520908426300324 and parameters: {'lr': 0.0005850033159623244, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.3057085430300683, 'attention_dim': 96, 'num_heads': 6, 'batch_size': 128}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:50,839] Trial 110 finished with value: 0.43770408422670687 and parameters: {'lr': 0.0006661236486070672, 'hidden_1': 128, 'hidden_2': 160, 'dropout': 0.29506168642725733, 'attention_dim': 160, 'num_heads': 2, 'batch_size': 192}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:09:55,564] Trial 111 finished with value: 0.48680660523846686 and parameters: {'lr': 0.0013452353162455581, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.3704999502144399, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 160}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:00,952] Trial 112 finished with value: 0.43968249169062656 and parameters: {'lr': 0.0015506096774359928, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.33499647043025205, 'attention_dim': 128, 'num_heads': 2, 'batch_size': 128}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:04,531] Trial 113 finished with value: 0.4443522828181434 and parameters: {'lr': 0.0008845120791870927, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.26688702502235756, 'attention_dim': 192, 'num_heads': 2, 'batch_size': 256}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:09,145] Trial 114 finished with value: 0.4115032168818277 and parameters: {'lr': 0.0021373748827718266, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.2517626457212604, 'attention_dim': 224, 'num_heads': 2, 'batch_size': 160}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:14,718] Trial 115 finished with value: 0.42579941879195526 and parameters: {'lr': 0.002118948304699253, 'hidden_1': 96, 'hidden_2': 256, 'dropout': 0.24569215299419692, 'attention_dim': 224, 'num_heads': 2, 'batch_size': 128}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:19,208] Trial 116 finished with value: 0.47919882664868974 and parameters: {'lr': 0.0026701675833004494, 'hidden_1': 64, 'hidden_2': 224, 'dropout': 0.14761572700349998, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 192}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:23,192] Trial 117 finished with value: 0.4127194592347242 and parameters: {'lr': 0.0004895586720196295, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.22712056764160132, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 224}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:27,403] Trial 118 finished with value: 0.49607605154499956 and parameters: {'lr': 0.0004829662327911996, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.2185083111643083, 'attention_dim': 256, 'num_heads': 4, 'batch_size': 224}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:31,493] Trial 119 finished with value: 0.42175350666797534 and parameters: {'lr': 0.00038912535370892525, 'hidden_1': 128, 'hidden_2': 160, 'dropout': 0.19981037837216944, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 224}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:35,615] Trial 120 finished with value: 0.42116665827663935 and parameters: {'lr': 0.00033914492387879523, 'hidden_1': 64, 'hidden_2': 192, 'dropout': 0.17109418849091795, 'attention_dim': 128, 'num_heads': 4, 'batch_size': 224}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:39,694] Trial 121 finished with value: 0.42690124276095526 and parameters: {'lr': 0.0018761227703256245, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.23576270385905293, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 224}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:45,219] Trial 122 finished with value: 0.412727558228281 and parameters: {'lr': 0.0008168610113504434, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.2777900667101574, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:52,033] Trial 123 finished with value: 0.42866468698322213 and parameters: {'lr': 0.0007854158608013282, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.2557270390316006, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 96}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:10:57,906] Trial 124 finished with value: 0.44113062514253104 and parameters: {'lr': 0.00027271523067750144, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.2815910452527025, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:02,480] Trial 125 finished with value: 0.4273572068761822 and parameters: {'lr': 0.0010044237861433958, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.22661933216733432, 'attention_dim': 256, 'num_heads': 4, 'batch_size': 192}. Best is trial 102 with value: 0.4012485291441349.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:07,510] Trial 126 finished with value: 0.4011217632205893 and parameters: {'lr': 0.000573524476683021, 'hidden_1': 256, 'hidden_2': 224, 'dropout': 0.2635647624410654, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 160}. Best is trial 126 with value: 0.4011217632205893.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:12,614] Trial 127 finished with value: 0.4245149095655305 and parameters: {'lr': 0.0004610142149257328, 'hidden_1': 256, 'hidden_2': 224, 'dropout': 0.26627571308253734, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 160}. Best is trial 126 with value: 0.4011217632205893.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:17,508] Trial 128 finished with value: 0.4192707983010454 and parameters: {'lr': 0.0005918446334997072, 'hidden_1': 224, 'hidden_2': 224, 'dropout': 0.315706216829441, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 160}. Best is trial 126 with value: 0.4011217632205893.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:22,305] Trial 129 finished with value: 0.4600201752623165 and parameters: {'lr': 0.000699051249699851, 'hidden_1': 160, 'hidden_2': 224, 'dropout': 0.2930452444778921, 'attention_dim': 192, 'num_heads': 6, 'batch_size': 160}. Best is trial 126 with value: 0.4011217632205893.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:27,348] Trial 130 finished with value: 0.5061729845212654 and parameters: {'lr': 0.000878324936774714, 'hidden_1': 64, 'hidden_2': 256, 'dropout': 0.2431078198431026, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 160}. Best is trial 126 with value: 0.4011217632205893.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:33,058] Trial 131 finished with value: 0.400416160182687 and parameters: {'lr': 0.0012433400099414293, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.27669213011333293, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 128}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:38,709] Trial 132 finished with value: 0.4152722933254675 and parameters: {'lr': 0.0011785298985018747, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.2774781024127592, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 128}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:44,517] Trial 133 finished with value: 0.41058869858531294 and parameters: {'lr': 0.0006295625842797068, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.2595946894034218, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 128}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:50,327] Trial 134 finished with value: 0.4383608323752399 and parameters: {'lr': 0.0005290675558345991, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.30618636475288774, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 128}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:11:56,000] Trial 135 finished with value: 0.4305673154145146 and parameters: {'lr': 0.0003077870912368251, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.2630606377879795, 'attention_dim': 256, 'num_heads': 6, 'batch_size': 128}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:01,906] Trial 136 finished with value: 0.41922581937838355 and parameters: {'lr': 0.0006128881082105808, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.32398210854398957, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 128}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:07,420] Trial 137 finished with value: 0.46166238194844933 and parameters: {'lr': 0.0012747942724481737, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.3834479970457855, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 128}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:12,356] Trial 138 finished with value: 0.4135860447711213 and parameters: {'lr': 0.001040826325855667, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.2837062880997074, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 160}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:17,241] Trial 139 finished with value: 0.4193261651740002 and parameters: {'lr': 0.0010602366120965343, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.28855466960169684, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 160}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:22,396] Trial 140 finished with value: 0.43015470576659753 and parameters: {'lr': 0.0009423973816444114, 'hidden_1': 224, 'hidden_2': 160, 'dropout': 0.22979026015972437, 'attention_dim': 256, 'num_heads': 6, 'batch_size': 160}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:27,252] Trial 141 finished with value: 0.44433335275996916 and parameters: {'lr': 0.0007192478262423655, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.343324480301401, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 160}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:32,162] Trial 142 finished with value: 0.4180817117191409 and parameters: {'lr': 0.0007850430299141056, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.250808837595936, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 160}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:36,500] Trial 143 finished with value: 0.4234054091333754 and parameters: {'lr': 0.0016216377556039082, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.2072126507947774, 'attention_dim': 224, 'num_heads': 6, 'batch_size': 224}. Best is trial 131 with value: 0.400416160182687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:42,068] Trial 144 finished with value: 0.3981115267154687 and parameters: {'lr': 0.0006568221771998115, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.30097927649530637, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:47,607] Trial 145 finished with value: 0.4009694792958036 and parameters: {'lr': 0.0011213745210462852, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.3061277020089106, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:53,425] Trial 146 finished with value: 0.4164470676553501 and parameters: {'lr': 0.0014583652402839977, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.30963289865807186, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:12:59,011] Trial 147 finished with value: 0.4121793796872401 and parameters: {'lr': 0.000434588739435644, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.2991482376621438, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:04,635] Trial 148 finished with value: 0.41032737250986695 and parameters: {'lr': 0.0002169120953849328, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.2698587557763117, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:10,416] Trial 149 finished with value: 0.4358309324196356 and parameters: {'lr': 0.0002386086998739972, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.29814626820690526, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:16,018] Trial 150 finished with value: 0.420038858064007 and parameters: {'lr': 0.00015866116520877336, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.2636628592968228, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:21,671] Trial 151 finished with value: 0.4308496173581071 and parameters: {'lr': 0.00042964743799523845, 'hidden_1': 256, 'hidden_2': 96, 'dropout': 0.27612146646341584, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:27,415] Trial 152 finished with value: 0.43775902282692886 and parameters: {'lr': 0.0002065457326829729, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.31766875219531093, 'attention_dim': 96, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:33,020] Trial 153 finished with value: 0.4463601314970006 and parameters: {'lr': 0.00048024049871184764, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.23745249687505843, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:38,621] Trial 154 finished with value: 0.4662088079538339 and parameters: {'lr': 0.00014812276083917496, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.2937102941623338, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:44,220] Trial 155 finished with value: 0.4159029402408837 and parameters: {'lr': 0.0003278840201051194, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.25570604132082575, 'attention_dim': 128, 'num_heads': 2, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:49,799] Trial 156 finished with value: 0.4149711479135893 and parameters: {'lr': 0.0006641406261766178, 'hidden_1': 224, 'hidden_2': 128, 'dropout': 0.3357950867380121, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:53,601] Trial 157 finished with value: 0.41753951128133654 and parameters: {'lr': 0.0018208999075450037, 'hidden_1': 256, 'hidden_2': 96, 'dropout': 0.2710821866949019, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 256}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:13:57,706] Trial 158 finished with value: 0.4175809307929981 and parameters: {'lr': 0.0005440548301294945, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.30503967038513163, 'attention_dim': 96, 'num_heads': 2, 'batch_size': 224}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:02,185] Trial 159 finished with value: 0.4179540974776087 and parameters: {'lr': 0.0003684861657971587, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.21354032841405277, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 192}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:07,751] Trial 160 finished with value: 0.42421759128442055 and parameters: {'lr': 0.00027238351355675947, 'hidden_1': 256, 'hidden_2': 224, 'dropout': 0.24679664622884714, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:13,352] Trial 161 finished with value: 0.46688363035899516 and parameters: {'lr': 0.0011819504928436678, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.35227330922652145, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:18,889] Trial 162 finished with value: 0.4176790518671437 and parameters: {'lr': 0.0008272889571101137, 'hidden_1': 192, 'hidden_2': 128, 'dropout': 0.32522189548200925, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:24,686] Trial 163 finished with value: 0.41277119796625616 and parameters: {'lr': 0.0013386248405872754, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.28246218938245604, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:30,543] Trial 164 finished with value: 0.43020722287474905 and parameters: {'lr': 0.0013943908188352836, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.2807836728463493, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:36,001] Trial 165 finished with value: 0.4203106758306993 and parameters: {'lr': 0.002059702033858859, 'hidden_1': 256, 'hidden_2': 128, 'dropout': 0.29315142343147216, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:41,368] Trial 166 finished with value: 0.4291838336672393 and parameters: {'lr': 0.001694033023872045, 'hidden_1': 256, 'hidden_2': 192, 'dropout': 0.27015572239715807, 'attention_dim': 128, 'num_heads': 2, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:45,827] Trial 167 finished with value: 0.42522293931117605 and parameters: {'lr': 0.0011495710353224657, 'hidden_1': 224, 'hidden_2': 96, 'dropout': 0.23654623667347352, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 224}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:51,354] Trial 168 finished with value: 0.41249998057862 and parameters: {'lr': 0.00044563835007619486, 'hidden_1': 192, 'hidden_2': 160, 'dropout': 0.31287149693796446, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:14:56,061] Trial 169 finished with value: 0.41937643177691947 and parameters: {'lr': 0.0004833648927351303, 'hidden_1': 192, 'hidden_2': 160, 'dropout': 0.31450099305791873, 'attention_dim': 96, 'num_heads': 2, 'batch_size': 160}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:01,737] Trial 170 finished with value: 0.4447518157776543 and parameters: {'lr': 0.00039344606822779157, 'hidden_1': 224, 'hidden_2': 160, 'dropout': 0.33309485980917425, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:07,480] Trial 171 finished with value: 0.4143136651553767 and parameters: {'lr': 0.0005468231522920225, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.3015137886383901, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:13,139] Trial 172 finished with value: 0.4082253987080556 and parameters: {'lr': 0.0006029458893157878, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.25951081694920963, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:18,761] Trial 173 finished with value: 0.4206225875889535 and parameters: {'lr': 0.0006523702310680537, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.26731567905391124, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:24,463] Trial 174 finished with value: 0.4251625456333719 and parameters: {'lr': 0.0004062289631671964, 'hidden_1': 256, 'hidden_2': 160, 'dropout': 0.25439621293799775, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:30,040] Trial 175 finished with value: 0.43895486417152857 and parameters: {'lr': 0.0004546881667729604, 'hidden_1': 160, 'hidden_2': 160, 'dropout': 0.22303983191564686, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:35,535] Trial 176 finished with value: 0.41690968777864884 and parameters: {'lr': 0.0005968932275249769, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.2906349317237338, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:40,174] Trial 177 finished with value: 0.4143078484789816 and parameters: {'lr': 0.0007382631985393222, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.25721297525125475, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 192}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:45,685] Trial 178 finished with value: 0.4271292917957575 and parameters: {'lr': 0.0009407411199244602, 'hidden_1': 256, 'hidden_2': 224, 'dropout': 0.31756976309581897, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:49,822] Trial 179 finished with value: 0.41510431160640293 and parameters: {'lr': 0.0005273856977138278, 'hidden_1': 256, 'hidden_2': 192, 'dropout': 0.24330508582096894, 'attention_dim': 224, 'num_heads': 8, 'batch_size': 224}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:15:55,082] Trial 180 finished with value: 0.41235482550138836 and parameters: {'lr': 0.0006433397425231234, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.30446132483859595, 'attention_dim': 32, 'num_heads': 2, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:00,201] Trial 181 finished with value: 0.42025155695225375 and parameters: {'lr': 0.000626615555493551, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.30245132201077374, 'attention_dim': 32, 'num_heads': 2, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:05,508] Trial 182 finished with value: 0.4170689909731808 and parameters: {'lr': 0.00046472429560757305, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.2775327952865207, 'attention_dim': 64, 'num_heads': 2, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:11,154] Trial 183 finished with value: 0.420547176399807 and parameters: {'lr': 0.0007084615140697166, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.3316706617877108, 'attention_dim': 32, 'num_heads': 2, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:16,622] Trial 184 finished with value: 0.41446624401395493 and parameters: {'lr': 0.000828023303846161, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.28983001800098623, 'attention_dim': 96, 'num_heads': 2, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:21,894] Trial 185 finished with value: 0.44912656584628585 and parameters: {'lr': 0.0005288942976635607, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.26824977212925366, 'attention_dim': 64, 'num_heads': 2, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:27,644] Trial 186 finished with value: 0.41247963118135433 and parameters: {'lr': 0.0005720897332097425, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.3096501869261924, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:32,444] Trial 187 finished with value: 0.4208417183375036 and parameters: {'lr': 0.0003525886418324805, 'hidden_1': 256, 'hidden_2': 192, 'dropout': 0.3126684695716369, 'attention_dim': 224, 'num_heads': 2, 'batch_size': 160}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:37,354] Trial 188 finished with value: 0.4170574281614059 and parameters: {'lr': 0.00042561145459052435, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.3012881351731914, 'attention_dim': 160, 'num_heads': 6, 'batch_size': 160}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:42,336] Trial 189 finished with value: 0.42707454163524855 and parameters: {'lr': 0.0006267352399616703, 'hidden_1': 192, 'hidden_2': 224, 'dropout': 0.3492067362484784, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 160}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:47,830] Trial 190 finished with value: 0.4172251336937848 and parameters: {'lr': 0.0005085312375401619, 'hidden_1': 256, 'hidden_2': 192, 'dropout': 0.19481082003377442, 'attention_dim': 128, 'num_heads': 6, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:53,468] Trial 191 finished with value: 0.4503085265103412 and parameters: {'lr': 0.0005826421179690089, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.2790752135814777, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:16:58,925] Trial 192 finished with value: 0.41444019188428405 and parameters: {'lr': 0.000827895538591652, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.25495954025766004, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:17:04,734] Trial 193 finished with value: 0.4252172937417957 and parameters: {'lr': 0.0010168983230251678, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.3196126820095215, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:17:10,344] Trial 194 finished with value: 0.4369014489630733 and parameters: {'lr': 0.0007488946804779488, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.2920282330398088, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:17:15,825] Trial 195 finished with value: 0.44016804214760114 and parameters: {'lr': 0.002233355556447423, 'hidden_1': 96, 'hidden_2': 192, 'dropout': 0.2270666619675499, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:17:21,563] Trial 196 finished with value: 0.4379989805902203 and parameters: {'lr': 0.0006828954475766589, 'hidden_1': 96, 'hidden_2': 160, 'dropout': 0.3376925442128196, 'attention_dim': 224, 'num_heads': 4, 'batch_size': 128}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:17:26,348] Trial 197 finished with value: 0.4101016892791037 and parameters: {'lr': 0.0015925400870056207, 'hidden_1': 96, 'hidden_2': 224, 'dropout': 0.30635643654420547, 'attention_dim': 32, 'num_heads': 6, 'batch_size': 160}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:17:30,942] Trial 198 finished with value: 0.40783934479637024 and parameters: {'lr': 0.0016026889834163238, 'hidden_1': 256, 'hidden_2': 224, 'dropout': 0.3090516439357938, 'attention_dim': 32, 'num_heads': 6, 'batch_size': 160}. Best is trial 144 with value: 0.3981115267154687.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
  hidden_1 = trial.suggest_int('hidden_1', 64, 256,32)
  hidden_2 = trial.suggest_int('hidden_2', 64, 256,32)
  dropout_rate = trial.suggest_uniform('dropout',0.0,0.8)
  attention_dim = trial.suggest_int('attention_dim',32,256,32)
  num_heads = trial.suggest_int('num_heads',2,8,2)
  batch_size = trial.suggest_int('batch_size', 64, 256,32)


[I 2024-01-19 19:17:35,879] Trial 199 finished with value: 0.4567629499238173 and parameters: {'lr': 0.001633034768469293, 'hidden_1': 256, 'hidden_2': 224, 'dropout': 0.3063049436403606, 'attention_dim': 32, 'num_heads': 6, 'batch_size': 160}. Best is trial 144 with value: 0.3981115267154687.
