In [1]:
import numpy as np
import SearchSpace as ss
import ModelBuild as Builder
import TrainModel as Trainer
from google.colab import drive
import pandas as pd
import os
import torch
import random
from scipy.stats import norm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
import torch
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms

drive.mount('/content/drive')

Output hidden; open in https://colab.research.google.com to view.

In [2]:
random_params = ss.create_param_combinations(strategy="bayesian")
print("bayesion Search Combinations:", random_params)

bayesion Search Combinations: [{'num_layers': 4, 'units_per_layer': 108, 'activation': 'relu', 'learning_rate': 0.0005829011452567019, 'batch_size': 64, 'dropout_rate': 0.5776722153152961, 'l2_reg_strength': 3.773291459887234e-07}]


In [3]:
# 定义 activation 函数的映射字典
activation_to_num = {'relu': 1, 'tanh': 2, 'sigmoid': 3}
num_to_activation = {1: 'relu', 2: 'tanh', 3: 'sigmoid'}

def initialize_population(num_points, search_space):
    """
    初始化初始种群，生成包含激活函数数字编码的初始点。
    """
    population = [random_sample_point(search_space) for _ in range(num_points)]
    for params in population:
        if 'activation' in params:
            params['activation'] = activation_to_num[params['activation']]
    return population

def random_sample_point(search_space):
    """
    从搜索空间中随机生成一个采样点。
    """
    sample = {}
    for param, config in search_space.items():
        if "values" in config:  # 离散值
            sample[param] = np.random.choice(config["values"])
        elif config["type"] == "continuous":  # 连续值
            if config.get("scale") == "log":
                sample[param] = np.exp(np.random.uniform(np.log(config["min"]), np.log(config["max"])))
            else:
                sample[param] = np.random.uniform(config["min"], config["max"])

            if param == "units_per_layer":
                sample[param] = int(sample[param])
    return sample

def select_next_point(gpr, search_space, X_sample, y_sample, n_candidates=100):
    """
    使用期望改进选择下一个采样点。
    """
    candidates = [random_sample_point(search_space) for _ in range(n_candidates)]
    for candidate in candidates:
        if 'activation' in candidate:
            candidate['activation'] = activation_to_num[candidate['activation']]
    candidates = np.array([list(candidate.values()) for candidate in candidates])

    ei = expected_improvement(candidates, X_sample, gpr, y_sample)
    next_point = candidates[np.argmax(ei)]

    # 将选出的下一点转换为字典格式
    next_point_dict = dict(zip(search_space.keys(), next_point))

    next_point_dict['activation'] = int(next_point_dict['activation'])
    next_point_dict['batch_size'] = int(next_point_dict['batch_size'])
    next_point_dict['units_per_layer'] = int(next_point_dict['units_per_layer'])
    next_point_dict['num_layers'] = int(next_point_dict['num_layers'])

    return next_point_dict

def expected_improvement(X, X_sample, gpr, y_sample, xi=0.01):
    """
    计算候选点的期望改进 (Expected Improvement, EI)。
    """
    from scipy.stats import norm
    mu, sigma = gpr.predict(X, return_std=True)
    mu_sample_opt = np.max(y_sample)

    with np.errstate(divide='warn'):
        imp = mu - mu_sample_opt - xi
        Z = imp / sigma
        ei = imp * norm.cdf(Z) + sigma * norm.pdf(Z)
        ei[sigma == 0.0] = 0.0

    return ei



In [4]:
def bayesian_search(train_set, val_set, epochs=10, save_dir="/content/drive/MyDrive/DL_HPO/BayesianResult"):
    search_space = ss.get_search_space("bayesian")

    initial_points = 10
    total_iterations = 1000

    all_results = []
    best_result = None
    best_val_acc = 0.0
    best_model_wts = None
    total_training_time = 0

    # 确保保存路径存在
    os.makedirs(save_dir, exist_ok=True)
    summary_file_path = f"{save_dir}/bayesian_result.csv"

    # 初始化初始种群并将 activation 转换为数值
    initial_population = initialize_population(initial_points, search_space)
    X_train = np.array([list(point.values()) for point in initial_population])
    y_train = []

    # 代理模型：高斯过程回归
    kernel = Matern(nu=2.5)
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-6, n_restarts_optimizer=10)

    for iteration in range(total_iterations):
        print(f"Iteration {iteration + 1}/{total_iterations}")

        # 初始点或采集函数生成的点
        if iteration < initial_points:
            params = initial_population[iteration]
        else:
            params = select_next_point(gpr, search_space, X_train, y_train)

        print(params)
        params_build = params.copy()
        if 'activation' in params_build:
          params_build['activation'] = num_to_activation[params_build['activation']]


        # 构建模型并加载数据
        model = Builder.build_model(params_build)
        batch_size = int(params["batch_size"])
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

        # 训练模型
        result = Trainer.train_model(model, train_loader, val_loader, epochs=epochs)

        # 累加当前模型的训练时间
        total_training_time += result["training_time"]

        # 记录结果
        result_summary = {
            "iteration": iteration,
            "params": params,
            "strategy": "bayesian",
            "train_loss": result["train_losses"][-1],
            "val_loss": result["val_losses"][-1],
            "train_accuracy": result["train_accuracies"][-1],
            "val_accuracy": result["val_accuracies"][-1],
            "training_time": result["training_time"],
            "total_training_time": total_training_time
        }
        all_results.append(result_summary)

        # 保存结果到文件
        result_df = pd.DataFrame([result_summary])
        if os.path.exists(summary_file_path):
            result_df.to_csv(summary_file_path, mode='a', header=False, index=False)
        else:
            result_df.to_csv(summary_file_path, mode='w', index=False)

        # 更新最佳模型并保存
        if result["val_accuracies"][-1] > best_val_acc:
            best_val_acc = result["val_accuracies"][-1]
            best_result = result_summary
            best_model_wts = model.state_dict()
            best_result_df = pd.DataFrame([best_result])
            best_result_df.to_csv(f"{save_dir}/best_bayesian_result.csv", index=False)
            torch.save(best_model_wts, f"{save_dir}/best_model_weights.pt")

        # 更新代理模型
        print(all_results[0])
        X_train = np.array([list(res['params'].values()) for res in all_results])
        y_train = np.array([res['val_accuracy'] for res in all_results])
        gpr.fit(X_train, y_train)

    return all_results, best_result, best_model_wts


In [5]:
# 下载并预处理 MNIST 数据集
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# 加载 MNIST 数据集
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# 截取 1000 个训练样本和 100 个验证样本
train_subset = Subset(train_dataset, range(3000))
val_subset = Subset(test_dataset, range(500))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 36.3MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 1.01MB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 8.39MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.16MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [None]:
all_results, best_result, best_model_wts = bayesian_search(train_subset, val_subset, epochs=10,save_dir="/content/drive/MyDrive/DL_HPO/BayesianResult2")

Iteration 1/1000
{'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 2/1000
{'num_layers': 1, 'units_per_layer': 79, 'activation': 1, 'learning_rate': 0.00014620708825200177, 'batch_size': 64, 'dropout_rate': 0.35110782263853474, 'l2_reg_strength': 0.0007540784716011744}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 133/1000
{'num_layers': 1, 'units_per_layer': 109, 'activation': 3, 'learning_rate': 0.02286237739322568, 'batch_size': 32, 'dropout_rate': 0.4368752805113197, 'l2_reg_strength': 0.0011505270051254993}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 134/1000
{'num_layers': 2, 'units_per_layer': 111, 'activation': 2, 'learning_rate': 0.03447204489581634, 'batch_size': 128, 'dropout_rate': 0.570297417282609, 'l2_reg_strength': 1.6776115304347982e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'ba

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 138/1000
{'num_layers': 1, 'units_per_layer': 37, 'activation': 1, 'learning_rate': 0.0003299364463926733, 'batch_size': 128, 'dropout_rate': 0.28730154464674457, 'l2_reg_strength': 2.267211278805207e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 139/1000
{'num_layers': 2, 'units_per_layer': 38, 'activation': 2, 'learning_rate': 0.01299033915223949, 'batch_size': 32, 'dropout_rate': 0.3532406333515858, 'l2_reg_strength': 2.5232445763176185e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 144/1000
{'num_layers': 3, 'units_per_layer': 57, 'activation': 1, 'learning_rate': 0.000235337549238794, 'batch_size': 16, 'dropout_rate': 0.5828388110026936, 'l2_reg_strength': 0.00034295242912870663}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 145/1000
{'num_layers': 1, 'units_per_layer': 83, 'activation': 1, 'learning_rate': 0.0029480395432349487, 'batch_size': 64, 'dropout_rate': 0.2787587018716015, 'l2_reg_strength': 0.0014218142215400781}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'b

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 146/1000
{'num_layers': 3, 'units_per_layer': 110, 'activation': 2, 'learning_rate': 0.00011886659185698322, 'batch_size': 32, 'dropout_rate': 0.05496154854189454, 'l2_reg_strength': 5.23179416166443e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 147/1000
{'num_layers': 4, 'units_per_layer': 96, 'activation': 3, 'learning_rate': 0.00025334851713524584, 'batch_size': 32, 'dropout_rate': 0.23859092189267767, 'l2_reg_strength': 0.00015868552950516586}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.1157276994916600

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 153/1000
{'num_layers': 2, 'units_per_layer': 78, 'activation': 2, 'learning_rate': 0.0003745138732723686, 'batch_size': 128, 'dropout_rate': 0.19616607023407545, 'l2_reg_strength': 2.602787587148349e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 154/1000
{'num_layers': 1, 'units_per_layer': 126, 'activation': 1, 'learning_rate': 0.022318143242802013, 'batch_size': 256, 'dropout_rate': 0.08858833108577198, 'l2_reg_strength': 0.0014715362325136163}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 157/1000
{'num_layers': 4, 'units_per_layer': 125, 'activation': 2, 'learning_rate': 0.0809576926726316, 'batch_size': 256, 'dropout_rate': 0.3831994089372834, 'l2_reg_strength': 4.06383161273892e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 158/1000
{'num_layers': 3, 'units_per_layer': 58, 'activation': 1, 'learning_rate': 0.0029645475053473136, 'batch_size': 128, 'dropout_rate': 0.46554616516964387, 'l2_reg_strength': 1.596872234193303e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'b

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 182/1000
{'num_layers': 3, 'units_per_layer': 110, 'activation': 2, 'learning_rate': 0.0006022758130785908, 'batch_size': 32, 'dropout_rate': 0.51769534409859, 'l2_reg_strength': 1.2313850961317413e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 183/1000
{'num_layers': 3, 'units_per_layer': 108, 'activation': 2, 'learning_rate': 0.00414420712472887, 'batch_size': 32, 'dropout_rate': 0.25388883094872333, 'l2_reg_strength': 2.3013698392057257e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 196/1000
{'num_layers': 1, 'units_per_layer': 70, 'activation': 2, 'learning_rate': 0.01537611188326571, 'batch_size': 32, 'dropout_rate': 0.2252793133308877, 'l2_reg_strength': 1.4585750982264605e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 197/1000
{'num_layers': 2, 'units_per_layer': 36, 'activation': 2, 'learning_rate': 0.0005549231767498585, 'batch_size': 128, 'dropout_rate': 0.23741388934541385, 'l2_reg_strength': 7.33140567990168e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 198/1000
{'num_layers': 2, 'units_per_layer': 72, 'activation': 2, 'learning_rate': 0.0002757708779857114, 'batch_size': 128, 'dropout_rate': 0.5960838557637607, 'l2_reg_strength': 5.115731206704687e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 214/1000
{'num_layers': 1, 'units_per_layer': 9, 'activation': 3, 'learning_rate': 0.00016824729785425935, 'batch_size': 16, 'dropout_rate': 0.2100216936522356, 'l2_reg_strength': 1.5778090999079507e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 215/1000
{'num_layers': 1, 'units_per_layer': 106, 'activation': 2, 'learning_rate': 0.00015404736666900463, 'batch_size': 32, 'dropout_rate': 0.18466864504155162, 'l2_reg_strength': 8.949107959458298e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 223/1000
{'num_layers': 4, 'units_per_layer': 106, 'activation': 1, 'learning_rate': 0.015375508978156843, 'batch_size': 32, 'dropout_rate': 0.37250354827646864, 'l2_reg_strength': 0.000855711379795818}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 224/1000
{'num_layers': 2, 'units_per_layer': 109, 'activation': 2, 'learning_rate': 0.007863822418171388, 'batch_size': 128, 'dropout_rate': 0.15616486424344136, 'l2_reg_strength': 7.417772924009633e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 234/1000
{'num_layers': 2, 'units_per_layer': 112, 'activation': 2, 'learning_rate': 0.00040129371264888727, 'batch_size': 128, 'dropout_rate': 0.05310850613687621, 'l2_reg_strength': 1.9544665433933625e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 235/1000
{'num_layers': 2, 'units_per_layer': 105, 'activation': 1, 'learning_rate': 0.09456546167632346, 'batch_size': 32, 'dropout_rate': 0.309823155920171, 'l2_reg_strength': 1.1137683769288492e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 250/1000
{'num_layers': 2, 'units_per_layer': 106, 'activation': 2, 'learning_rate': 0.0006724969300398676, 'batch_size': 32, 'dropout_rate': 0.15145458516257815, 'l2_reg_strength': 1.666145238380157e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 251/1000
{'num_layers': 2, 'units_per_layer': 107, 'activation': 3, 'learning_rate': 0.0032692264197316375, 'batch_size': 32, 'dropout_rate': 0.12878922353550776, 'l2_reg_strength': 3.905922561060247e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 255/1000
{'num_layers': 2, 'units_per_layer': 103, 'activation': 3, 'learning_rate': 0.1440954562735619, 'batch_size': 16, 'dropout_rate': 0.3902961369530428, 'l2_reg_strength': 1.9599806323527024e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 256/1000
{'num_layers': 1, 'units_per_layer': 103, 'activation': 1, 'learning_rate': 0.0008236350183137704, 'batch_size': 32, 'dropout_rate': 0.06703099237439707, 'l2_reg_strength': 2.9633571277502e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'ba

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 265/1000
{'num_layers': 2, 'units_per_layer': 19, 'activation': 3, 'learning_rate': 0.00035253591760098425, 'batch_size': 32, 'dropout_rate': 0.5274864143624137, 'l2_reg_strength': 0.003516559176434161}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 266/1000
{'num_layers': 1, 'units_per_layer': 109, 'activation': 3, 'learning_rate': 0.001714299869849934, 'batch_size': 128, 'dropout_rate': 0.23700111244431366, 'l2_reg_strength': 0.00031129771470570275}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002,

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 291/1000
{'num_layers': 2, 'units_per_layer': 108, 'activation': 1, 'learning_rate': 0.2747783929682573, 'batch_size': 32, 'dropout_rate': 0.4434412910283079, 'l2_reg_strength': 2.3036400184167135e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 292/1000
{'num_layers': 2, 'units_per_layer': 23, 'activation': 3, 'learning_rate': 0.0005611724594348316, 'batch_size': 64, 'dropout_rate': 0.20611466285995722, 'l2_reg_strength': 0.002081111454623671}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'ba

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 315/1000
{'num_layers': 1, 'units_per_layer': 106, 'activation': 1, 'learning_rate': 0.018884329964453748, 'batch_size': 128, 'dropout_rate': 0.08965552697577091, 'l2_reg_strength': 0.0007593806351772054}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 316/1000
{'num_layers': 1, 'units_per_layer': 83, 'activation': 3, 'learning_rate': 0.05534845069821174, 'batch_size': 64, 'dropout_rate': 0.5877036987582333, 'l2_reg_strength': 2.8288282324406784e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 326/1000
{'num_layers': 3, 'units_per_layer': 78, 'activation': 1, 'learning_rate': 0.005625687141159841, 'batch_size': 128, 'dropout_rate': 0.3441559277168367, 'l2_reg_strength': 0.004814975911723952}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 327/1000
{'num_layers': 2, 'units_per_layer': 106, 'activation': 2, 'learning_rate': 0.024562148517130282, 'batch_size': 128, 'dropout_rate': 0.39375562856364105, 'l2_reg_strength': 4.098470623503485e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 334/1000
{'num_layers': 4, 'units_per_layer': 109, 'activation': 2, 'learning_rate': 0.2059353168996909, 'batch_size': 32, 'dropout_rate': 0.08013866776314967, 'l2_reg_strength': 4.811942518775487e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 335/1000
{'num_layers': 1, 'units_per_layer': 103, 'activation': 2, 'learning_rate': 0.000162193181330839, 'batch_size': 32, 'dropout_rate': 0.2993015601808734, 'l2_reg_strength': 2.9115303005201517e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'b

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 336/1000
{'num_layers': 2, 'units_per_layer': 104, 'activation': 1, 'learning_rate': 0.0015795829458779343, 'batch_size': 32, 'dropout_rate': 0.14011280904313383, 'l2_reg_strength': 1.2796693016724325e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 337/1000
{'num_layers': 4, 'units_per_layer': 72, 'activation': 2, 'learning_rate': 0.0017456722362111253, 'batch_size': 256, 'dropout_rate': 0.25534882645884077, 'l2_reg_strength': 0.0009700732919482363}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.1157276994916600

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 361/1000
{'num_layers': 1, 'units_per_layer': 72, 'activation': 3, 'learning_rate': 0.0012033235872125, 'batch_size': 128, 'dropout_rate': 0.528393114063002, 'l2_reg_strength': 2.569934710556299e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 362/1000
{'num_layers': 4, 'units_per_layer': 115, 'activation': 1, 'learning_rate': 0.015034520877988853, 'batch_size': 16, 'dropout_rate': 0.1762106903948138, 'l2_reg_strength': 7.394340015350389e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batc

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 374/1000
{'num_layers': 1, 'units_per_layer': 79, 'activation': 3, 'learning_rate': 0.010392339176672327, 'batch_size': 128, 'dropout_rate': 0.29726812690272103, 'l2_reg_strength': 2.393402848144558e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 375/1000
{'num_layers': 1, 'units_per_layer': 102, 'activation': 2, 'learning_rate': 0.07772873759807264, 'batch_size': 32, 'dropout_rate': 0.3229723053026071, 'l2_reg_strength': 1.026380612648207e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'b

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 394/1000
{'num_layers': 1, 'units_per_layer': 113, 'activation': 2, 'learning_rate': 0.028461697557308544, 'batch_size': 128, 'dropout_rate': 0.36537112620892637, 'l2_reg_strength': 0.00039724144768313704}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 395/1000
{'num_layers': 4, 'units_per_layer': 59, 'activation': 1, 'learning_rate': 0.000120437635756962, 'batch_size': 32, 'dropout_rate': 0.16739734663553096, 'l2_reg_strength': 0.00262957245032069}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 411/1000
{'num_layers': 3, 'units_per_layer': 37, 'activation': 1, 'learning_rate': 0.02675323385409758, 'batch_size': 16, 'dropout_rate': 0.5776205679499258, 'l2_reg_strength': 2.654565528742323e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 412/1000
{'num_layers': 1, 'units_per_layer': 40, 'activation': 1, 'learning_rate': 0.00045696664930368986, 'batch_size': 32, 'dropout_rate': 0.213068049251335, 'l2_reg_strength': 1.5719421955774903e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'ba

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 417/1000
{'num_layers': 1, 'units_per_layer': 76, 'activation': 1, 'learning_rate': 0.048036940388293474, 'batch_size': 128, 'dropout_rate': 0.07679990318187885, 'l2_reg_strength': 0.0005714826346660817}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 418/1000
{'num_layers': 3, 'units_per_layer': 115, 'activation': 2, 'learning_rate': 0.00043401176782744723, 'batch_size': 16, 'dropout_rate': 0.2604006847220872, 'l2_reg_strength': 5.207992177899106e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 419/1000
{'num_layers': 3, 'units_per_layer': 70, 'activation': 1, 'learning_rate': 0.2604971802270769, 'batch_size': 128, 'dropout_rate': 0.5547383679986495, 'l2_reg_strength': 1.476858762147176e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'b

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 460/1000
{'num_layers': 1, 'units_per_layer': 110, 'activation': 2, 'learning_rate': 0.0003234907748155854, 'batch_size': 32, 'dropout_rate': 0.5708812714479677, 'l2_reg_strength': 1.6621151976985404e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 461/1000
{'num_layers': 4, 'units_per_layer': 108, 'activation': 2, 'learning_rate': 0.00016301278608614117, 'batch_size': 32, 'dropout_rate': 0.148535498125764, 'l2_reg_strength': 0.0001950676339504822}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002,

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 480/1000
{'num_layers': 3, 'units_per_layer': 127, 'activation': 1, 'learning_rate': 0.03030140029779919, 'batch_size': 256, 'dropout_rate': 0.3130858710215032, 'l2_reg_strength': 9.850007674564758e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 481/1000
{'num_layers': 3, 'units_per_layer': 104, 'activation': 3, 'learning_rate': 0.12980883283072936, 'batch_size': 16, 'dropout_rate': 0.3416165639321564, 'l2_reg_strength': 0.005367582532746141}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'bat

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 526/1000
{'num_layers': 4, 'units_per_layer': 126, 'activation': 2, 'learning_rate': 0.020657227136739738, 'batch_size': 256, 'dropout_rate': 0.06391977530035087, 'l2_reg_strength': 2.4700655257962614e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 527/1000
{'num_layers': 1, 'units_per_layer': 109, 'activation': 2, 'learning_rate': 0.13338106102443284, 'batch_size': 128, 'dropout_rate': 0.2364749377020024, 'l2_reg_strength': 5.970012704698604e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002,

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 533/1000
{'num_layers': 1, 'units_per_layer': 112, 'activation': 3, 'learning_rate': 0.020303883335658097, 'batch_size': 16, 'dropout_rate': 0.4216723359388739, 'l2_reg_strength': 2.5109956034724916e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 534/1000
{'num_layers': 3, 'units_per_layer': 117, 'activation': 2, 'learning_rate': 0.0004709140255768977, 'batch_size': 16, 'dropout_rate': 0.3436708967319425, 'l2_reg_strength': 0.006246627168211028}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 551/1000
{'num_layers': 2, 'units_per_layer': 110, 'activation': 2, 'learning_rate': 0.011951129765450043, 'batch_size': 32, 'dropout_rate': 0.09010705113864867, 'l2_reg_strength': 9.808204450194404e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 552/1000
{'num_layers': 3, 'units_per_layer': 99, 'activation': 2, 'learning_rate': 0.12297796231296942, 'batch_size': 256, 'dropout_rate': 0.1740686257202605, 'l2_reg_strength': 0.00026773790969056977}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 584/1000
{'num_layers': 2, 'units_per_layer': 92, 'activation': 3, 'learning_rate': 0.0005357917615215914, 'batch_size': 16, 'dropout_rate': 0.39453166218646796, 'l2_reg_strength': 2.768810953715351e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 585/1000
{'num_layers': 2, 'units_per_layer': 78, 'activation': 1, 'learning_rate': 0.2989075422653799, 'batch_size': 128, 'dropout_rate': 0.07001473494933319, 'l2_reg_strength': 0.0012484653557224342}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'b

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 586/1000
{'num_layers': 3, 'units_per_layer': 122, 'activation': 2, 'learning_rate': 0.0006409289252408886, 'batch_size': 256, 'dropout_rate': 0.4389378445474274, 'l2_reg_strength': 1.2677632511557325e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 587/1000
{'num_layers': 1, 'units_per_layer': 38, 'activation': 1, 'learning_rate': 0.0006927873570578909, 'batch_size': 128, 'dropout_rate': 0.45202308229783134, 'l2_reg_strength': 0.004648280808845334}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 608/1000
{'num_layers': 4, 'units_per_layer': 117, 'activation': 2, 'learning_rate': 0.00011316664716834429, 'batch_size': 16, 'dropout_rate': 0.2527888439640492, 'l2_reg_strength': 9.710919584706958e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 609/1000
{'num_layers': 2, 'units_per_layer': 113, 'activation': 3, 'learning_rate': 0.0017639541710760054, 'batch_size': 16, 'dropout_rate': 0.5533361426174385, 'l2_reg_strength': 0.00040428800876982703}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 612/1000
{'num_layers': 3, 'units_per_layer': 56, 'activation': 1, 'learning_rate': 0.0003732477069369161, 'batch_size': 32, 'dropout_rate': 0.5307255348146906, 'l2_reg_strength': 1.3414697987252202e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 613/1000
{'num_layers': 1, 'units_per_layer': 13, 'activation': 1, 'learning_rate': 0.00016708402756975422, 'batch_size': 32, 'dropout_rate': 0.5585098607275163, 'l2_reg_strength': 0.0032096691693876394}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 654/1000
{'num_layers': 2, 'units_per_layer': 111, 'activation': 2, 'learning_rate': 0.000858499180132366, 'batch_size': 16, 'dropout_rate': 0.498790713121692, 'l2_reg_strength': 1.743113325749397e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 655/1000
{'num_layers': 2, 'units_per_layer': 108, 'activation': 1, 'learning_rate': 0.004154275218409696, 'batch_size': 128, 'dropout_rate': 0.09327711887469353, 'l2_reg_strength': 0.00469181501184767}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'ba

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 658/1000
{'num_layers': 3, 'units_per_layer': 107, 'activation': 2, 'learning_rate': 0.21269186684383876, 'batch_size': 64, 'dropout_rate': 0.39286963560330507, 'l2_reg_strength': 0.0002726528422535305}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 659/1000
{'num_layers': 2, 'units_per_layer': 124, 'activation': 1, 'learning_rate': 0.008644280695875181, 'batch_size': 256, 'dropout_rate': 0.5640178765103558, 'l2_reg_strength': 9.326361856446549e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 660/1000
{'num_layers': 2, 'units_per_layer': 110, 'activation': 2, 'learning_rate': 0.011598204632004073, 'batch_size': 16, 'dropout_rate': 0.31965539890107375, 'l2_reg_strength': 7.07573399096565e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 671/1000
{'num_layers': 1, 'units_per_layer': 111, 'activation': 2, 'learning_rate': 0.29621051461092335, 'batch_size': 128, 'dropout_rate': 0.20950969732077218, 'l2_reg_strength': 3.574823137827408e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 672/1000
{'num_layers': 1, 'units_per_layer': 39, 'activation': 2, 'learning_rate': 0.0004606060725943532, 'batch_size': 32, 'dropout_rate': 0.5810899723354143, 'l2_reg_strength': 5.043640144348783e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 695/1000
{'num_layers': 2, 'units_per_layer': 125, 'activation': 2, 'learning_rate': 0.04903836401611144, 'batch_size': 128, 'dropout_rate': 0.46390618208550694, 'l2_reg_strength': 0.00017166351985613604}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 696/1000
{'num_layers': 3, 'units_per_layer': 56, 'activation': 2, 'learning_rate': 0.24799538403744215, 'batch_size': 32, 'dropout_rate': 0.14194580889713088, 'l2_reg_strength': 0.00011458465542914337}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 697/1000
{'num_layers': 2, 'units_per_layer': 91, 'activation': 1, 'learning_rate': 0.0010969488798453285, 'batch_size': 128, 'dropout_rate': 0.5709046679664268, 'l2_reg_strength': 1.4152045478177257e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 698/1000
{'num_layers': 4, 'units_per_layer': 56, 'activation': 2, 'learning_rate': 0.03191901155240219, 'batch_size': 32, 'dropout_rate': 0.08603782261216705, 'l2_reg_strength': 0.00014157403397899965}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 709/1000
{'num_layers': 4, 'units_per_layer': 57, 'activation': 1, 'learning_rate': 0.26065330166835365, 'batch_size': 128, 'dropout_rate': 0.3158576072245712, 'l2_reg_strength': 0.003466110126042698}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 710/1000
{'num_layers': 1, 'units_per_layer': 111, 'activation': 3, 'learning_rate': 0.0013665595885396196, 'batch_size': 16, 'dropout_rate': 0.4020352369409454, 'l2_reg_strength': 2.4390160096108034e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'b

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 714/1000
{'num_layers': 2, 'units_per_layer': 125, 'activation': 1, 'learning_rate': 0.0003177140694113687, 'batch_size': 128, 'dropout_rate': 0.3443671498192971, 'l2_reg_strength': 3.8031745018588834e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 715/1000
{'num_layers': 1, 'units_per_layer': 110, 'activation': 3, 'learning_rate': 0.002230632893281074, 'batch_size': 16, 'dropout_rate': 0.4237410464828585, 'l2_reg_strength': 5.070831789871641e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002,

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 729/1000
{'num_layers': 2, 'units_per_layer': 105, 'activation': 1, 'learning_rate': 0.03705449383135336, 'batch_size': 128, 'dropout_rate': 0.13416530463791282, 'l2_reg_strength': 0.0018889675784645882}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 730/1000
{'num_layers': 3, 'units_per_layer': 123, 'activation': 2, 'learning_rate': 0.0606164236754969, 'batch_size': 256, 'dropout_rate': 0.11307243983384534, 'l2_reg_strength': 0.00039145857547644294}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 753/1000
{'num_layers': 1, 'units_per_layer': 114, 'activation': 2, 'learning_rate': 0.13619035803472965, 'batch_size': 16, 'dropout_rate': 0.26286249592580396, 'l2_reg_strength': 2.5392260500453556e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 754/1000
{'num_layers': 3, 'units_per_layer': 54, 'activation': 2, 'learning_rate': 0.010354815905851696, 'batch_size': 32, 'dropout_rate': 0.5518203545305443, 'l2_reg_strength': 1.1178178056205715e-07}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 785/1000
{'num_layers': 1, 'units_per_layer': 106, 'activation': 3, 'learning_rate': 0.15775597189739168, 'batch_size': 32, 'dropout_rate': 0.537452957889835, 'l2_reg_strength': 1.5959392221947793e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 786/1000
{'num_layers': 2, 'units_per_layer': 126, 'activation': 2, 'learning_rate': 0.2758538204970659, 'batch_size': 256, 'dropout_rate': 0.4566173626820746, 'l2_reg_strength': 1.8630613053450102e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 787/1000
{'num_layers': 3, 'units_per_layer': 115, 'activation': 1, 'learning_rate': 0.19930527219351823, 'batch_size': 16, 'dropout_rate': 0.10018196316302397, 'l2_reg_strength': 1.5423098714882674e-06}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, '

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Iteration 860/1000
{'num_layers': 2, 'units_per_layer': 107, 'activation': 2, 'learning_rate': 0.008735989442473932, 'batch_size': 128, 'dropout_rate': 0.47560058608336614, 'l2_reg_strength': 1.4279634509481655e-05}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'batch_size': 64, 'dropout_rate': 0.570402391979106, 'l2_reg_strength': 0.0001459203222690064}, 'strategy': 'bayesian', 'train_loss': 2.2198531379699706, 'val_loss': 2.1209178562164306, 'train_accuracy': 14.366666666666667, 'val_accuracy': 21.6, 'training_time': 15.026321172714233, 'total_training_time': 15.026321172714233}
Iteration 861/1000
{'num_layers': 1, 'units_per_layer': 92, 'activation': 1, 'learning_rate': 0.0319459658962734, 'batch_size': 64, 'dropout_rate': 0.23875507108397692, 'l2_reg_strength': 0.000141064912206128}
{'iteration': 0, 'params': {'num_layers': 3, 'units_per_layer': 15, 'activation': 1, 'learning_rate': 0.11572769949166002, 'b