Взять метод обучения с подкреплением, в котором есть применение нейросети и применить подбор гипер параметров

In [None]:
!pip install gym numpy torch optuna

In [6]:
import gym

# Создаем среду
env = gym.make("FrozenLake-v1") # https://github.com/openai/

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import optuna

# Определение нейронной сети для аппроксимации Q-функции
class QNetwork(nn.Module):
    def __init__(self, state_size, action_size, hidden_size=24):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, action_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Среда (упрощенная для примера)
class SimpleEnv:
    def __init__(self):
        self.state_space_size = 4
        self.action_space_size = 2

    def step(self, state, action):
        next_state = (state + 1) % self.state_space_size
        reward = 1 if action == 0 else -1
        done = next_state == 0
        return next_state, reward, done

    def reset(self):
        return 0

def train(env, model, episodes, learning_rate, gamma):
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.MSELoss()
    total_reward = 0

    for episode in range(episodes):
        state = env.reset()
        done = False
        episode_reward = 0

        while not done:
            state_tensor = torch.FloatTensor([state])
            q_values = model(state_tensor)

            _, action = torch.max(q_values, dim=0)
            action = action.item()

            next_state, reward, done = env.step(state, action)
            episode_reward += reward

            next_state_tensor = torch.FloatTensor([next_state])
            next_q_values = model(next_state_tensor)

            target_q = reward + gamma * torch.max(next_q_values)
            loss = loss_fn(q_values[action], target_q)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            state = next_state
        
        total_reward += episode_reward
    
    average_reward = total_reward / episodes
    print(average_reward)
    return average_reward

def objective(trial):
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    gamma = trial.suggest_uniform('gamma', 0.85, 0.99)
    hidden_size = trial.suggest_categorical('hidden_size', [16, 32, 64, 128])
    episodes = 100

    model = QNetwork(1, 2, hidden_size)
    env = SimpleEnv()
    average_reward = train(env, model, episodes, learning_rate, gamma)

    return average_reward

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

print("Лучшие гиперпараметры:", study.best_params)

[I 2024-03-07 17:26:16,502] A new study created in memory with name: no-name-ef8e6b19-a178-4ac1-94d1-4b247bcc6dca


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
  gamma = trial.suggest_uniform('gamma', 0.85, 0.99)
[I 2024-03-07 17:26:16,668] Trial 0 finished with value: 3.22 and parameters: {'learning_rate': 0.0022232378052516634, 'gamma': 0.9245754534211699, 'hidden_size': 128}. Best is trial 0 with value: 3.22.
[I 2024-03-07 17:26:16,804] Trial 1 finished with value: 4.0 and parameters: {'learning_rate': 0.06980575140348894, 'gamma': 0.8657160315938123, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.22
4.0


[I 2024-03-07 17:26:16,932] Trial 2 finished with value: -4.0 and parameters: {'learning_rate': 3.7736906667717635e-05, 'gamma': 0.8896824130573853, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:17,060] Trial 3 finished with value: 4.0 and parameters: {'learning_rate': 0.09730412550444388, 'gamma': 0.8520739144131977, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


-4.0
4.0


[I 2024-03-07 17:26:17,188] Trial 4 finished with value: 4.0 and parameters: {'learning_rate': 3.817633079351554e-05, 'gamma': 0.8939674411100769, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:17,320] Trial 5 finished with value: -4.0 and parameters: {'learning_rate': 0.00021730533209578706, 'gamma': 0.9387922586058098, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


4.0
-4.0


[I 2024-03-07 17:26:17,454] Trial 6 finished with value: 2.98 and parameters: {'learning_rate': 0.001768586609898366, 'gamma': 0.9138908161482697, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:17,584] Trial 7 finished with value: 3.88 and parameters: {'learning_rate': 0.0013894554552345538, 'gamma': 0.956629684156268, 'hidden_size': 64}. Best is trial 1 with value: 4.0.


2.98
3.88


[I 2024-03-07 17:26:17,712] Trial 8 finished with value: 3.74 and parameters: {'learning_rate': 0.019214994117610556, 'gamma': 0.9733398665325156, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:17,838] Trial 9 finished with value: 4.0 and parameters: {'learning_rate': 0.003953566950550455, 'gamma': 0.9650953242634125, 'hidden_size': 64}. Best is trial 1 with value: 4.0.


3.74
4.0


[I 2024-03-07 17:26:17,975] Trial 10 finished with value: 3.94 and parameters: {'learning_rate': 0.08926726356870109, 'gamma': 0.8554471210071775, 'hidden_size': 16}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:18,110] Trial 11 finished with value: 4.0 and parameters: {'learning_rate': 0.0887325430835111, 'gamma': 0.8510921929917414, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.94
4.0


[I 2024-03-07 17:26:18,244] Trial 12 finished with value: 4.0 and parameters: {'learning_rate': 0.014357016593962232, 'gamma': 0.8754285727365566, 'hidden_size': 16}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:18,374] Trial 13 finished with value: 3.8 and parameters: {'learning_rate': 0.013420074606033464, 'gamma': 0.8707863794195662, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


4.0
3.8


[I 2024-03-07 17:26:18,505] Trial 14 finished with value: 3.98 and parameters: {'learning_rate': 0.0002744763639835556, 'gamma': 0.8739697891161485, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:18,644] Trial 15 finished with value: 4.0 and parameters: {'learning_rate': 0.04260705073429386, 'gamma': 0.9046777758554932, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


3.98
4.0


[I 2024-03-07 17:26:18,797] Trial 16 finished with value: 3.68 and parameters: {'learning_rate': 0.008386889614460501, 'gamma': 0.8612892014688165, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:18,928] Trial 17 finished with value: 3.76 and parameters: {'learning_rate': 0.03865442557401695, 'gamma': 0.8897200239059843, 'hidden_size': 16}. Best is trial 1 with value: 4.0.


3.68
3.76


[I 2024-03-07 17:26:19,064] Trial 18 finished with value: 4.0 and parameters: {'learning_rate': 0.0003226111705977872, 'gamma': 0.9897264742930072, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:19,198] Trial 19 finished with value: 3.68 and parameters: {'learning_rate': 0.005122890945010774, 'gamma': 0.8653261891350105, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


4.0
3.68


[I 2024-03-07 17:26:19,334] Trial 20 finished with value: -4.0 and parameters: {'learning_rate': 1.2905022236650412e-05, 'gamma': 0.9363296544155495, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:19,468] Trial 21 finished with value: 4.0 and parameters: {'learning_rate': 7.852698082585294e-05, 'gamma': 0.8929851209130788, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


-4.0
4.0


[I 2024-03-07 17:26:19,602] Trial 22 finished with value: 3.98 and parameters: {'learning_rate': 0.03889752939208805, 'gamma': 0.8786218289708834, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:19,739] Trial 23 finished with value: 4.0 and parameters: {'learning_rate': 1.4494919011060788e-05, 'gamma': 0.8501538482702286, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.98
4.0


[I 2024-03-07 17:26:19,872] Trial 24 finished with value: 3.16 and parameters: {'learning_rate': 0.0005933769040090834, 'gamma': 0.9048436978672435, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:20,006] Trial 25 finished with value: -4.0 and parameters: {'learning_rate': 8.670138798450994e-05, 'gamma': 0.8836072673012723, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.16
-4.0


[I 2024-03-07 17:26:20,146] Trial 26 finished with value: 1.28 and parameters: {'learning_rate': 0.0006818093614952524, 'gamma': 0.8992518240408162, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:20,277] Trial 27 finished with value: 4.0 and parameters: {'learning_rate': 2.8641534333947242e-05, 'gamma': 0.8606948102211414, 'hidden_size': 16}. Best is trial 1 with value: 4.0.


1.28
4.0


[I 2024-03-07 17:26:20,410] Trial 28 finished with value: 3.98 and parameters: {'learning_rate': 0.09930501965169491, 'gamma': 0.8667077261085037, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:20,541] Trial 29 finished with value: 4.0 and parameters: {'learning_rate': 0.02791697842879885, 'gamma': 0.9226467011600885, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.98
4.0


[I 2024-03-07 17:26:20,690] Trial 30 finished with value: 4.0 and parameters: {'learning_rate': 0.0031172393132194746, 'gamma': 0.8833455456471034, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:20,826] Trial 31 finished with value: 3.98 and parameters: {'learning_rate': 0.0036987677465930816, 'gamma': 0.9569131979338514, 'hidden_size': 64}. Best is trial 1 with value: 4.0.


4.0
3.98


[I 2024-03-07 17:26:20,958] Trial 32 finished with value: 3.94 and parameters: {'learning_rate': 0.05541959588814921, 'gamma': 0.9379974715232102, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:21,089] Trial 33 finished with value: 3.98 and parameters: {'learning_rate': 0.010938231162180762, 'gamma': 0.9285418644691744, 'hidden_size': 64}. Best is trial 1 with value: 4.0.


3.94
3.98


[I 2024-03-07 17:26:21,222] Trial 34 finished with value: 3.72 and parameters: {'learning_rate': 0.0001171031157953412, 'gamma': 0.9158035286173295, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:21,353] Trial 35 finished with value: 3.96 and parameters: {'learning_rate': 0.006198151809828665, 'gamma': 0.952658452903342, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


3.72
3.96


[I 2024-03-07 17:26:21,495] Trial 36 finished with value: 3.98 and parameters: {'learning_rate': 0.0011183767579689106, 'gamma': 0.9887672100438626, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:21,628] Trial 37 finished with value: 3.98 and parameters: {'learning_rate': 0.022706576528951687, 'gamma': 0.9764433811879646, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


3.98
3.98


[I 2024-03-07 17:26:21,761] Trial 38 finished with value: 4.0 and parameters: {'learning_rate': 4.0465533337397385e-05, 'gamma': 0.8574699339462484, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:21,892] Trial 39 finished with value: 4.0 and parameters: {'learning_rate': 0.002056145761713527, 'gamma': 0.9091074471545311, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


4.0
4.0


[I 2024-03-07 17:26:22,027] Trial 40 finished with value: 4.0 and parameters: {'learning_rate': 0.060948771180073094, 'gamma': 0.9489696647840205, 'hidden_size': 16}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:22,161] Trial 41 finished with value: 3.98 and parameters: {'learning_rate': 0.06630705929996655, 'gamma': 0.852354681320449, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


4.0
3.98


[I 2024-03-07 17:26:22,296] Trial 42 finished with value: 3.92 and parameters: {'learning_rate': 0.09128235494495666, 'gamma': 0.8677007949427001, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:22,428] Trial 43 finished with value: 3.92 and parameters: {'learning_rate': 0.022303657682668284, 'gamma': 0.8586850011303199, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.92
3.92


[I 2024-03-07 17:26:22,561] Trial 44 finished with value: 3.92 and parameters: {'learning_rate': 0.03441654943098965, 'gamma': 0.8788822039107376, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:22,695] Trial 45 finished with value: 3.9 and parameters: {'learning_rate': 0.01382459885740322, 'gamma': 0.8719571634794525, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


3.92
3.9


[I 2024-03-07 17:26:22,831] Trial 46 finished with value: 3.92 and parameters: {'learning_rate': 0.059743746421111965, 'gamma': 0.8503470904868031, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:22,964] Trial 47 finished with value: -3.88 and parameters: {'learning_rate': 0.00019744579681235864, 'gamma': 0.8961430357541003, 'hidden_size': 16}. Best is trial 1 with value: 4.0.


3.92
-3.88


[I 2024-03-07 17:26:23,099] Trial 48 finished with value: 3.5 and parameters: {'learning_rate': 0.017394186857998026, 'gamma': 0.862227927477977, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:23,236] Trial 49 finished with value: 2.86 and parameters: {'learning_rate': 0.0005466815828167274, 'gamma': 0.8558468139966371, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.5
2.86


[I 2024-03-07 17:26:23,369] Trial 50 finished with value: 3.92 and parameters: {'learning_rate': 0.04394751706350429, 'gamma': 0.9293723080258736, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:23,502] Trial 51 finished with value: 3.98 and parameters: {'learning_rate': 0.009754436802154858, 'gamma': 0.8757482919743167, 'hidden_size': 16}. Best is trial 1 with value: 4.0.


3.92
3.98


[I 2024-03-07 17:26:23,637] Trial 52 finished with value: 4.0 and parameters: {'learning_rate': 0.0073825130587175025, 'gamma': 0.8886731085300656, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:23,773] Trial 53 finished with value: 3.96 and parameters: {'learning_rate': 0.07475944004399132, 'gamma': 0.869009598765364, 'hidden_size': 16}. Best is trial 1 with value: 4.0.


4.0
3.96


[I 2024-03-07 17:26:23,907] Trial 54 finished with value: 4.0 and parameters: {'learning_rate': 0.004490763982509366, 'gamma': 0.8823976912906982, 'hidden_size': 16}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:24,041] Trial 55 finished with value: 3.86 and parameters: {'learning_rate': 0.002917427062379455, 'gamma': 0.8625185897175719, 'hidden_size': 16}. Best is trial 1 with value: 4.0.


4.0
3.86


[I 2024-03-07 17:26:24,223] Trial 56 finished with value: 1.78 and parameters: {'learning_rate': 0.03125086173549592, 'gamma': 0.8757775836298971, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:24,362] Trial 57 finished with value: 3.76 and parameters: {'learning_rate': 0.046620168535460214, 'gamma': 0.9688336927139151, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


1.78
3.76


[I 2024-03-07 17:26:24,502] Trial 58 finished with value: 3.96 and parameters: {'learning_rate': 0.015788549418504095, 'gamma': 0.887050280206554, 'hidden_size': 16}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:24,638] Trial 59 finished with value: 3.86 and parameters: {'learning_rate': 0.02379041994845844, 'gamma': 0.8641168142639339, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.96
3.86


[I 2024-03-07 17:26:24,773] Trial 60 finished with value: 3.96 and parameters: {'learning_rate': 0.001217738923168326, 'gamma': 0.9008167488739197, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:24,906] Trial 61 finished with value: 3.98 and parameters: {'learning_rate': 0.08801845242396247, 'gamma': 0.9140025140296567, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


3.96
3.98


[I 2024-03-07 17:26:25,045] Trial 62 finished with value: 3.88 and parameters: {'learning_rate': 0.04651959991413359, 'gamma': 0.9081729923773506, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:25,182] Trial 63 finished with value: 3.9 and parameters: {'learning_rate': 0.03680823988967428, 'gamma': 0.8938962205131531, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


3.88
3.9


[I 2024-03-07 17:26:25,324] Trial 64 finished with value: -4.0 and parameters: {'learning_rate': 1.8252562414837928e-05, 'gamma': 0.8524446823354017, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:25,455] Trial 65 finished with value: 3.92 and parameters: {'learning_rate': 0.09916643884378824, 'gamma': 0.9016880989502838, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


-4.0
3.92


[I 2024-03-07 17:26:25,588] Trial 66 finished with value: 3.92 and parameters: {'learning_rate': 0.02898762732983123, 'gamma': 0.9181515585871844, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:25,722] Trial 67 finished with value: 3.78 and parameters: {'learning_rate': 0.011794658420980653, 'gamma': 0.8709692424124102, 'hidden_size': 64}. Best is trial 1 with value: 4.0.


3.92
3.78


[I 2024-03-07 17:26:25,859] Trial 68 finished with value: 3.84 and parameters: {'learning_rate': 0.001693346205417306, 'gamma': 0.8559353836161364, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:25,991] Trial 69 finished with value: 3.98 and parameters: {'learning_rate': 0.061124499203373074, 'gamma': 0.9086170387974988, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.84
3.98


[I 2024-03-07 17:26:26,124] Trial 70 finished with value: -2.68 and parameters: {'learning_rate': 0.0007397334986868857, 'gamma': 0.8804161040855393, 'hidden_size': 16}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:26,255] Trial 71 finished with value: -0.02 and parameters: {'learning_rate': 0.00038919222974990075, 'gamma': 0.9848266144402276, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


-2.68
-0.02


[I 2024-03-07 17:26:26,391] Trial 72 finished with value: 4.0 and parameters: {'learning_rate': 7.693991965930572e-05, 'gamma': 0.9659990556742465, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:26,526] Trial 73 finished with value: -4.0 and parameters: {'learning_rate': 0.00012650994985665307, 'gamma': 0.9784331739770451, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


4.0
-4.0


[I 2024-03-07 17:26:26,660] Trial 74 finished with value: 3.9 and parameters: {'learning_rate': 0.07514470995166528, 'gamma': 0.9576341283556673, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:26,793] Trial 75 finished with value: 2.7 and parameters: {'learning_rate': 5.51703973599049e-05, 'gamma': 0.9887157755024675, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.9
2.7


[I 2024-03-07 17:26:26,928] Trial 76 finished with value: 3.8 and parameters: {'learning_rate': 0.048504939324368404, 'gamma': 0.9411948077408602, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:27,061] Trial 77 finished with value: 2.3 and parameters: {'learning_rate': 0.0002511119623850287, 'gamma': 0.8644699606027479, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.8
2.3


[I 2024-03-07 17:26:27,206] Trial 78 finished with value: 4.0 and parameters: {'learning_rate': 2.826635887115221e-05, 'gamma': 0.8585636193920438, 'hidden_size': 32}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:27,363] Trial 79 finished with value: -4.0 and parameters: {'learning_rate': 1.0868405431386224e-05, 'gamma': 0.9638054662053618, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


4.0
-4.0


[I 2024-03-07 17:26:27,499] Trial 80 finished with value: 3.96 and parameters: {'learning_rate': 0.00551748213481859, 'gamma': 0.8534357781541236, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:27,633] Trial 81 finished with value: 3.86 and parameters: {'learning_rate': 0.00016469419382792098, 'gamma': 0.9034775048866146, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


3.96
3.86


[I 2024-03-07 17:26:27,768] Trial 82 finished with value: -4.0 and parameters: {'learning_rate': 3.5344754178325855e-05, 'gamma': 0.8922172547555246, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:27,902] Trial 83 finished with value: -4.0 and parameters: {'learning_rate': 6.578630147133082e-05, 'gamma': 0.887957889025464, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


-4.0
-4.0


[I 2024-03-07 17:26:28,042] Trial 84 finished with value: -4.0 and parameters: {'learning_rate': 2.233658519740103e-05, 'gamma': 0.894233250184067, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:28,174] Trial 85 finished with value: -4.0 and parameters: {'learning_rate': 4.475511505540772e-05, 'gamma': 0.8965353559076402, 'hidden_size': 16}. Best is trial 1 with value: 4.0.


-4.0
-4.0


[I 2024-03-07 17:26:28,307] Trial 86 finished with value: 2.08 and parameters: {'learning_rate': 0.0003700487903666064, 'gamma': 0.8843543576292761, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:28,446] Trial 87 finished with value: 3.84 and parameters: {'learning_rate': 0.07335707225137, 'gamma': 0.9817907269320246, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


2.08
3.84


[I 2024-03-07 17:26:28,600] Trial 88 finished with value: 3.98 and parameters: {'learning_rate': 0.0008682439385185559, 'gamma': 0.8742389657935773, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:28,733] Trial 89 finished with value: 3.92 and parameters: {'learning_rate': 0.05428595919952195, 'gamma': 0.9230141219251107, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


3.98
3.92


[I 2024-03-07 17:26:28,868] Trial 90 finished with value: 4.0 and parameters: {'learning_rate': 0.002659633966769283, 'gamma': 0.8596503890592301, 'hidden_size': 64}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:29,000] Trial 91 finished with value: -4.0 and parameters: {'learning_rate': 1.5518470923182593e-05, 'gamma': 0.8506094139205692, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


4.0
-4.0


[I 2024-03-07 17:26:29,138] Trial 92 finished with value: -2.0 and parameters: {'learning_rate': 1.0066817514333654e-05, 'gamma': 0.8535771720523754, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:29,273] Trial 93 finished with value: -4.0 and parameters: {'learning_rate': 2.347127504561257e-05, 'gamma': 0.8670343575955176, 'hidden_size': 128}. Best is trial 1 with value: 4.0.


-2.0
-4.0


[I 2024-03-07 17:26:29,408] Trial 94 finished with value: 3.92 and parameters: {'learning_rate': 0.024892955068691314, 'gamma': 0.8557738612901817, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:29,550] Trial 95 finished with value: 4.0 and parameters: {'learning_rate': 0.0001178559825693319, 'gamma': 0.9064572173045, 'hidden_size': 16}. Best is trial 1 with value: 4.0.


3.92
4.0


[I 2024-03-07 17:26:29,688] Trial 96 finished with value: 4.0 and parameters: {'learning_rate': 0.007625332207712092, 'gamma': 0.8607408792093729, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:29,838] Trial 97 finished with value: 3.98 and parameters: {'learning_rate': 0.037026074729780034, 'gamma': 0.9727021100197579, 'hidden_size': 32}. Best is trial 1 with value: 4.0.


4.0
3.98


[I 2024-03-07 17:26:29,971] Trial 98 finished with value: 3.98 and parameters: {'learning_rate': 0.08765066381768837, 'gamma': 0.9275376092111716, 'hidden_size': 128}. Best is trial 1 with value: 4.0.
[I 2024-03-07 17:26:30,107] Trial 99 finished with value: 4.0 and parameters: {'learning_rate': 1.297657258073961e-05, 'gamma': 0.8909264848520577, 'hidden_size': 16}. Best is trial 1 with value: 4.0.


3.98
4.0
Лучшие гиперпараметры: {'learning_rate': 0.06980575140348894, 'gamma': 0.8657160315938123, 'hidden_size': 128}


Функция 'objective' определяет объектив для оптимизации с помощью библиотеки Optuna. Optuna используется для автоматического выбора оптимальных гиперпараметров.
В objective, гиперпараметры (скорость обучения, gamma, размер скрытого слоя) варьируются в предопределенных диапазонах. Модель обучается с этими параметрами и возвращается средняя награда как метрика качества.
Наконец, создается исследование (study), которое максимизирует среднюю награду и Optuna ищет лучшие гиперпараметры, выполняя функцию objective множество раз
