## Import Libraries

In [None]:
# Mount Google Drive at /content/drive; the dataset archives and the exchange folder
# used further down are addressed through this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Identifier of this notebook instance; communication() uses it to name the CSV it writes.
local_number = 1
# Identifiers of the other instances whose CSV files communication() reads.
receive_numbers = [2,3,4]
# The main loop exchanges the global best whenever the iteration index is a multiple of this.
iteration_per_communicate = 1
# Path prefix on the mounted Drive under which the exchange CSV files are written and read.
add = '/content/drive/MyDrive/update/'

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import math
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
import csv
import time
import random
import matplotlib.image as img
# import warnings
import warnings
# filter warnings
warnings.filterwarnings('ignore')

import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import RandomSampler
from torch.utils.data import TensorDataset
from torchvision.utils import make_grid
import torchvision.models as models
import time
from PIL import Image
from tqdm import tqdm
import gc
from torch.utils.data import DataLoader
# from torchsummary import summary

## Import Dataset

In [None]:
class Musicdata(torch.utils.data.Dataset):
    """Dataset backed by an ``.npz`` archive of samples and label rows.

    The archive must store the samples under ``arr_0`` and the labels under
    ``arr_1``. Every sample is reshaped to ``(1, 640, 128)``; a label row marks
    its class with a ``1`` and is decoded to the index of the first such entry.

    Args:
        npz: path (or file object) accepted by ``np.load``.
        mode: ``'test'`` makes ``__getitem__`` return only the sample; any
            other value returns ``(sample, label_index)``.
    """

    def __init__(self, npz, mode='train'):
        self.mode = mode
        # The context manager closes the archive once both arrays are in memory.
        with np.load(npz) as npzfile:
            self.x = [one.reshape(1,640,128) for one in npzfile['arr_0']]
            self.y = npzfile['arr_1']

    def __getitem__(self, index):
        """Return the float32 sample at ``index`` (plus its class index unless in test mode)."""
        data = torch.tensor(self.x[index], dtype = torch.float32)
        #data = transforms(data)
        if self.mode == 'test':
            return data
        # Decode only the requested row instead of scanning the whole label
        # array on every access.
        genre = int(np.where(self.y[index] == 1)[0][0])
        return data, torch.tensor(genre)

    def __len__(self):
        """Number of samples in the archive."""
        return len(self.x)

In [None]:
# Single Drive root for both archives so the two paths cannot drift apart.
# NOTE(review): the validation path previously used the 'My Drive' spelling, which is
# assumed to alias the same folder as 'MyDrive' on Colab - confirm.
DATA_DIR = '/content/drive/MyDrive/'
dataset_train = Musicdata(DATA_DIR + 'shuffled_train.npz', mode='train')
dataset_val = Musicdata(DATA_DIR + 'shuffled_valid.npz', mode='val')
# dataset_test = Musicdata('/content/drive/My Drive/test_arr.npz', mode='test')
from torch.utils.data import DataLoader

# Reshuffle the training samples every epoch; validation keeps a fixed order.
train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True, num_workers = 8)
val_loader = DataLoader(dataset_val, batch_size=32, shuffle=False, num_workers = 8)
# test_loader = DataLoader(dataset_test, batch_size=128, shuffle=False)

## Building Model Architecture

## CNN(ResNet-based) + LSTM
The following architecture is based on ResNet.

### Define `Conv1` layer and `Bottleneck` class

In [None]:
def Conv1(in_planes, places, stride=2):
    """Build the network stem: 7x7 convolution, batch norm, ReLU, 3x3 max pooling.

    Args:
        in_planes: number of input channels.
        places: number of output channels of the convolution.
        stride: stride of the 7x7 convolution (the pooling always uses stride 2).

    Returns:
        An ``nn.Sequential`` holding the four layers in that order.
    """
    stem_layers = [
        nn.Conv2d(in_planes, places, kernel_size=7, stride=stride, padding=3, bias=False),
        nn.BatchNorm2d(places),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    ]
    return nn.Sequential(*stem_layers)

class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv, 3x3 conv, 1x1 conv, plus a shortcut.

    in_places: number of input channels
    places: output channels of the first two convolutions
    stride: stride of the second (3x3) convolution
    expansion: the block outputs places*expansion channels
    downsampling: when True the shortcut goes through a strided 1x1 conv + batch
        norm so that its channels and H, W match the main path; when False the
        input is added unchanged
    """

    def __init__(self,in_places,places, stride=1, downsampling=False, expansion = 4):
        super().__init__()
        self.expansion = expansion
        self.downsampling = downsampling
        out_places = places * self.expansion

        main_path = [
            # 1x1 conv: changes only the channel count, H and W are unchanged
            nn.Conv2d(in_places, places, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            # 3x3 conv: the actual spatial convolution, H and W change with the stride
            nn.Conv2d(places, places, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            # 1x1 conv: expands the channels to places*expansion, H and W are unchanged
            nn.Conv2d(places, out_places, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_places),
        ]
        self.bottleneck = nn.Sequential(*main_path)

        if self.downsampling:
            shortcut_path = [
                nn.Conv2d(in_places, out_places, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_places),
            ]
            self.downsample = nn.Sequential(*shortcut_path)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ReLU(main_path(x) + shortcut(x))."""
        out = self.bottleneck(x)
        shortcut = self.downsample(x) if self.downsampling else x
        out += shortcut
        return self.relu(out)

### Define `RESTM` class (ResNet-style backbone + LSTM)

In [None]:
class RESTM(nn.Module):
    """ResNet-style convolutional backbone followed by an LSTM and a fully connected head.

    The backbone is ``Conv1`` plus four stages of ``Bottleneck`` blocks. After
    average pooling, every spatial position becomes one LSTM time step whose
    features are the channels at that position. The concatenated LSTM outputs
    feed a fully connected classifier with 8 outputs.

    Args:
        blocks: four integers, the number of Bottleneck blocks in each stage.
        initial_size: output channels of the stem convolution.
        expansion: channel expansion factor of every Bottleneck.
        hidden_size: LSTM hidden size.
        num_layers: number of stacked LSTM layers.
        fc_neural: width of the hidden fully connected layers.
        fc_block: number of extra hidden fully connected blocks.
    """

    # Spatial positions left after the average pooling for a 1 x 640 x 128 input
    # (19 * 3); the first fully connected layer is sized for this sequence length.
    SEQ_LEN = 3 * 19

    def __init__(self, blocks, initial_size, expansion=4, hidden_size=32, num_layers=2, fc_neural=512, fc_block=1):
        super(RESTM,self).__init__()
        self.expansion = expansion
        self.initial_size = initial_size

        self.conv1 = Conv1(in_planes=1, places=self.initial_size)
        # set_size is multiplied by `expansion` after every stage; the divisions by
        # 2 / 4 / 8 make each stage's input equal the previous stage's output
        # (places * expansion) as long as the divisions are exact.
        set_size = self.initial_size
        self.layer1 = self.make_layer(in_places=set_size, places=set_size, block=blocks[0], stride=1, expansion=self.expansion)
        set_size *= self.expansion
        self.layer2 = self.make_layer(in_places=set_size, places=int(set_size/2), block=blocks[1], stride=2, expansion=self.expansion)
        set_size *= self.expansion
        self.layer3 = self.make_layer(in_places=int(set_size/2), places=int(set_size/4), block=blocks[2], stride=2, expansion=self.expansion)
        set_size *= self.expansion
        self.layer4 = self.make_layer(in_places=int(set_size/4), places=int(set_size/8), block=blocks[3], stride=2, expansion=self.expansion)
        set_size *= self.expansion

        self.avgpool = nn.AvgPool2d(2, stride=1)
        self.LSTM = nn.LSTM(int(set_size/8), hidden_size, num_layers, batch_first = True)

        self.fc = self.make_fc(hidden_size=hidden_size, fc_neural=fc_neural, fc_block=fc_block)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def make_layer(self, in_places, places, block, stride, expansion):
        """Build one stage: a projecting Bottleneck followed by ``block - 1`` plain ones."""
        layers = []
        layers.append(Bottleneck(in_places, places, stride, downsampling=True, expansion=expansion))
        for i in range(1, block):
            layers.append(Bottleneck(places*self.expansion, places, expansion=expansion))

        return nn.Sequential(*layers)

    def make_fc(self, hidden_size, fc_neural, fc_block):
        """Build the classifier: input layer, ``fc_block`` hidden blocks, 8-way output layer."""
        layers = [nn.Dropout(), nn.Linear(self.SEQ_LEN*hidden_size, fc_neural), nn.BatchNorm1d(fc_neural), nn.ReLU(True), nn.Dropout()]
        for i in range(fc_block):
            layers += [nn.Linear(fc_neural, fc_neural), nn.BatchNorm1d(fc_neural), nn.ReLU(True), nn.Dropout()]
        layers += [nn.Linear(fc_neural, 8)]

        return nn.Sequential(*layers)


    def forward(self, x):
        """Map a ``(batch, 1, 640, 128)`` input to ``(batch, 8)`` class scores."""
#         size before everything: batch size * 1 * 640 * 128
#         size after conv1:       batch size * initial size * 160 * 32
#         size after layer1:      batch size * channels     * 160 * 32
#         size after layer2:      batch size * channels     *  80 * 16
#         size after layer3:      batch size * channels     *  40 *  8
#         size after layer4:      batch size * channels     *  20 *  4
#         size after avgpooling:  batch size * channels     *  19 *  3
#         size after reordering:  batch size * 57 * channels
#         size after LSTM:        batch size * (57 * hidden size)
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        # (batch, C, H, W) -> (batch, H*W, C). A plain view() would only reinterpret
        # the channel-major memory and mix channels with positions, so the axes are
        # swapped explicitly before handing the sequence to the LSTM.
        x = x.flatten(2).transpose(1, 2).contiguous()
        x, _ = self.LSTM(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        return x

# model = RESTM([1, 2, 1, 1], initial_size=64)
# summary(model, (1, 640, 128))

### Define `configuration` function

In [None]:
def configuration(initial_size=64, blocks_1=1, blocks_2=1, blocks_3=1, blocks_4=1, expansion=4, hidden_size=32, num_layers=2, fc_neural=512, fc_block=1):
    """Create a RESTM model from flat hyperparameters.

    The four ``blocks_*`` values are gathered, in order, into the per-stage block
    list expected by ``RESTM``; every other argument is forwarded unchanged.
    """
    return RESTM(
        blocks=[blocks_1, blocks_2, blocks_3, blocks_4],
        initial_size=initial_size,
        expansion=expansion,
        hidden_size=hidden_size,
        num_layers=num_layers,
        fc_neural=fc_neural,
        fc_block=fc_block,
    )

# model = configuration(1, 2, 3, 4, 5, 6, 7)
# summary(model, (1,128,640))

## Criterion

In [None]:
# Cross-entropy loss handed to both train() and val() by RESTM_Train.
criterion = nn.CrossEntropyLoss()
# Move it to the GPU only when one exists, so this cell also runs on a CPU-only runtime.
if torch.cuda.is_available():
    criterion = criterion.cuda()

In [None]:
def train(input_data, model, criterion, optimizer, output_disable = False):
    """Run one training epoch over ``input_data``.

    Args:
        input_data: iterable of batches; ``batch[0]`` is the input tensor and
            ``batch[1]`` the tensor of target class indices.
        model: network to optimise; batches are moved to the device of its parameters.
        criterion: loss called as ``criterion(outputs, targets)``.
        optimizer: optimizer stepped once per batch.
        output_disable: accepted so existing callers keep working; not used here.

    Returns:
        ``(accuracy, mean_loss)`` as Python floats. Accuracy is the fraction of
        correct predictions over all samples; ``mean_loss`` averages the per-batch
        losses. With no samples the result is ``(0.0, nan)``.
    """
    model.train()
    # Follow the model's device instead of assuming CUDA.
    device = next(model.parameters()).device
    loss_list = []
    total_count = 0
    acc_count = 0
    pbar = tqdm(input_data, position = 0, desc = "Training")
    for data in pbar:
        music, genre = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(music)
        loss = criterion(outputs, genre)
        loss.backward()
        optimizer.step()
        # Predictions come from the outputs computed before this step's update.
        predicted = outputs.argmax(dim=1)
        total_count += genre.size(0)
        # Accumulate as a Python int so an empty loader cannot leave a bare int
        # where tensor methods are expected.
        acc_count += (predicted == genre).sum().item()
        loss_list.append(loss.item())

    if total_count == 0 or not loss_list:
        return 0.0, float("nan")
    acc = acc_count / total_count
    loss = sum(loss_list) / len(loss_list)
    return acc, loss

In [None]:
def val(input_data, model, criterion, output_disable = False):
    """Evaluate ``model`` on ``input_data`` without updating it.

    Args:
        input_data: iterable of batches; ``batch[0]`` is the input tensor and
            ``batch[1]`` the tensor of target class indices.
        model: network to evaluate (switched to eval mode); batches are moved to
            the device of its parameters.
        criterion: loss called as ``criterion(outputs, targets)``.
        output_disable: accepted so existing callers keep working; not used here.

    Returns:
        ``(accuracy, mean_loss)`` as Python floats. Accuracy is the fraction of
        correct predictions over all samples; ``mean_loss`` averages the per-batch
        losses. With no samples the result is ``(0.0, nan)``.
    """
    model.eval()
    # Follow the model's device instead of assuming CUDA.
    device = next(model.parameters()).device

    loss_list = []
    total_count = 0
    acc_count = 0
    pbar = tqdm(input_data, position = 0, desc = "Validation")
    with torch.no_grad():
        for data in pbar:
            music, genre = data[0].to(device), data[1].to(device)
            outputs = model(music)
            loss = criterion(outputs, genre)
            predicted = outputs.argmax(dim=1)
            total_count += genre.size(0)
            # Accumulate as a Python int so an empty loader cannot leave a bare
            # int where tensor methods are expected.
            acc_count += (predicted == genre).sum().item()
            loss_list.append(loss.item())

    if total_count == 0 or not loss_list:
        return 0.0, float("nan")
    acc = acc_count / total_count
    loss = sum(loss_list) / len(loss_list)
    return acc, loss

## CNN(ResNet-based) + LSTM Training Process
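The function below builds a model from one set of hyperparameters, trains it for a few epochs, and returns the validation accuracy of the last epoch.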

In [None]:
def RESTM_Train(parameters ,max_epochs = 3):
    """Train a freshly configured RESTM model and return its final validation accuracy.

    Args:
        parameters: sequence of 11 values. ``parameters[0]`` is the learning rate;
            the remaining ten are cast to ``int`` and read, in order, as
            initial_size, blocks_1, blocks_2, blocks_3, blocks_4, expansion,
            hidden_size, num_layers, fc_neural, fc_block.
        max_epochs: number of train/validation epochs to run.

    Returns:
        Validation accuracy of the last epoch, or ``0.0`` when ``max_epochs`` is
        below 1 and no epoch is run.

    Uses the module-level ``train_loader``, ``val_loader`` and ``criterion``.
    """
    learning_rate = parameters[0]
    (initial_size, blocks_1, blocks_2, blocks_3, blocks_4,
     expansion, hidden_size, num_layers, fc_neural, fc_block) = (int(param) for param in parameters[1:])

    # Log every hyperparameter of the run, including the fully connected settings.
    print(" learning_rate =", learning_rate, "initial_size =", initial_size, "\n",\
          "blocks_1 =",blocks_1, "blocks_2 =",blocks_2, "blocks_3 =",blocks_3, "blocks_4 =",blocks_4, "\n", \
          "expansion =", expansion, "hidden_size =", hidden_size, "num_layers =", num_layers,
          "fc_neural =", fc_neural, "fc_block =", fc_block)

    # Use the GPU when present, otherwise stay on the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = configuration(initial_size, blocks_1, blocks_2, blocks_3, blocks_4, expansion, hidden_size, num_layers, fc_neural, fc_block)
    model = model.to(device)
    optimizer = torch.optim.Adam(params = model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

    # Defined up front so the return value exists even if the loop body never runs.
    val_acc = 0.0
    for epoch in range(1, max_epochs + 1):
        print('=' * 20, 'Epoch', epoch, '=' * 20)
        train_acc, train_loss = train(train_loader, model, criterion, optimizer, True)
        val_acc, val_loss = val(val_loader, model, criterion, True)

        print('Train Acc: {:.6f} Train Loss: {:.6f}'.format(train_acc, train_loss))
        print('  Val Acc: {:.6f}   Val Loss: {:.6f}'.format(val_acc, val_loss))
    return val_acc

In [None]:
def comparison(parameter, lower_bound, upper_bound, integer = False, mutation_rate = 0.02):
    """Keep a parameter inside ``[lower_bound, upper_bound]``, with optional random mutation.

    * Above the upper bound: redrawn uniformly from the upper half of the range.
    * Below the lower bound: redrawn uniformly from the lower half of the range.
    * Inside the range: with probability ``mutation_rate`` redrawn uniformly from
      the whole range (and reported on stdout), otherwise returned unchanged.

    When ``integer`` is true the result is truncated with ``int()``.
    """
    midpoint = (lower_bound+upper_bound)/2
    if parameter > upper_bound:
        candidate = np.random.uniform(low = midpoint, high = upper_bound)
    elif parameter < lower_bound:
        candidate = np.random.uniform(low = lower_bound, high = midpoint)
    elif random.uniform(0,1) < mutation_rate:
        candidate = np.random.uniform(low = lower_bound, high = upper_bound)
        print("Mutation occur:", parameter, "-->", candidate)
    else:
        candidate = parameter
    return int(candidate) if integer else candidate

In [None]:
def communication(i, iteration_per_communicate, local_number, receive_numbers, Gb, add, score_ind, max_wait = 4*60, poll_interval = 1):
    """Publish this instance's best vector and adopt a better one from the peers.

    ``Gb`` is written to ``add + str(i) + str(local_number) + ".csv"``. For every
    peer in ``receive_numbers`` the matching file for the same ``i`` is read; if
    it cannot be read yet, the read is retried every ``poll_interval`` seconds
    and the peer is skipped once more than ``max_wait`` retries have failed.

    Args:
        i: tag placed first in the file names (the callers pass the iteration index).
        iteration_per_communicate: not used by this function; kept for call compatibility.
        local_number: identifier of this instance, placed after ``i`` in the written file name.
        receive_numbers: identifiers of the peers to read from.
        Gb: current best vector; ``Gb[score_ind]`` is its score.
        add: path prefix prepended to every file name.
        score_ind: index of the score inside a vector.
        max_wait: number of failed reads tolerated per peer before giving up.
        poll_interval: seconds slept between two read attempts.

    Returns:
        ``Gb`` itself, or the peer vector with the highest score if one beats it.
    """
    write_name = str(i) + str(local_number) + ".csv"
    pd.DataFrame(Gb).to_csv(add + write_name)
    for receive_number in receive_numbers:
        read_name = str(i) + str(receive_number) + ".csv"
        patience = 0
        while True:
            try:
                # Column 0 of the CSV is the saved index, column 1 holds the values.
                Gb_from_others = pd.read_csv(add + read_name).to_numpy().transpose()[1]
                print("receive from", receive_number, Gb_from_others)
                if Gb_from_others[score_ind] > Gb[score_ind]:
                    Gb = Gb_from_others
                break
            except Exception:
                # A missing or half-written file is expected while the peer is still
                # busy, so the read is retried. Catching Exception rather than using
                # a bare except lets KeyboardInterrupt / SystemExit stop the wait.
                time.sleep(poll_interval)
                patience += 1
                if patience > max_wait:
                    print("break!!!!!! I loss my patience")
                    break
    return Gb

In [None]:
p_num = 4  ## number of particles
max_iteration = 10 ## number of PSO iterations
score_ind = 11 ## row index of the score
record = np.zeros((1, max_iteration), dtype='float')
# [lower, upper] bound for each of the 11 hyperparameter rows, in the order in which
# RESTM_Train unpacks them (learning rate first).
boundary = [[1e-5, 5e-3],[1,64],[1,8],[1,16],[1,30],[1,16],[4,4],[1, 50], [1, 5], [32,256], [1,3]]
# Each row of the literal is one particle: learning rate, ten hyperparameters, score.
# The transpose turns it into (score_ind + 1) rows by p_num columns.
x = np.array([[1.5e-4, 59,  3,1,1,1,  4, 30, 1, 256, 1, 0.398750],
              [  5e-4, 40,  3,6,5,3,  4, 30, 1, 128, 2, 0.426250],
              [  1e-4, 48,  4,3,2,1,  4, 31, 2,  64, 3, 0.261250],
              [  5e-3, 20,  6,6,16,3, 4, 20, 2,  32, 1, 0.373750],
             ]).transpose()
for j in range(p_num):
    # Row 0 (the learning rate) keeps its seed value. With mutation_rate = 1 every
    # other row is redrawn by comparison() even when it is already within bounds.
    for k in range(1, score_ind):
        lower, upper = boundary[k]
        x[k,j] = comparison(x[k,j], lower, upper, True,  mutation_rate = 1)
    # Release Python and CUDA memory, then pause, before training the next model.
    gc.collect()
    with torch.no_grad():
        torch.cuda.empty_cache()
    time.sleep(10)
    # The score row is overwritten with the validation accuracy returned by RESTM_Train.
    x[score_ind,j] = RESTM_Train(x[:score_ind,j].tolist())

# x = np.array([[1.5000e-04, 5.0000e-04, 1.0000e-04, 5.0000e-03],
#  [4.2000e+01, 1.6000e+01, 5.4000e+01, 1.0000e+00],
#  [4.0000e+00, 5.0000e+00, 4.0000e+00, 2.0000e+00],
#  [1.1000e+01, 1.1000e+01, 7.0000e+00, 1.3000e+01],
#  [1.8000e+01, 4.0000e+00, 4.0000e+00, 1.2000e+01],
#  [2.0000e+00, 3.0000e+00, 1.0000e+01, 1.0000e+00],
#  [4.0000e+00, 4.0000e+00, 4.0000e+00, 4.0000e+00],
#  [1.1000e+01, 3.0000e+00, 8.0000e+00, 2.5000e+01],
#  [3.0000e+00, 1.0000e+00, 3.0000e+00, 3.0000e+00],
#  [1.7400e+02, 1.8700e+02, 2.2000e+02, 1.2000e+02],
#  [1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00],
#  [2.3250e-01, 2.1375e-01, 1.0125e-01, 3.1375e-01]])

print(x)

[[1.5000e-04 5.0000e-04 1.0000e-04 5.0000e-03]
 [4.2000e+01 1.6000e+01 5.4000e+01 1.0000e+00]
 [4.0000e+00 5.0000e+00 4.0000e+00 2.0000e+00]
 [1.1000e+01 1.1000e+01 7.0000e+00 1.3000e+01]
 [1.8000e+01 4.0000e+00 4.0000e+00 1.2000e+01]
 [2.0000e+00 3.0000e+00 1.0000e+01 1.0000e+00]
 [4.0000e+00 4.0000e+00 4.0000e+00 4.0000e+00]
 [1.1000e+01 3.0000e+00 8.0000e+00 2.5000e+01]
 [3.0000e+00 1.0000e+00 3.0000e+00 3.0000e+00]
 [1.7400e+02 1.8700e+02 2.2000e+02 1.2000e+02]
 [1.0000e+00 1.0000e+00 1.0000e+00 1.0000e+00]
 [2.3250e-01 2.1375e-01 1.0125e-01 3.1375e-01]]


In [None]:
w = 0.5   # weight of the previous velocity in the velocity update
c1 = 2    # weight of the pull toward each particle's own best position (Pb)
c2 = 2    # weight of the pull toward the global best position (Gb)
V = np.zeros((score_ind, p_num))   # one velocity per hyperparameter row and particle
Pb = x.copy()                      # per-particle best so far: the initial swarm
# Global best = the column of Pb with the highest score, then compared with the peers' bests.
best_particle = np.argmax(Pb[score_ind])
Gb = Pb[:, best_particle].copy()
Gb = communication(
    i=0,
    iteration_per_communicate=iteration_per_communicate,
    local_number=local_number,
    receive_numbers=receive_numbers,
    Gb=Gb,
    add=add,
    score_ind=score_ind,
)
print(Gb)

receive from 2 [5.0000e-04 1.9000e+01 7.0000e+00 3.0000e+00 2.3000e+01 4.0000e+00
 4.0000e+00 3.4000e+01 4.0000e+00 2.5500e+02 1.0000e+00 2.9625e-01]
receive from 3 [5.0000e-04 5.3000e+01 4.0000e+00 2.0000e+00 1.8000e+01 9.0000e+00
 4.0000e+00 2.6000e+01 1.0000e+00 8.0000e+01 1.0000e+00 3.0375e-01]
receive from 4 [5.00e-03 3.50e+01 6.00e+00 1.10e+01 1.80e+01 8.00e+00 4.00e+00 2.10e+01
 3.00e+00 2.08e+02 1.00e+00 2.15e-01]
[5.0000e-03 1.0000e+00 2.0000e+00 1.3000e+01 1.2000e+01 1.0000e+00
 4.0000e+00 2.5000e+01 3.0000e+00 1.2000e+02 1.0000e+00 3.1375e-01]


In [None]:
for i in range(1,(max_iteration+1)):
    print('-' * 20, 'Iteration', i, '-' * 20)
    for j in range(p_num):
        print('.' * 20, 'Particle', j+1, '.' * 20)
        for k in range(score_ind):
            # Velocity update: previous velocity + pull toward own best + pull toward global best.
            inertia = w * V[k,j]
            toward_own_best = c1 * random.uniform(0,1) * (Pb[k,j] - x[k,j])
            toward_global_best = c2 * random.uniform(0,1) * (Gb[k] - x[k,j])
            V[k,j] = inertia + toward_own_best + toward_global_best
            # Position update, then bounds check / mutation; every row except the
            # learning rate (row 0) is truncated to an integer.
            x[k,j] = comparison(x[k,j] + V[k,j], boundary[k][0], boundary[k][1], (k != 0))
        # Release Python and CUDA memory, then pause, before training the next model.
        gc.collect()
        with torch.no_grad():
            torch.cuda.empty_cache()
        time.sleep(10)
        x[score_ind,j] = RESTM_Train(x[:score_ind,j].tolist())

    # Update each particle's personal best position.
    for j in range(p_num):
        if x[score_ind,j] > Pb[score_ind,j]:
            Pb[:,j] = x[:,j].copy()
            print(Gb[score_ind], max(Pb[score_ind]))

    # Update the local global best from the personal bests.
    swarm_best_score = max(Pb[score_ind])
    if Gb[score_ind] < swarm_best_score:
        Gb = Pb[:, np.argmax(Pb[score_ind])].copy()

    # Exchange the global best with the other instances every few iterations.
    if i % iteration_per_communicate == 0:
        Gb = communication(i, iteration_per_communicate, local_number, receive_numbers, Gb, add, score_ind)
    print(i,Gb)

-------------------- Iteration 1 --------------------
.................... Particle 1 ....................
 learning_rate = 0.0028187847549780024 initial_size = 5 
 blocks_1 = 3 blocks_2 = 14 blocks_3 = 17 blocks_4 = 1 
 expansion = 4 hidden_size = 29 num_layers = 3


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [01:51<00:00,  1.79it/s]
Validation: 100%|██████████| 25/25 [00:02<00:00,  8.36it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.126212 Train Loss: 2.153193
  Val Acc: 0.146250   Val Loss: 2.068887


Training: 100%|██████████| 200/200 [01:51<00:00,  1.79it/s]
Validation: 100%|██████████| 25/25 [00:02<00:00,  8.34it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.192055 Train Loss: 2.006416
  Val Acc: 0.210000   Val Loss: 1.943914


Training: 100%|██████████| 200/200 [01:52<00:00,  1.78it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.97it/s]


Train Acc: 0.280419 Train Loss: 1.859892
  Val Acc: 0.271250   Val Loss: 1.873371
.................... Particle 2 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.003992144114762897 initial_size = 9 
 blocks_1 = 4 blocks_2 = 11 blocks_3 = 7 blocks_4 = 4 
 expansion = 4 hidden_size = 14 num_layers = 2


Training: 100%|██████████| 200/200 [01:49<00:00,  1.83it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  8.13it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.162809 Train Loss: 2.125274
  Val Acc: 0.242500   Val Loss: 1.984841


Training: 100%|██████████| 200/200 [01:51<00:00,  1.80it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  8.19it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.277135 Train Loss: 1.890255
  Val Acc: 0.222500   Val Loss: 1.992655


Training: 100%|██████████| 200/200 [01:51<00:00,  1.79it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  8.06it/s]


Train Acc: 0.324836 Train Loss: 1.798208
  Val Acc: 0.287500   Val Loss: 1.915874
.................... Particle 3 ....................
Mutation occur: 1.0 --> 1.52785787502694


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0027818766739924517 initial_size = 3 
 blocks_1 = 2 blocks_2 = 15 blocks_3 = 17 blocks_4 = 2 
 expansion = 4 hidden_size = 29 num_layers = 3


Training: 100%|██████████| 200/200 [01:55<00:00,  1.74it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.92it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.160463 Train Loss: 2.091070
  Val Acc: 0.221250   Val Loss: 1.970582


Training: 100%|██████████| 200/200 [01:54<00:00,  1.75it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.99it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.204098 Train Loss: 1.998518
  Val Acc: 0.232500   Val Loss: 1.985075


Training: 100%|██████████| 200/200 [01:54<00:00,  1.74it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.93it/s]


Train Acc: 0.293869 Train Loss: 1.849254
  Val Acc: 0.300000   Val Loss: 1.863199
.................... Particle 4 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.005 initial_size = 1 
 blocks_1 = 2 blocks_2 = 13 blocks_3 = 12 blocks_4 = 1 
 expansion = 4 hidden_size = 25 num_layers = 3


Training: 100%|██████████| 200/200 [01:53<00:00,  1.77it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  8.15it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.127307 Train Loss: 2.157973
  Val Acc: 0.125000   Val Loss: 2.080711


Training: 100%|██████████| 200/200 [01:51<00:00,  1.79it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  8.29it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.131530 Train Loss: 2.109606
  Val Acc: 0.223750   Val Loss: 2.037588


Training: 100%|██████████| 200/200 [01:53<00:00,  1.77it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.96it/s]


Train Acc: 0.190647 Train Loss: 2.013977
  Val Acc: 0.230000   Val Loss: 1.961877
0.31375 0.31375
0.31375 0.31375
0.31375 0.31375
receive from 2 [1.44072102e-03 2.70000000e+01 3.00000000e+00 1.40000000e+01
 8.00000000e+00 8.00000000e+00 4.00000000e+00 2.60000000e+01
 2.00000000e+00 1.22000000e+02 1.00000000e+00 3.72500000e-01]
receive from 3 [4.09175326e-03 2.90000000e+01 2.00000000e+00 6.00000000e+00
 1.70000000e+01 6.00000000e+00 4.00000000e+00 2.40000000e+01
 1.00000000e+00 1.31000000e+02 1.00000000e+00 3.25000000e-01]
receive from 4 [3.43225528e-03 4.00000000e+00 2.00000000e+00 1.00000000e+01
 1.00000000e+01 4.00000000e+00 4.00000000e+00 1.20000000e+01
 3.00000000e+00 1.09000000e+02 1.00000000e+00 3.62500000e-01]
1 [1.44072102e-03 2.70000000e+01 3.00000000e+00 1.40000000e+01
 8.00000000e+00 8.00000000e+00 4.00000000e+00 2.60000000e+01
 2.00000000e+00 1.22000000e+02 1.00000000e+00 3.72500000e-01]
-------------------- Iteration 2 --------------------
.................... Particle 1 .

  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.004461389952270723 initial_size = 19 
 blocks_1 = 2 blocks_2 = 15 blocks_3 = 15 blocks_4 = 9 
 expansion = 4 hidden_size = 36 num_layers = 2


Training: 100%|██████████| 200/200 [02:00<00:00,  1.66it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.62it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.162652 Train Loss: 2.091201
  Val Acc: 0.170000   Val Loss: 2.067556


Training: 100%|██████████| 200/200 [02:00<00:00,  1.66it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.55it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.188614 Train Loss: 2.010162
  Val Acc: 0.243750   Val Loss: 1.979262


Training: 100%|██████████| 200/200 [02:00<00:00,  1.66it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.57it/s]


Train Acc: 0.267438 Train Loss: 1.914346
  Val Acc: 0.282500   Val Loss: 1.910172
.................... Particle 2 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0015516681041158348 initial_size = 12 
 blocks_1 = 2 blocks_2 = 15 blocks_3 = 9 blocks_4 = 2 
 expansion = 4 hidden_size = 32 num_layers = 2


Training: 100%|██████████| 200/200 [01:51<00:00,  1.79it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.61it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.192368 Train Loss: 2.041128
  Val Acc: 0.220000   Val Loss: 2.023863


Training: 100%|██████████| 200/200 [01:50<00:00,  1.80it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.75it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.269628 Train Loss: 1.870934
  Val Acc: 0.323750   Val Loss: 1.784129


Training: 100%|██████████| 200/200 [01:51<00:00,  1.79it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.81it/s]


Train Acc: 0.342509 Train Loss: 1.731834
  Val Acc: 0.355000   Val Loss: 1.713370
.................... Particle 3 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.001566399664692731 initial_size = 14 
 blocks_1 = 2 blocks_2 = 14 blocks_3 = 11 blocks_4 = 3 
 expansion = 4 hidden_size = 35 num_layers = 2


Training: 100%|██████████| 200/200 [01:53<00:00,  1.76it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.56it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.203472 Train Loss: 2.022440
  Val Acc: 0.226250   Val Loss: 2.036543


Training: 100%|██████████| 200/200 [01:53<00:00,  1.76it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.68it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.292462 Train Loss: 1.859668
  Val Acc: 0.308750   Val Loss: 1.784784


Training: 100%|██████████| 200/200 [01:52<00:00,  1.77it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.53it/s]


Train Acc: 0.343760 Train Loss: 1.758786
  Val Acc: 0.320000   Val Loss: 1.756615
.................... Particle 4 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0008351134748248358 initial_size = 3 
 blocks_1 = 2 blocks_2 = 13 blocks_3 = 5 blocks_4 = 13 
 expansion = 4 hidden_size = 25 num_layers = 2


Training: 100%|██████████| 200/200 [01:52<00:00,  1.78it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  8.12it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.150610 Train Loss: 2.126199
  Val Acc: 0.156250   Val Loss: 2.085778


Training: 100%|██████████| 200/200 [01:51<00:00,  1.79it/s]
Validation: 100%|██████████| 25/25 [00:02<00:00,  8.39it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.202377 Train Loss: 2.021558
  Val Acc: 0.217500   Val Loss: 1.988260


Training: 100%|██████████| 200/200 [01:52<00:00,  1.78it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  8.16it/s]


Train Acc: 0.229434 Train Loss: 1.977546
  Val Acc: 0.250000   Val Loss: 1.946545
0.3725 0.31375
0.3725 0.355
0.3725 0.355
receive from 2 [1.44072102e-03 2.70000000e+01 3.00000000e+00 1.40000000e+01
 8.00000000e+00 8.00000000e+00 4.00000000e+00 2.60000000e+01
 2.00000000e+00 1.22000000e+02 1.00000000e+00 3.72500000e-01]
receive from 3 [1.44072102e-03 2.70000000e+01 3.00000000e+00 1.40000000e+01
 8.00000000e+00 8.00000000e+00 4.00000000e+00 2.60000000e+01
 2.00000000e+00 1.22000000e+02 1.00000000e+00 3.72500000e-01]
break!!!!!! I loss my patience
2 [1.44072102e-03 2.70000000e+01 3.00000000e+00 1.40000000e+01
 8.00000000e+00 8.00000000e+00 4.00000000e+00 2.60000000e+01
 2.00000000e+00 1.22000000e+02 1.00000000e+00 3.72500000e-01]
-------------------- Iteration 3 --------------------
.................... Particle 1 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0011004982124353108 initial_size = 18 
 blocks_1 = 3 blocks_2 = 14 blocks_3 = 11 blocks_4 = 12 
 expansion = 4 hidden_size = 20 num_layers = 1


Training: 100%|██████████| 200/200 [01:58<00:00,  1.69it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.67it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.159055 Train Loss: 2.100030
  Val Acc: 0.242500   Val Loss: 1.948263


Training: 100%|██████████| 200/200 [01:57<00:00,  1.70it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.75it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.199093 Train Loss: 2.014922
  Val Acc: 0.236250   Val Loss: 1.913805


Training: 100%|██████████| 200/200 [01:58<00:00,  1.69it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.84it/s]


Train Acc: 0.232405 Train Loss: 1.963020
  Val Acc: 0.288750   Val Loss: 1.883135
.................... Particle 2 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0002810568589297524 initial_size = 29 
 blocks_1 = 1 blocks_2 = 15 blocks_3 = 9 blocks_4 = 2 
 expansion = 4 hidden_size = 39 num_layers = 2


Training: 100%|██████████| 200/200 [02:06<00:00,  1.58it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.44it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.133719 Train Loss: 2.172832
  Val Acc: 0.128750   Val Loss: 2.068718


Training: 100%|██████████| 200/200 [02:06<00:00,  1.59it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.46it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.171411 Train Loss: 2.083198
  Val Acc: 0.227500   Val Loss: 1.998478


Training: 100%|██████████| 200/200 [02:06<00:00,  1.58it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.41it/s]


Train Acc: 0.214107 Train Loss: 2.011431
  Val Acc: 0.242500   Val Loss: 1.934854
.................... Particle 3 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0008340587324781062 initial_size = 22 
 blocks_1 = 3 blocks_2 = 15 blocks_3 = 6 blocks_4 = 3 
 expansion = 4 hidden_size = 38 num_layers = 1


Training: 100%|██████████| 200/200 [01:54<00:00,  1.75it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.77it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.170316 Train Loss: 2.107193
  Val Acc: 0.272500   Val Loss: 1.982152


Training: 100%|██████████| 200/200 [01:54<00:00,  1.75it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.82it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.275571 Train Loss: 1.911795
  Val Acc: 0.371250   Val Loss: 1.729669


Training: 100%|██████████| 200/200 [01:53<00:00,  1.76it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.94it/s]


Train Acc: 0.346731 Train Loss: 1.756386
  Val Acc: 0.371250   Val Loss: 1.699852
.................... Particle 4 ....................
 learning_rate = 0.0004553932818892743 initial_size = 30 
 blocks_1 = 3 blocks_2 = 14 blocks_3 = 12 blocks_4 = 5 
 expansion = 4 hidden_size = 27 num_layers = 2


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:16<00:00,  1.47it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.95it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.162652 Train Loss: 2.122761
  Val Acc: 0.210000   Val Loss: 1.981556


Training: 100%|██████████| 200/200 [02:15<00:00,  1.47it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  6.00it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.240069 Train Loss: 1.976100
  Val Acc: 0.251250   Val Loss: 1.916994


Training: 100%|██████████| 200/200 [02:15<00:00,  1.48it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.95it/s]


Train Acc: 0.266656 Train Loss: 1.894601
  Val Acc: 0.352500   Val Loss: 1.783177
0.3725 0.355
0.3725 0.37125
0.3725 0.37125
receive from 2 [1.44072102e-03 2.70000000e+01 3.00000000e+00 1.40000000e+01
 8.00000000e+00 8.00000000e+00 4.00000000e+00 2.60000000e+01
 2.00000000e+00 1.22000000e+02 1.00000000e+00 3.72500000e-01]
receive from 3 [1.44072102e-03 2.70000000e+01 3.00000000e+00 1.40000000e+01
 8.00000000e+00 8.00000000e+00 4.00000000e+00 2.60000000e+01
 2.00000000e+00 1.22000000e+02 1.00000000e+00 3.72500000e-01]
break!!!!!! I loss my patience
3 [1.44072102e-03 2.70000000e+01 3.00000000e+00 1.40000000e+01
 8.00000000e+00 8.00000000e+00 4.00000000e+00 2.60000000e+01
 2.00000000e+00 1.22000000e+02 1.00000000e+00 3.72500000e-01]
-------------------- Iteration 4 --------------------
.................... Particle 1 ....................
 learning_rate = 0.0009709898906329915 initial_size = 25 
 blocks_1 = 3 blocks_2 = 13 blocks_3 = 15 blocks_4 = 10 
 expansion = 4 hidden_size = 23 num_la

  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:09<00:00,  1.55it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  6.14it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.153269 Train Loss: 2.119911
  Val Acc: 0.151250   Val Loss: 2.070866


Training: 100%|██████████| 200/200 [02:08<00:00,  1.56it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  6.22it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.196747 Train Loss: 2.026504
  Val Acc: 0.260000   Val Loss: 1.977151


Training: 100%|██████████| 200/200 [02:08<00:00,  1.55it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.29it/s]


Train Acc: 0.251955 Train Loss: 1.935667
  Val Acc: 0.286250   Val Loss: 1.907399
.................... Particle 2 ....................
Mutation occur: 10.242421404843917 --> 38.902210894068304
 learning_rate = 0.0007406653500966158 initial_size = 38 
 blocks_1 = 4 blocks_2 = 13 blocks_3 = 8 blocks_4 = 5 
 expansion = 4 hidden_size = 29 num_layers = 2


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [03:10<00:00,  1.05it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.71it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.172036 Train Loss: 2.099175
  Val Acc: 0.247500   Val Loss: 1.945406


Training: 100%|██████████| 200/200 [03:10<00:00,  1.05it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.00it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.251017 Train Loss: 1.941845
  Val Acc: 0.291250   Val Loss: 1.823195


Training: 100%|██████████| 200/200 [03:10<00:00,  1.05it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.94it/s]


Train Acc: 0.319987 Train Loss: 1.808255
  Val Acc: 0.340000   Val Loss: 1.726511
.................... Particle 3 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0010036837940620264 initial_size = 29 
 blocks_1 = 3 blocks_2 = 14 blocks_3 = 6 blocks_4 = 3 
 expansion = 4 hidden_size = 28 num_layers = 2


Training: 100%|██████████| 200/200 [02:04<00:00,  1.61it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.53it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.201282 Train Loss: 2.043005
  Val Acc: 0.290000   Val Loss: 1.875218


Training: 100%|██████████| 200/200 [02:04<00:00,  1.61it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.51it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.293087 Train Loss: 1.838926
  Val Acc: 0.335000   Val Loss: 1.835482


Training: 100%|██████████| 200/200 [02:03<00:00,  1.62it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.39it/s]


Train Acc: 0.366281 Train Loss: 1.721234
  Val Acc: 0.350000   Val Loss: 1.772489
.................... Particle 4 ....................
 learning_rate = 0.0015530244007212909 initial_size = 43 
 blocks_1 = 3 blocks_2 = 14 blocks_3 = 9 blocks_4 = 6 
 expansion = 4 hidden_size = 26 num_layers = 2


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:40<00:00,  1.24it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.49it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.178292 Train Loss: 2.081802
  Val Acc: 0.233750   Val Loss: 1.934752


Training: 100%|██████████| 200/200 [02:40<00:00,  1.25it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.53it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.281670 Train Loss: 1.880586
  Val Acc: 0.331250   Val Loss: 1.777330


Training: 100%|██████████| 200/200 [02:40<00:00,  1.25it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.53it/s]


Train Acc: 0.348608 Train Loss: 1.751331
  Val Acc: 0.350000   Val Loss: 1.703788
receive from 2 [1.60889058e-03 3.30000000e+01 1.00000000e+00 1.30000000e+01
 4.00000000e+00 6.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 8.10000000e+01 1.00000000e+00 3.75000000e-01]
receive from 3 [1.44072102e-03 2.70000000e+01 3.00000000e+00 1.40000000e+01
 8.00000000e+00 8.00000000e+00 4.00000000e+00 2.60000000e+01
 2.00000000e+00 1.22000000e+02 1.00000000e+00 3.72500000e-01]
break!!!!!! I loss my patience
4 [1.60889058e-03 3.30000000e+01 1.00000000e+00 1.30000000e+01
 4.00000000e+00 6.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 8.10000000e+01 1.00000000e+00 3.75000000e-01]
-------------------- Iteration 5 --------------------
.................... Particle 1 ....................
 learning_rate = 0.00130230595316154 initial_size = 36 
 blocks_1 = 2 blocks_2 = 13 blocks_3 = 3 blocks_4 = 7 
 expansion = 4 hidden_size = 24 num_layers = 1


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:27<00:00,  1.35it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.91it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.170942 Train Loss: 2.079088
  Val Acc: 0.210000   Val Loss: 1.985643


Training: 100%|██████████| 200/200 [02:29<00:00,  1.34it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.95it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.237723 Train Loss: 1.959839
  Val Acc: 0.260000   Val Loss: 1.947558


Training: 100%|██████████| 200/200 [02:29<00:00,  1.34it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.80it/s]


Train Acc: 0.266969 Train Loss: 1.899271
  Val Acc: 0.263750   Val Loss: 1.918029
.................... Particle 2 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.002776369249928815 initial_size = 5 
 blocks_1 = 2 blocks_2 = 14 blocks_3 = 1 blocks_4 = 2 
 expansion = 4 hidden_size = 23 num_layers = 1


Training: 100%|██████████| 200/200 [01:46<00:00,  1.87it/s]
Validation: 100%|██████████| 25/25 [00:02<00:00,  8.62it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.246168 Train Loss: 1.951669
  Val Acc: 0.332500   Val Loss: 1.746024


Training: 100%|██████████| 200/200 [01:45<00:00,  1.90it/s]
Validation: 100%|██████████| 25/25 [00:02<00:00,  8.86it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.352362 Train Loss: 1.744045
  Val Acc: 0.235000   Val Loss: 1.892126


Training: 100%|██████████| 200/200 [01:45<00:00,  1.90it/s]
Validation: 100%|██████████| 25/25 [00:02<00:00,  8.72it/s]


Train Acc: 0.390522 Train Loss: 1.677948
  Val Acc: 0.390000   Val Loss: 1.610425
.................... Particle 3 ....................
 learning_rate = 0.0019808948580570668 initial_size = 34 
 blocks_1 = 3 blocks_2 = 13 blocks_3 = 3 blocks_4 = 7 
 expansion = 4 hidden_size = 39 num_layers = 2


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:25<00:00,  1.37it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.74it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.170160 Train Loss: 2.083711
  Val Acc: 0.273750   Val Loss: 1.917992


Training: 100%|██████████| 200/200 [02:28<00:00,  1.34it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.78it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.316390 Train Loss: 1.840522
  Val Acc: 0.277500   Val Loss: 1.806910


Training: 100%|██████████| 200/200 [02:28<00:00,  1.35it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.70it/s]


Train Acc: 0.339537 Train Loss: 1.760136
  Val Acc: 0.330000   Val Loss: 1.768445
.................... Particle 4 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0007832758337409453 initial_size = 19 
 blocks_1 = 4 blocks_2 = 13 blocks_3 = 7 blocks_4 = 2 
 expansion = 4 hidden_size = 25 num_layers = 1


Training: 100%|██████████| 200/200 [01:50<00:00,  1.82it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.30it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.159994 Train Loss: 2.103873
  Val Acc: 0.226250   Val Loss: 1.989250


Training: 100%|██████████| 200/200 [01:49<00:00,  1.82it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.38it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.234126 Train Loss: 1.981484
  Val Acc: 0.292500   Val Loss: 1.875563


Training: 100%|██████████| 200/200 [01:49<00:00,  1.82it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.31it/s]


Train Acc: 0.308727 Train Loss: 1.850226
  Val Acc: 0.332500   Val Loss: 1.803781
0.375 0.39
receive from 2 [1.57842885e-03 5.00000000e+01 1.00000000e+00 8.00000000e+00
 4.00000000e+00 7.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 7.40000000e+01 1.00000000e+00 4.16250000e-01]
break!!!!!! I loss my patience
break!!!!!! I loss my patience
5 [1.57842885e-03 5.00000000e+01 1.00000000e+00 8.00000000e+00
 4.00000000e+00 7.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 7.40000000e+01 1.00000000e+00 4.16250000e-01]
-------------------- Iteration 6 --------------------
.................... Particle 1 ....................
 learning_rate = 0.0012084845431880177 initial_size = 40 
 blocks_1 = 1 blocks_2 = 12 blocks_3 = 1 blocks_4 = 14 
 expansion = 4 hidden_size = 22 num_layers = 1


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:30<00:00,  1.33it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.56it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.170160 Train Loss: 2.087407
  Val Acc: 0.242500   Val Loss: 1.920341


Training: 100%|██████████| 200/200 [02:36<00:00,  1.28it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.52it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.275571 Train Loss: 1.899074
  Val Acc: 0.323750   Val Loss: 1.732723


Training: 100%|██████████| 200/200 [02:35<00:00,  1.28it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.51it/s]


Train Acc: 0.323428 Train Loss: 1.790891
  Val Acc: 0.353750   Val Loss: 1.733587
.................... Particle 2 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.003191886685636569 initial_size = 52 
 blocks_1 = 3 blocks_2 = 13 blocks_3 = 1 blocks_4 = 1 
 expansion = 4 hidden_size = 22 num_layers = 2


Training: 100%|██████████| 200/200 [02:01<00:00,  1.64it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.78it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.210666 Train Loss: 2.004853
  Val Acc: 0.288750   Val Loss: 1.847732


Training: 100%|██████████| 200/200 [02:01<00:00,  1.65it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.89it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.319518 Train Loss: 1.824562
  Val Acc: 0.243750   Val Loss: 2.039302


Training: 100%|██████████| 200/200 [02:00<00:00,  1.66it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.82it/s]


Train Acc: 0.335158 Train Loss: 1.788237
  Val Acc: 0.307500   Val Loss: 1.804242
.................... Particle 3 ....................
 learning_rate = 0.0003758998921130801 initial_size = 39 
 blocks_1 = 2 blocks_2 = 13 blocks_3 = 5 blocks_4 = 7 
 expansion = 4 hidden_size = 25 num_layers = 1


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:55<00:00,  1.14it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.34it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.150454 Train Loss: 2.142159
  Val Acc: 0.172500   Val Loss: 2.061768


Training: 100%|██████████| 200/200 [02:54<00:00,  1.15it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.35it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.178136 Train Loss: 2.062622
  Val Acc: 0.230000   Val Loss: 1.990958


Training: 100%|██████████| 200/200 [02:54<00:00,  1.14it/s]
Validation: 100%|██████████| 25/25 [00:04<00:00,  5.43it/s]


Train Acc: 0.234908 Train Loss: 1.975038
  Val Acc: 0.283750   Val Loss: 1.932160
.................... Particle 4 ....................
 learning_rate = 0.0014905423380531645 initial_size = 30 
 blocks_1 = 1 blocks_2 = 6 blocks_3 = 2 blocks_4 = 10 
 expansion = 4 hidden_size = 25 num_layers = 2


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [01:50<00:00,  1.81it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.44it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.170472 Train Loss: 2.080336
  Val Acc: 0.238750   Val Loss: 1.959227


Training: 100%|██████████| 200/200 [01:49<00:00,  1.83it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.51it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.256490 Train Loss: 1.916526
  Val Acc: 0.296250   Val Loss: 1.836978


Training: 100%|██████████| 200/200 [01:48<00:00,  1.84it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.45it/s]


Train Acc: 0.309509 Train Loss: 1.812768
  Val Acc: 0.326250   Val Loss: 1.765112
0.41625 0.39
receive from 2 [1.57842885e-03 5.00000000e+01 1.00000000e+00 8.00000000e+00
 4.00000000e+00 7.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 7.40000000e+01 1.00000000e+00 4.16250000e-01]
break!!!!!! I loss my patience
break!!!!!! I loss my patience
6 [1.57842885e-03 5.00000000e+01 1.00000000e+00 8.00000000e+00
 4.00000000e+00 7.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 7.40000000e+01 1.00000000e+00 4.16250000e-01]
-------------------- Iteration 7 --------------------
.................... Particle 1 ....................
Mutation occur: 5.685579158208387 --> 25.306613465640634
 learning_rate = 0.0017130868563798446 initial_size = 52 
 blocks_1 = 1 blocks_2 = 6 blocks_3 = 25 blocks_4 = 12 
 expansion = 4 hidden_size = 24 num_layers = 1


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:57<00:00,  1.13it/s]
Validation: 100%|██████████| 25/25 [00:07<00:00,  3.36it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.136065 Train Loss: 2.126011
  Val Acc: 0.172500   Val Loss: 2.054936


Training: 100%|██████████| 200/200 [02:57<00:00,  1.13it/s]
Validation: 100%|██████████| 25/25 [00:07<00:00,  3.33it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.212543 Train Loss: 2.003558
  Val Acc: 0.227500   Val Loss: 1.950035


Training: 100%|██████████| 200/200 [02:57<00:00,  1.13it/s]
Validation: 100%|██████████| 25/25 [00:07<00:00,  3.32it/s]


Train Acc: 0.266031 Train Loss: 1.903118
  Val Acc: 0.285000   Val Loss: 1.840557
.................... Particle 2 ....................
 learning_rate = 0.0023080383384994083 initial_size = 24 
 blocks_1 = 3 blocks_2 = 4 blocks_3 = 4 blocks_4 = 12 
 expansion = 4 hidden_size = 26 num_layers = 1


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [01:48<00:00,  1.84it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.32it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.200344 Train Loss: 2.032521
  Val Acc: 0.235000   Val Loss: 1.954850


Training: 100%|██████████| 200/200 [01:48<00:00,  1.85it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.47it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.288083 Train Loss: 1.845897
  Val Acc: 0.372500   Val Loss: 1.754043


Training: 100%|██████████| 200/200 [01:48<00:00,  1.84it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.48it/s]


Train Acc: 0.335627 Train Loss: 1.777888
  Val Acc: 0.363750   Val Loss: 1.700161
.................... Particle 3 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0005343963222023355 initial_size = 38 
 blocks_1 = 1 blocks_2 = 7 blocks_3 = 5 blocks_4 = 2 
 expansion = 4 hidden_size = 36 num_layers = 2


Training: 100%|██████████| 200/200 [01:57<00:00,  1.70it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.80it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.189396 Train Loss: 2.066746
  Val Acc: 0.280000   Val Loss: 1.938091


Training: 100%|██████████| 200/200 [01:56<00:00,  1.72it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.00it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.287770 Train Loss: 1.871983
  Val Acc: 0.341250   Val Loss: 1.790287


Training: 100%|██████████| 200/200 [01:57<00:00,  1.71it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.03it/s]


Train Acc: 0.353456 Train Loss: 1.731540
  Val Acc: 0.358750   Val Loss: 1.638083
.................... Particle 4 ....................
 learning_rate = 0.0024575931083036144 initial_size = 51 
 blocks_1 = 1 blocks_2 = 12 blocks_3 = 15 blocks_4 = 7 
 expansion = 4 hidden_size = 29 num_layers = 2


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:38<00:00,  1.26it/s]
Validation: 100%|██████████| 25/25 [00:06<00:00,  3.77it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.163122 Train Loss: 2.102273
  Val Acc: 0.201250   Val Loss: 2.038676


Training: 100%|██████████| 200/200 [02:38<00:00,  1.27it/s]
Validation: 100%|██████████| 25/25 [00:06<00:00,  3.74it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.274163 Train Loss: 1.890117
  Val Acc: 0.232500   Val Loss: 2.222393


Training: 100%|██████████| 200/200 [02:37<00:00,  1.27it/s]
Validation: 100%|██████████| 25/25 [00:06<00:00,  3.75it/s]


Train Acc: 0.333907 Train Loss: 1.766419
  Val Acc: 0.326250   Val Loss: 1.776303
receive from 2 [1.57842885e-03 5.00000000e+01 1.00000000e+00 8.00000000e+00
 4.00000000e+00 7.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 7.40000000e+01 1.00000000e+00 4.16250000e-01]
break!!!!!! I loss my patience
break!!!!!! I loss my patience
7 [1.57842885e-03 5.00000000e+01 1.00000000e+00 8.00000000e+00
 4.00000000e+00 7.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 7.40000000e+01 1.00000000e+00 4.16250000e-01]
-------------------- Iteration 8 --------------------
.................... Particle 1 ....................
 learning_rate = 0.0013651385642707405 initial_size = 44 
 blocks_1 = 3 blocks_2 = 7 blocks_3 = 5 blocks_4 = 13 
 expansion = 4 hidden_size = 26 num_layers = 1


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:12<00:00,  1.51it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.92it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.164998 Train Loss: 2.098808
  Val Acc: 0.183750   Val Loss: 2.006541


Training: 100%|██████████| 200/200 [02:12<00:00,  1.51it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.82it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.228965 Train Loss: 1.965832
  Val Acc: 0.292500   Val Loss: 1.843624


Training: 100%|██████████| 200/200 [02:12<00:00,  1.51it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.80it/s]


Train Acc: 0.316390 Train Loss: 1.850632
  Val Acc: 0.336250   Val Loss: 1.766057
.................... Particle 2 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0018907633305669579 initial_size = 19 
 blocks_1 = 4 blocks_2 = 10 blocks_3 = 3 blocks_4 = 7 
 expansion = 4 hidden_size = 24 num_layers = 2


Training: 100%|██████████| 200/200 [01:52<00:00,  1.79it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.18it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.206756 Train Loss: 2.025190
  Val Acc: 0.291250   Val Loss: 1.890950


Training: 100%|██████████| 200/200 [01:50<00:00,  1.80it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.16it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.309040 Train Loss: 1.816905
  Val Acc: 0.355000   Val Loss: 1.702999


Training: 100%|██████████| 200/200 [01:50<00:00,  1.82it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.31it/s]


Train Acc: 0.333907 Train Loss: 1.759396
  Val Acc: 0.315000   Val Loss: 1.887308
.................... Particle 3 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.002891408914940107 initial_size = 14 
 blocks_1 = 4 blocks_2 = 9 blocks_3 = 4 blocks_4 = 8 
 expansion = 4 hidden_size = 37 num_layers = 1


Training: 100%|██████████| 200/200 [01:50<00:00,  1.81it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.25it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.172505 Train Loss: 2.070724
  Val Acc: 0.237500   Val Loss: 1.963000


Training: 100%|██████████| 200/200 [01:50<00:00,  1.81it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.46it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.277291 Train Loss: 1.902512
  Val Acc: 0.305000   Val Loss: 1.862243


Training: 100%|██████████| 200/200 [01:49<00:00,  1.83it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.46it/s]


Train Acc: 0.319675 Train Loss: 1.810523
  Val Acc: 0.352500   Val Loss: 1.757089
.................... Particle 4 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0022408543423857665 initial_size = 22 
 blocks_1 = 2 blocks_2 = 14 blocks_3 = 7 blocks_4 = 4 
 expansion = 4 hidden_size = 29 num_layers = 2


Training: 100%|██████████| 200/200 [01:53<00:00,  1.77it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.74it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.181420 Train Loss: 2.062817
  Val Acc: 0.216250   Val Loss: 1.970276


Training: 100%|██████████| 200/200 [01:52<00:00,  1.77it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.71it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.277448 Train Loss: 1.890803
  Val Acc: 0.327500   Val Loss: 1.776917


Training: 100%|██████████| 200/200 [01:52<00:00,  1.78it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.66it/s]


Train Acc: 0.332968 Train Loss: 1.778027
  Val Acc: 0.385000   Val Loss: 1.660724
0.41625 0.39
receive from 2 [1.57842885e-03 5.00000000e+01 1.00000000e+00 8.00000000e+00
 4.00000000e+00 7.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 7.40000000e+01 1.00000000e+00 4.16250000e-01]
break!!!!!! I loss my patience
break!!!!!! I loss my patience
8 [1.57842885e-03 5.00000000e+01 1.00000000e+00 8.00000000e+00
 4.00000000e+00 7.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 7.40000000e+01 1.00000000e+00 4.16250000e-01]
-------------------- Iteration 9 --------------------
.................... Particle 1 ....................
 learning_rate = 0.0011964014465101372 initial_size = 41 
 blocks_1 = 1 blocks_2 = 12 blocks_3 = 7 blocks_4 = 12 
 expansion = 4 hidden_size = 22 num_layers = 1


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:19<00:00,  1.43it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.55it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.146544 Train Loss: 2.122410
  Val Acc: 0.190000   Val Loss: 2.022538


Training: 100%|██████████| 200/200 [02:18<00:00,  1.44it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.80it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.216609 Train Loss: 1.986999
  Val Acc: 0.265000   Val Loss: 1.895832


Training: 100%|██████████| 200/200 [02:18<00:00,  1.44it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.71it/s]


Train Acc: 0.296372 Train Loss: 1.860688
  Val Acc: 0.322500   Val Loss: 1.801330
.................... Particle 2 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0030050646833732226 initial_size = 11 
 blocks_1 = 3 blocks_2 = 8 blocks_3 = 3 blocks_4 = 2 
 expansion = 4 hidden_size = 23 num_layers = 2


Training: 100%|██████████| 200/200 [01:49<00:00,  1.83it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.34it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.195652 Train Loss: 2.038082
  Val Acc: 0.203750   Val Loss: 2.034364


Training: 100%|██████████| 200/200 [01:49<00:00,  1.83it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.53it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.329684 Train Loss: 1.779421
  Val Acc: 0.342500   Val Loss: 1.732497


Training: 100%|██████████| 200/200 [01:49<00:00,  1.83it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.55it/s]


Train Acc: 0.368470 Train Loss: 1.705668
  Val Acc: 0.277500   Val Loss: 2.010769
.................... Particle 3 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0027593015701075484 initial_size = 22 
 blocks_1 = 3 blocks_2 = 10 blocks_3 = 6 blocks_4 = 4 
 expansion = 4 hidden_size = 18 num_layers = 1


Training: 100%|██████████| 200/200 [01:53<00:00,  1.76it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.57it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.223960 Train Loss: 1.994505
  Val Acc: 0.302500   Val Loss: 1.844249


Training: 100%|██████████| 200/200 [01:54<00:00,  1.75it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.42it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.314514 Train Loss: 1.814665
  Val Acc: 0.346250   Val Loss: 1.730115


Training: 100%|██████████| 200/200 [01:54<00:00,  1.74it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  6.54it/s]


Train Acc: 0.351892 Train Loss: 1.742522
  Val Acc: 0.342500   Val Loss: 1.781282
.................... Particle 4 ....................


  0%|          | 0/200 [00:00<?, ?it/s]

 learning_rate = 0.0020493917564701775 initial_size = 14 
 blocks_1 = 1 blocks_2 = 5 blocks_3 = 2 blocks_4 = 8 
 expansion = 4 hidden_size = 23 num_layers = 1


Training: 100%|██████████| 200/200 [01:49<00:00,  1.83it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.63it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.214889 Train Loss: 2.006650
  Val Acc: 0.302500   Val Loss: 1.841184


Training: 100%|██████████| 200/200 [01:49<00:00,  1.82it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.94it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.325618 Train Loss: 1.810457
  Val Acc: 0.395000   Val Loss: 1.679311


Training: 100%|██████████| 200/200 [01:48<00:00,  1.84it/s]
Validation: 100%|██████████| 25/25 [00:03<00:00,  7.50it/s]


Train Acc: 0.367063 Train Loss: 1.734833
  Val Acc: 0.208750   Val Loss: 2.461520
receive from 2 [1.57842885e-03 5.00000000e+01 1.00000000e+00 8.00000000e+00
 4.00000000e+00 7.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 7.40000000e+01 1.00000000e+00 4.16250000e-01]
break!!!!!! I loss my patience
break!!!!!! I loss my patience
9 [1.57842885e-03 5.00000000e+01 1.00000000e+00 8.00000000e+00
 4.00000000e+00 7.00000000e+00 4.00000000e+00 2.60000000e+01
 1.00000000e+00 7.40000000e+01 1.00000000e+00 4.16250000e-01]
-------------------- Iteration 10 --------------------
.................... Particle 1 ....................
 learning_rate = 0.0016116249581474475 initial_size = 56 
 blocks_1 = 1 blocks_2 = 8 blocks_3 = 13 blocks_4 = 9 
 expansion = 4 hidden_size = 23 num_layers = 1


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:21<00:00,  1.41it/s]
Validation: 100%|██████████| 25/25 [00:06<00:00,  3.85it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.162340 Train Loss: 2.093830
  Val Acc: 0.176250   Val Loss: 2.035408


Training: 100%|██████████| 200/200 [02:25<00:00,  1.38it/s]
Validation: 100%|██████████| 25/25 [00:06<00:00,  3.88it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.198780 Train Loss: 2.001156
  Val Acc: 0.256250   Val Loss: 1.918276


Training: 100%|██████████| 200/200 [02:26<00:00,  1.37it/s]
Validation: 100%|██████████| 25/25 [00:06<00:00,  3.85it/s]


Train Acc: 0.259149 Train Loss: 1.896115
  Val Acc: 0.306250   Val Loss: 1.815909
.................... Particle 2 ....................
 learning_rate = 0.001827554676773649 initial_size = 47 
 blocks_1 = 4 blocks_2 = 15 blocks_3 = 1 blocks_4 = 7 
 expansion = 4 hidden_size = 25 num_layers = 1


  0%|          | 0/200 [00:00<?, ?it/s]



Training: 100%|██████████| 200/200 [02:13<00:00,  1.49it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.33it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.154207 Train Loss: 2.108465
  Val Acc: 0.258750   Val Loss: 1.934304


Training: 100%|██████████| 200/200 [02:14<00:00,  1.49it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.21it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Train Acc: 0.265249 Train Loss: 1.912230
  Val Acc: 0.316250   Val Loss: 1.761264


Training: 100%|██████████| 200/200 [02:14<00:00,  1.49it/s]
Validation: 100%|██████████| 25/25 [00:05<00:00,  4.36it/s]


Train Acc: 0.323428 Train Loss: 1.809141
  Val Acc: 0.328750   Val Loss: 1.756988
.................... Particle 3 ....................
 learning_rate = 0.00026302786117824665 initial_size = 53 
 blocks_1 = 1 blocks_2 = 11 blocks_3 = 5 blocks_4 = 2 
 expansion = 4 hidden_size = 25 num_layers = 1


  0%|          | 0/200 [00:00<?, ?it/s]



Training:  82%|████████▏ | 163/200 [01:39<00:21,  1.75it/s]