In [None]:
import torch
from torch import nn
from torch.nn import functional as F
import numpy as np
import torchvision   
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import time
import torch.optim as optim
import os
import gdown
import zipfile
from torch.utils.data.sampler import SubsetRandomSampler

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)
!pwd

Thu Apr 29 22:12:18 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    25W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# data download source is not shown due to fair use

In [None]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(use_cuda, device, torch.cuda.device_count())

True cuda 1


## Model

In [None]:
# We have looked at the following repository when coding this BetaVAE.
# @misc{Subramanian2020,
#   author = {Subramanian, A.K},
#   title = {PyTorch-VAE},
#   year = {2020},
#   publisher = {GitHub},
#   journal = {GitHub repository},
#   howpublished = {\url{https://github.com/AntixK/PyTorch-VAE}}
# }


class MyBetaVAE(nn.Module):
    def __init__(self, in_channels, z_dim, beta): # image should have size 64*64
        super(MyBetaVAE, self).__init__()
        self.z_dim = z_dim
        self.beta = beta

        # hidden_dims = [in_channels, 32, 64, 128, 256, 512]
        hidden_dims = [in_channels, 32, 64, 128, 256]
        self.encoder_final_size = 16
        self.hidden_dims = hidden_dims

        # encoder
        encoder_layers = []
        for i in range(len(hidden_dims)-1):
            encoder_layers.append(nn.Sequential(
                                        nn.Conv2d(hidden_dims[i], hidden_dims[i+1], kernel_size=3, stride=2, padding=1),
                                        nn.BatchNorm2d(hidden_dims[i+1]),
                                        nn.LeakyReLU()
                                  ))
        
        self.encoder = nn.Sequential(*encoder_layers)
        self.mu = nn.Linear(hidden_dims[-1] * self.encoder_final_size, z_dim)
        self.logvar = nn.Linear(hidden_dims[-1] * self.encoder_final_size, z_dim)

        # decoder
        self.decoder_in = nn.Linear(z_dim, hidden_dims[-1] * self.encoder_final_size)

        decoder_layers = []
        for i in range(len(hidden_dims)-1, 0, -1):
            decoder_layers.append(nn.Sequential(
                                      nn.ConvTranspose2d(hidden_dims[i], hidden_dims[i] if i == 1 else hidden_dims[i-1], kernel_size=3, stride=2, padding=1, output_padding=1),
                                      nn.BatchNorm2d(hidden_dims[i] if i == 1 else hidden_dims[i-1]),
                                      nn.LeakyReLU()
                                  ))
        
        self.decoder = nn.Sequential(*decoder_layers,
                                     nn.Conv2d(hidden_dims[1], hidden_dims[0], kernel_size=3, padding=1),
                                     nn.Sigmoid())
    
    def encode(self, x):
        out = self.encoder(x)
        out = torch.flatten(out, start_dim=1)
        mu = self.mu(out)
        logvar = self.logvar(out)
        self.mu_value = mu
        self.logvar_value = logvar
        self.x = x
        return mu, logvar
    
    def decode(self, x):
        out = self.decoder_in(x).view(-1, self.hidden_dims[-1], 4, 4)
        out = self.decoder(out)
        return out
    
    def reparam(self, mu, logvar):
        std = torch.exp(logvar / 2)
        epsilon = torch.autograd.Variable(torch.randn_like(std))
        return std * epsilon + mu


    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparam(mu, logvar)
        out = self.decode(z)
        self.x_cons = out
        return out, z
    
    def loss(self):
        reconstruction_loss = F.mse_loss(self.x_cons, self.x, reduction='sum')
        kl_div = torch.mean(torch.sum(-0.5 * (1 + self.logvar_value - self.mu_value ** 2 - self.logvar_value.exp()), dim=1), dim=0)
        return reconstruction_loss + self.beta * kl_div, reconstruction_loss, kl_div

In [None]:
in_channels, z_dim, beta = 3, 128, 0.5
model = MyBetaVAE(in_channels, z_dim, beta)
model = model.to(device)
print(model, next(model.parameters()).is_cuda)

MyBetaVAE(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (3): Sequential(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
  )
  (mu): 

In [None]:
learningRate = 0.0005
weightDecay = 0.0
epochs = 140
gamma = 0.95
torch.manual_seed(42)
np.random.seed(11785)

optimizer = torch.optim.Adam(model.parameters(), lr=learningRate, weight_decay=weightDecay)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=gamma, verbose=True)

Adjusting learning rate of group 0 to 5.0000e-04.


## DataLoader

In [None]:
!mv /content/data /content/unique-142p

In [None]:
!mkdir /content/data
!mkdir /content/data/unique-142p
!mv /content/unique-142p /content/data/unique-142p

In [None]:
!ls /content/data

unique-142p


In [None]:
transforms = torchvision.transforms.Compose([
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.Resize((64, 64)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize((64, 64)),
    torchvision.transforms.ToTensor()                                              
])

def get_dataloaders(path, shuffle, portion, val_split, batch_size, test=False):
    dataset = torchvision.datasets.ImageFolder(root=path, transform=transforms)
    if test:
      test_dataset = torchvision.datasets.ImageFolder(root=path, transform=test_transforms)

    shuffle_dataset = shuffle
    use_proportion = portion
    validation_split = val_split
    batch_size = batch_size
    random_seed= 42

    # Creating data indices for training and validation splits:
    indices = list(range(len(dataset)))
    use_length = int(len(dataset) * use_proportion)
    split = int(np.floor((1 - validation_split) * len(dataset)) * use_proportion)
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices = indices[:split]
    val_indices = indices[split:use_length]
    print(len(train_indices), len(val_indices))
    print(len(dataset), use_length, split)

    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)
  

    train_dataloader = DataLoader(dataset, batch_size=batch_size, drop_last=True,sampler=train_sampler)
    val_dataloader = DataLoader(dataset, batch_size=batch_size, drop_last=True,sampler=val_sampler)

    if test:
      test_dataloader = DataLoader(test_dataset, batch_size=batch_size, drop_last=True, sampler=train_sampler)
      return test_dataloader, test_dataloader
    

    return train_dataloader, val_dataloader

In [None]:
train_loader, val_loader= get_dataloaders('/content/data', shuffle=True, portion=1.0, val_split=0.2, batch_size=256)

70389 17598
87987 87987 70389


## Train

In [None]:
def train(model, train_loader, val_loader, epochs, folder=''):
    model.train()
    best_loss = float('inf')

    for epoch in range(epochs):
        model.train()
        start_time = time.time()
        avg_loss = 0.0
        for batch_num, (x,y) in enumerate(tqdm(train_loader, position=0, leave=True)):
            x = x.to(device)
            
            optimizer.zero_grad()

            recons_x, z = model(x)
            loss, mse, kld = model.loss()

            with open(folder+'/train_loss.txt', 'a') as ft:
                ft.write('%f\n' % loss.item())
            with open(folder+'/train_mse_loss.txt', 'a') as ft1:
                ft1.write('%f\n' % mse.item())
            with open(folder+'/train_kld_loss.txt', 'a') as ft2:
                ft2.write('%f\n' % kld.item())
            
            loss.backward()
            optimizer.step()

            avg_loss += loss.item()
            

            if batch_num % 200 == 199:
                print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch+1, batch_num+1, avg_loss/200))
                avg_loss = 0.0    
            
            torch.cuda.empty_cache()
            del x
            del recons_x
            del z
            del loss
            del mse
            del kld
            torch.cuda.empty_cache()

        end_time = time.time()
        print(end_time - start_time)
        val_loss = val(model, val_loader)
        scheduler.step()
        if val_loss < best_loss:
            best_loss = val_loss
            checkpoint = {"model": model.state_dict(), 
                          "optimizer": optimizer.state_dict(),
                          "scheduler": scheduler.state_dict()}
            torch.save(checkpoint, folder+'/v2_best_checkpoint_epoch'+str(epoch)+'.pt')
        if (epoch + 1) % 10 == 0:
            checkpoint = {"model": model.state_dict(), 
                          "optimizer": optimizer.state_dict(),
                          "scheduler": scheduler.state_dict()}
            torch.save(checkpoint, folder+'/v2_checkpoint_epoch'+str(epoch)+'.pt')
        print('Val Loss: {:.4f}'.format(val_loss))
        print('lr: ', optimizer.param_groups[0]['lr'])


def val(model, loader, folder=''):
    model.eval()
    avg_loss = 0.0
    avg_mse = 0.0
    avg_kld = 0.0

    with torch.no_grad():
        for batch_num, (x,y) in enumerate(tqdm(loader, position=0, leave=True)):
            x = x.to(device)
            _, _ = model(x)
            loss, mse, kld = model.loss()

            avg_loss += loss.item()  
            avg_mse += mse.item()
            avg_kld += kld.item()
            
            torch.cuda.empty_cache()
            del x
            del loss
            del mse
            del kld
            torch.cuda.empty_cache()

    model.train()
    avg_loss = avg_loss / len(loader)
    avg_mse = avg_mse / len(loader)
    avg_kld = avg_kld / len(loader)
    with open(folder+'/val_loss.txt', 'a') as fv:
        fv.write('%f\n' % avg_loss)
    with open(folder+'/val_mse_loss.txt', 'a') as fv1:
        fv1.write('%f\n' % avg_mse)
    with open(folder+'/val_kld_loss.txt', 'a') as fv2:
        fv2.write('%f\n' % avg_kld)

    return avg_loss

In [None]:
train(model, train_loader, val_loader, epochs)

 73%|███████▎  | 200/274 [01:32<00:34,  2.15it/s]

Epoch: 1	Batch: 200	Avg-Loss: 3125383.4863


100%|██████████| 274/274 [02:06<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.82081341743469


100%|██████████| 68/68 [00:28<00:00,  2.38it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.0000e-04.
Val Loss: 3044773.4338
lr:  0.0005


 73%|███████▎  | 200/274 [01:32<00:33,  2.19it/s]

Epoch: 2	Batch: 200	Avg-Loss: 3045376.0663


100%|██████████| 274/274 [02:06<00:00,  2.17it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.13153910636902


100%|██████████| 68/68 [00:27<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.7500e-04.
Val Loss: 3012074.6324
lr:  0.000475


 73%|███████▎  | 200/274 [01:31<00:34,  2.14it/s]

Epoch: 3	Batch: 200	Avg-Loss: 3028440.8363


100%|██████████| 274/274 [02:06<00:00,  2.17it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.38673686981201


100%|██████████| 68/68 [00:27<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.7500e-04.
Val Loss: 2997698.8897
lr:  0.000475


 73%|███████▎  | 200/274 [01:32<00:33,  2.21it/s]

Epoch: 4	Batch: 200	Avg-Loss: 3029860.7775


100%|██████████| 274/274 [02:06<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.72908639907837


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.7500e-04.
Val Loss: 2989057.8015
lr:  0.000475


 73%|███████▎  | 200/274 [01:30<00:33,  2.24it/s]

Epoch: 5	Batch: 200	Avg-Loss: 3007239.6088


100%|██████████| 274/274 [02:04<00:00,  2.20it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

124.55518388748169


100%|██████████| 68/68 [00:27<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.5125e-04.
Val Loss: 2997271.1287
lr:  0.00045125


 73%|███████▎  | 200/274 [01:30<00:35,  2.08it/s]

Epoch: 6	Batch: 200	Avg-Loss: 3013672.0825


100%|██████████| 274/274 [02:05<00:00,  2.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.72826886177063


100%|██████████| 68/68 [00:27<00:00,  2.47it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.5125e-04.
Val Loss: 2987300.6801
lr:  0.00045125


 73%|███████▎  | 200/274 [01:30<00:33,  2.23it/s]

Epoch: 7	Batch: 200	Avg-Loss: 2996523.1075


100%|██████████| 274/274 [02:04<00:00,  2.20it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

124.40284824371338


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.5125e-04.
Val Loss: 2979428.1397
lr:  0.00045125


 73%|███████▎  | 200/274 [01:31<00:32,  2.27it/s]

Epoch: 8	Batch: 200	Avg-Loss: 2998254.7875


100%|██████████| 274/274 [02:05<00:00,  2.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.91966986656189


100%|██████████| 68/68 [00:27<00:00,  2.47it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.2869e-04.
Val Loss: 2977428.8346
lr:  0.0004286875


 73%|███████▎  | 200/274 [01:31<00:33,  2.23it/s]

Epoch: 9	Batch: 200	Avg-Loss: 3001294.6812


100%|██████████| 274/274 [02:05<00:00,  2.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.86363697052002


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]


Adjusting learning rate of group 0 to 4.2869e-04.


  0%|          | 0/274 [00:00<?, ?it/s]

Val Loss: 2974096.3309
lr:  0.0004286875


 73%|███████▎  | 200/274 [01:30<00:33,  2.20it/s]

Epoch: 10	Batch: 200	Avg-Loss: 2991098.0713


100%|██████████| 274/274 [02:04<00:00,  2.20it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

124.5270004272461


100%|██████████| 68/68 [00:27<00:00,  2.44it/s]


Adjusting learning rate of group 0 to 4.2869e-04.


  0%|          | 0/274 [00:00<?, ?it/s]

Val Loss: 2969176.3272
lr:  0.0004286875


 73%|███████▎  | 200/274 [01:31<00:33,  2.23it/s]

Epoch: 11	Batch: 200	Avg-Loss: 2995278.7475


100%|██████████| 274/274 [02:05<00:00,  2.19it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.09795880317688


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.0725e-04.
Val Loss: 2970450.7500
lr:  0.00040725312499999993


 73%|███████▎  | 200/274 [01:32<00:33,  2.24it/s]

Epoch: 12	Batch: 200	Avg-Loss: 3002494.3400


100%|██████████| 274/274 [02:06<00:00,  2.17it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.1700325012207


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.0725e-04.
Val Loss: 2971120.9412
lr:  0.00040725312499999993


 73%|███████▎  | 200/274 [01:31<00:33,  2.24it/s]

Epoch: 13	Batch: 200	Avg-Loss: 2992428.2363


100%|██████████| 274/274 [02:05<00:00,  2.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.57029747962952


100%|██████████| 68/68 [00:27<00:00,  2.47it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.0725e-04.
Val Loss: 2965179.3346
lr:  0.00040725312499999993


 73%|███████▎  | 200/274 [01:33<00:32,  2.25it/s]

Epoch: 14	Batch: 200	Avg-Loss: 2969679.8450


100%|██████████| 274/274 [02:07<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.06177878379822


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.8689e-04.
Val Loss: 2960354.0846
lr:  0.0003868904687499999


 73%|███████▎  | 200/274 [01:32<00:33,  2.23it/s]

Epoch: 15	Batch: 200	Avg-Loss: 2980281.2738


100%|██████████| 274/274 [02:06<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.96720552444458


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.8689e-04.
Val Loss: 2959906.4779
lr:  0.0003868904687499999


 73%|███████▎  | 200/274 [01:30<00:32,  2.25it/s]

Epoch: 16	Batch: 200	Avg-Loss: 2984708.6250


100%|██████████| 274/274 [02:03<00:00,  2.21it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.70864081382751


100%|██████████| 68/68 [00:27<00:00,  2.47it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.8689e-04.
Val Loss: 2961766.4963
lr:  0.0003868904687499999


 73%|███████▎  | 200/274 [01:29<00:33,  2.23it/s]

Epoch: 17	Batch: 200	Avg-Loss: 2988003.4175


100%|██████████| 274/274 [02:03<00:00,  2.23it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.07151508331299


100%|██████████| 68/68 [00:27<00:00,  2.48it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.6755e-04.
Val Loss: 2963909.5846
lr:  0.0003675459453124999


 73%|███████▎  | 200/274 [01:30<00:32,  2.28it/s]

Epoch: 18	Batch: 200	Avg-Loss: 2990066.0987


100%|██████████| 274/274 [02:05<00:00,  2.19it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.06057238578796


100%|██████████| 68/68 [00:27<00:00,  2.48it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.6755e-04.
Val Loss: 2957311.5368
lr:  0.0003675459453124999


 73%|███████▎  | 200/274 [01:29<00:32,  2.30it/s]

Epoch: 19	Batch: 200	Avg-Loss: 2983955.5925


100%|██████████| 274/274 [02:02<00:00,  2.24it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

122.41311311721802


100%|██████████| 68/68 [00:26<00:00,  2.53it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.6755e-04.
Val Loss: 2956870.0257
lr:  0.0003675459453124999


 73%|███████▎  | 200/274 [01:29<00:33,  2.24it/s]

Epoch: 20	Batch: 200	Avg-Loss: 2974228.5812


100%|██████████| 274/274 [02:03<00:00,  2.22it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.26935768127441


100%|██████████| 68/68 [00:27<00:00,  2.49it/s]


Adjusting learning rate of group 0 to 3.4917e-04.


  0%|          | 0/274 [00:00<?, ?it/s]

Val Loss: 2952227.8456
lr:  0.00034916864804687486


 73%|███████▎  | 200/274 [01:31<00:35,  2.10it/s]

Epoch: 21	Batch: 200	Avg-Loss: 2975755.9162


100%|██████████| 274/274 [02:05<00:00,  2.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.44020438194275


100%|██████████| 68/68 [00:26<00:00,  2.55it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.4917e-04.
Val Loss: 2950463.4779
lr:  0.00034916864804687486


 73%|███████▎  | 200/274 [01:28<00:33,  2.22it/s]

Epoch: 22	Batch: 200	Avg-Loss: 2980786.6325


100%|██████████| 274/274 [02:02<00:00,  2.24it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

122.11542439460754


100%|██████████| 68/68 [00:26<00:00,  2.53it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.4917e-04.
Val Loss: 2956164.8750
lr:  0.00034916864804687486


 73%|███████▎  | 200/274 [01:29<00:32,  2.25it/s]

Epoch: 23	Batch: 200	Avg-Loss: 2980766.1913


100%|██████████| 274/274 [02:01<00:00,  2.25it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

121.8715672492981


100%|██████████| 68/68 [00:26<00:00,  2.53it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.3171e-04.
Val Loss: 2951492.6140
lr:  0.0003317102156445311


 73%|███████▎  | 200/274 [01:30<00:32,  2.26it/s]

Epoch: 24	Batch: 200	Avg-Loss: 2966311.4275


100%|██████████| 274/274 [02:03<00:00,  2.22it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.44521045684814


100%|██████████| 68/68 [00:26<00:00,  2.54it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.3171e-04.
Val Loss: 2954471.8015
lr:  0.0003317102156445311


 73%|███████▎  | 200/274 [01:29<00:33,  2.18it/s]

Epoch: 25	Batch: 200	Avg-Loss: 2980143.4600


100%|██████████| 274/274 [02:03<00:00,  2.23it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.01858329772949


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.3171e-04.
Val Loss: 2944714.6654
lr:  0.0003317102156445311


 73%|███████▎  | 200/274 [01:31<00:32,  2.28it/s]

Epoch: 26	Batch: 200	Avg-Loss: 2977060.0137


100%|██████████| 274/274 [02:04<00:00,  2.20it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

124.38548874855042


100%|██████████| 68/68 [00:26<00:00,  2.52it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.1512e-04.
Val Loss: 2945782.0588
lr:  0.0003151247048623045


 73%|███████▎  | 200/274 [01:30<00:36,  2.05it/s]

Epoch: 27	Batch: 200	Avg-Loss: 2967447.8013


100%|██████████| 274/274 [02:03<00:00,  2.22it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.59247708320618


100%|██████████| 68/68 [00:26<00:00,  2.55it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.1512e-04.
Val Loss: 2949004.2537
lr:  0.0003151247048623045


 73%|███████▎  | 200/274 [01:29<00:32,  2.28it/s]

Epoch: 28	Batch: 200	Avg-Loss: 2974638.1100


100%|██████████| 274/274 [02:02<00:00,  2.23it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

122.97041511535645


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.1512e-04.
Val Loss: 2946174.3309
lr:  0.0003151247048623045


 73%|███████▎  | 200/274 [01:34<00:37,  1.99it/s]

Epoch: 29	Batch: 200	Avg-Loss: 2975065.1075


100%|██████████| 274/274 [02:08<00:00,  2.12it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.9560842514038


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.9937e-04.
Val Loss: 2951465.7868
lr:  0.00029936846961918924


 73%|███████▎  | 200/274 [01:30<00:33,  2.24it/s]

Epoch: 30	Batch: 200	Avg-Loss: 2961330.1263


100%|██████████| 274/274 [02:03<00:00,  2.22it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.15097689628601


100%|██████████| 68/68 [00:26<00:00,  2.55it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.9937e-04.
Val Loss: 2946168.3934
lr:  0.00029936846961918924


 73%|███████▎  | 200/274 [01:28<00:32,  2.27it/s]

Epoch: 31	Batch: 200	Avg-Loss: 2973720.4762


100%|██████████| 274/274 [02:00<00:00,  2.28it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

120.3382158279419


100%|██████████| 68/68 [00:26<00:00,  2.53it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.9937e-04.
Val Loss: 2947955.4596
lr:  0.00029936846961918924


 73%|███████▎  | 200/274 [01:28<00:32,  2.28it/s]

Epoch: 32	Batch: 200	Avg-Loss: 2986150.9700


100%|██████████| 274/274 [02:02<00:00,  2.23it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

122.72672295570374


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.8440e-04.
Val Loss: 2947509.4301
lr:  0.00028440004613822977


 73%|███████▎  | 200/274 [01:30<00:33,  2.23it/s]

Epoch: 33	Batch: 200	Avg-Loss: 2955364.7662


100%|██████████| 274/274 [02:03<00:00,  2.21it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.92757320404053


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.8440e-04.
Val Loss: 2947990.5184
lr:  0.00028440004613822977


 73%|███████▎  | 200/274 [01:31<00:32,  2.25it/s]

Epoch: 34	Batch: 200	Avg-Loss: 2979772.7600


100%|██████████| 274/274 [02:04<00:00,  2.20it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

124.35752630233765


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.8440e-04.
Val Loss: 2945719.9412
lr:  0.00028440004613822977


 73%|███████▎  | 200/274 [01:32<00:34,  2.15it/s]

Epoch: 35	Batch: 200	Avg-Loss: 2962378.9987


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.6795334815979


100%|██████████| 68/68 [00:27<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.7018e-04.
Val Loss: 2940860.8787
lr:  0.00027018004383131826


 73%|███████▎  | 200/274 [01:30<00:33,  2.22it/s]

Epoch: 36	Batch: 200	Avg-Loss: 2966696.0362


100%|██████████| 274/274 [02:04<00:00,  2.20it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

124.52224946022034


100%|██████████| 68/68 [00:27<00:00,  2.49it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.7018e-04.
Val Loss: 2940214.4007
lr:  0.00027018004383131826


 73%|███████▎  | 200/274 [01:31<00:34,  2.14it/s]

Epoch: 37	Batch: 200	Avg-Loss: 2964737.7687


100%|██████████| 274/274 [02:03<00:00,  2.21it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.86596894264221


100%|██████████| 68/68 [00:27<00:00,  2.49it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.7018e-04.
Val Loss: 2943588.0110
lr:  0.00027018004383131826


 73%|███████▎  | 200/274 [01:30<00:32,  2.29it/s]

Epoch: 38	Batch: 200	Avg-Loss: 2966680.9188


100%|██████████| 274/274 [02:03<00:00,  2.22it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.62605118751526


100%|██████████| 68/68 [00:27<00:00,  2.49it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.5667e-04.
Val Loss: 2941260.1691
lr:  0.00025667104163975234


 73%|███████▎  | 200/274 [01:32<00:32,  2.26it/s]

Epoch: 39	Batch: 200	Avg-Loss: 2972937.9688


100%|██████████| 274/274 [02:05<00:00,  2.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.88077902793884


100%|██████████| 68/68 [00:27<00:00,  2.52it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.5667e-04.
Val Loss: 2937009.8971
lr:  0.00025667104163975234


 73%|███████▎  | 200/274 [01:31<00:34,  2.16it/s]

Epoch: 40	Batch: 200	Avg-Loss: 2956216.5137


100%|██████████| 274/274 [02:04<00:00,  2.21it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

124.07163310050964


100%|██████████| 68/68 [00:26<00:00,  2.53it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.5667e-04.
Val Loss: 2938335.3640
lr:  0.00025667104163975234


 73%|███████▎  | 200/274 [01:28<00:31,  2.32it/s]

Epoch: 41	Batch: 200	Avg-Loss: 2958947.7875


100%|██████████| 274/274 [02:02<00:00,  2.23it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

122.62185549736023


100%|██████████| 68/68 [00:26<00:00,  2.53it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.4384e-04.
Val Loss: 2938513.6838
lr:  0.00024383748955776472


 73%|███████▎  | 200/274 [01:29<00:32,  2.30it/s]

Epoch: 42	Batch: 200	Avg-Loss: 2963759.7600


100%|██████████| 274/274 [02:02<00:00,  2.24it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

122.18583822250366


100%|██████████| 68/68 [00:26<00:00,  2.54it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.4384e-04.
Val Loss: 2944342.0809
lr:  0.00024383748955776472


 73%|███████▎  | 200/274 [01:30<00:32,  2.28it/s]

Epoch: 43	Batch: 200	Avg-Loss: 2975685.7188


100%|██████████| 274/274 [02:03<00:00,  2.21it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.94358134269714


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.4384e-04.
Val Loss: 2945061.0257
lr:  0.00024383748955776472


 73%|███████▎  | 200/274 [01:30<00:32,  2.25it/s]

Epoch: 44	Batch: 200	Avg-Loss: 2966598.4388


100%|██████████| 274/274 [02:03<00:00,  2.22it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.18207716941833


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.3165e-04.
Val Loss: 2939430.8419
lr:  0.00023164561507987649


 73%|███████▎  | 200/274 [01:32<00:33,  2.24it/s]

Epoch: 45	Batch: 200	Avg-Loss: 2955530.1162


100%|██████████| 274/274 [02:06<00:00,  2.17it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.44991898536682


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.3165e-04.
Val Loss: 2945875.5772
lr:  0.00023164561507987649


 73%|███████▎  | 200/274 [01:31<00:33,  2.22it/s]

Epoch: 46	Batch: 200	Avg-Loss: 2963677.3112


100%|██████████| 274/274 [02:05<00:00,  2.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.69983315467834


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.3165e-04.
Val Loss: 2940941.7500
lr:  0.00023164561507987649


 73%|███████▎  | 200/274 [01:31<00:33,  2.23it/s]

Epoch: 47	Batch: 200	Avg-Loss: 2965419.3188


100%|██████████| 274/274 [02:06<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.79919362068176


100%|██████████| 68/68 [00:28<00:00,  2.38it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.2006e-04.
Val Loss: 2936914.6838
lr:  0.00022006333432588265


 73%|███████▎  | 200/274 [01:32<00:33,  2.20it/s]

Epoch: 48	Batch: 200	Avg-Loss: 2957372.6237


100%|██████████| 274/274 [02:05<00:00,  2.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.79148888587952


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.2006e-04.
Val Loss: 2941575.7904
lr:  0.00022006333432588265


 73%|███████▎  | 200/274 [01:30<00:33,  2.19it/s]

Epoch: 49	Batch: 200	Avg-Loss: 2959345.3838


100%|██████████| 274/274 [02:04<00:00,  2.20it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

124.63519239425659


100%|██████████| 68/68 [00:28<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.2006e-04.
Val Loss: 2941354.9669
lr:  0.00022006333432588265


 73%|███████▎  | 200/274 [01:34<00:33,  2.21it/s]

Epoch: 50	Batch: 200	Avg-Loss: 2960128.1175


100%|██████████| 274/274 [02:09<00:00,  2.11it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.92233562469482


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.0906e-04.
Val Loss: 2942345.6287
lr:  0.00020906016760958852


 73%|███████▎  | 200/274 [01:34<00:33,  2.21it/s]

Epoch: 51	Batch: 200	Avg-Loss: 2954766.8200


100%|██████████| 274/274 [02:09<00:00,  2.11it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.85111331939697


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.0906e-04.
Val Loss: 2939898.3640
lr:  0.00020906016760958852


 73%|███████▎  | 200/274 [01:33<00:35,  2.06it/s]

Epoch: 52	Batch: 200	Avg-Loss: 2964280.0400


100%|██████████| 274/274 [02:07<00:00,  2.14it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.76187205314636


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.0906e-04.
Val Loss: 2938172.7941
lr:  0.00020906016760958852


 73%|███████▎  | 200/274 [01:33<00:33,  2.20it/s]

Epoch: 53	Batch: 200	Avg-Loss: 2959474.9813


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.42953205108643


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.9861e-04.
Val Loss: 2942139.3934
lr:  0.00019860715922910907


 73%|███████▎  | 200/274 [01:36<00:34,  2.12it/s]

Epoch: 54	Batch: 200	Avg-Loss: 2954088.7338


100%|██████████| 274/274 [02:10<00:00,  2.09it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.9161412715912


100%|██████████| 68/68 [00:28<00:00,  2.38it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.9861e-04.
Val Loss: 2943400.8934
lr:  0.00019860715922910907


 73%|███████▎  | 200/274 [01:33<00:33,  2.18it/s]

Epoch: 55	Batch: 200	Avg-Loss: 2962709.0175


100%|██████████| 274/274 [02:08<00:00,  2.14it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.0763440132141


100%|██████████| 68/68 [00:28<00:00,  2.38it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.9861e-04.
Val Loss: 2938299.2831
lr:  0.00019860715922910907


 73%|███████▎  | 200/274 [01:33<00:33,  2.20it/s]

Epoch: 56	Batch: 200	Avg-Loss: 2968589.0800


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.4123752117157


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.8868e-04.
Val Loss: 2937927.7610
lr:  0.0001886768012676536


 73%|███████▎  | 200/274 [01:31<00:33,  2.21it/s]

Epoch: 57	Batch: 200	Avg-Loss: 2959569.3075


100%|██████████| 274/274 [02:05<00:00,  2.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.87896871566772


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.8868e-04.
Val Loss: 2937900.3676
lr:  0.0001886768012676536


 73%|███████▎  | 200/274 [01:32<00:34,  2.16it/s]

Epoch: 58	Batch: 200	Avg-Loss: 2963320.0375


100%|██████████| 274/274 [02:05<00:00,  2.19it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.0660331249237


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.8868e-04.
Val Loss: 2934578.6029
lr:  0.0001886768012676536


 73%|███████▎  | 200/274 [01:33<00:33,  2.21it/s]

Epoch: 59	Batch: 200	Avg-Loss: 2950393.6437


100%|██████████| 274/274 [02:07<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.06825423240662


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.7924e-04.
Val Loss: 2938349.9301
lr:  0.0001792429612042709


 73%|███████▎  | 200/274 [01:34<00:33,  2.20it/s]

Epoch: 60	Batch: 200	Avg-Loss: 2968020.1625


100%|██████████| 274/274 [02:09<00:00,  2.12it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.4502077102661


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.7924e-04.
Val Loss: 2939872.8640
lr:  0.0001792429612042709


 73%|███████▎  | 200/274 [01:30<00:32,  2.28it/s]

Epoch: 61	Batch: 200	Avg-Loss: 2957875.5625


100%|██████████| 274/274 [02:06<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.95779514312744


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.7924e-04.
Val Loss: 2938800.5809
lr:  0.0001792429612042709


 73%|███████▎  | 200/274 [01:33<00:34,  2.17it/s]

Epoch: 62	Batch: 200	Avg-Loss: 2956835.1612


100%|██████████| 274/274 [02:07<00:00,  2.14it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.9406635761261


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.7028e-04.
Val Loss: 2933713.1471
lr:  0.00017028081314405735


 73%|███████▎  | 200/274 [01:33<00:37,  1.95it/s]

Epoch: 63	Batch: 200	Avg-Loss: 2957831.3912


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.57556390762329


100%|██████████| 68/68 [00:27<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.7028e-04.
Val Loss: 2937207.1544
lr:  0.00017028081314405735


 73%|███████▎  | 200/274 [01:32<00:33,  2.23it/s]

Epoch: 64	Batch: 200	Avg-Loss: 2951512.1012


100%|██████████| 274/274 [02:06<00:00,  2.17it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.07221817970276


100%|██████████| 68/68 [00:27<00:00,  2.47it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.7028e-04.
Val Loss: 2938894.6875
lr:  0.00017028081314405735


 73%|███████▎  | 200/274 [01:31<00:34,  2.15it/s]

Epoch: 65	Batch: 200	Avg-Loss: 2962682.5600


100%|██████████| 274/274 [02:05<00:00,  2.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.69651913642883


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.6177e-04.
Val Loss: 2936540.8493
lr:  0.00016176677248685447


 73%|███████▎  | 200/274 [01:33<00:33,  2.18it/s]

Epoch: 66	Batch: 200	Avg-Loss: 2949735.2800


100%|██████████| 274/274 [02:07<00:00,  2.14it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.76114153862


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.6177e-04.
Val Loss: 2940672.8015
lr:  0.00016176677248685447


 73%|███████▎  | 200/274 [01:32<00:32,  2.27it/s]

Epoch: 67	Batch: 200	Avg-Loss: 2959537.0975


100%|██████████| 274/274 [02:05<00:00,  2.19it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.34582018852234


100%|██████████| 68/68 [00:27<00:00,  2.51it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.6177e-04.
Val Loss: 2935349.2537
lr:  0.00016176677248685447


 73%|███████▎  | 200/274 [01:29<00:33,  2.20it/s]

Epoch: 68	Batch: 200	Avg-Loss: 2952390.4537


100%|██████████| 274/274 [02:04<00:00,  2.20it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

124.32071018218994


100%|██████████| 68/68 [00:27<00:00,  2.48it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.5368e-04.
Val Loss: 2936773.2243
lr:  0.00015367843386251173


 73%|███████▎  | 200/274 [01:33<00:33,  2.23it/s]

Epoch: 69	Batch: 200	Avg-Loss: 2949723.8825


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.61057615280151


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.5368e-04.
Val Loss: 2938002.2978
lr:  0.00015367843386251173


 73%|███████▎  | 200/274 [01:33<00:34,  2.18it/s]

Epoch: 70	Batch: 200	Avg-Loss: 2967764.5562


100%|██████████| 274/274 [02:06<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.79338550567627


100%|██████████| 68/68 [00:27<00:00,  2.48it/s]


Adjusting learning rate of group 0 to 1.5368e-04.


  0%|          | 0/274 [00:00<?, ?it/s]

Val Loss: 2938041.3162
lr:  0.00015367843386251173


 73%|███████▎  | 200/274 [01:33<00:34,  2.13it/s]

Epoch: 71	Batch: 200	Avg-Loss: 2961248.5875


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.41861462593079


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.4599e-04.
Val Loss: 2934755.0404
lr:  0.00014599451216938612


 73%|███████▎  | 200/274 [01:35<00:35,  2.07it/s]

Epoch: 72	Batch: 200	Avg-Loss: 2954554.1412


100%|██████████| 274/274 [02:10<00:00,  2.10it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.3328971862793


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.4599e-04.
Val Loss: 2934877.5809
lr:  0.00014599451216938612


 73%|███████▎  | 200/274 [01:32<00:32,  2.30it/s]

Epoch: 73	Batch: 200	Avg-Loss: 2951777.1412


100%|██████████| 274/274 [02:08<00:00,  2.13it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.39363837242126


100%|██████████| 68/68 [00:26<00:00,  2.54it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.4599e-04.
Val Loss: 2936741.5551
lr:  0.00014599451216938612


 73%|███████▎  | 200/274 [01:30<00:31,  2.32it/s]

Epoch: 74	Batch: 200	Avg-Loss: 2966862.7212


100%|██████████| 274/274 [02:04<00:00,  2.21it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

124.15022659301758


100%|██████████| 68/68 [00:26<00:00,  2.52it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.3869e-04.
Val Loss: 2940082.5809
lr:  0.00013869478656091682


 73%|███████▎  | 200/274 [01:32<00:34,  2.18it/s]

Epoch: 75	Batch: 200	Avg-Loss: 2954184.7575


100%|██████████| 274/274 [02:06<00:00,  2.17it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.28332042694092


100%|██████████| 68/68 [00:26<00:00,  2.53it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.3869e-04.
Val Loss: 2940078.1471
lr:  0.00013869478656091682


 73%|███████▎  | 200/274 [01:34<00:35,  2.07it/s]

Epoch: 76	Batch: 200	Avg-Loss: 2962299.3525


100%|██████████| 274/274 [02:07<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.1203989982605


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.3869e-04.
Val Loss: 2937706.3051
lr:  0.00013869478656091682


 73%|███████▎  | 200/274 [01:34<00:32,  2.25it/s]

Epoch: 77	Batch: 200	Avg-Loss: 2953321.9638


100%|██████████| 274/274 [02:07<00:00,  2.14it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.8527774810791


100%|██████████| 68/68 [00:27<00:00,  2.47it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.3176e-04.
Val Loss: 2935150.2978
lr:  0.00013176004723287096


 73%|███████▎  | 200/274 [01:34<00:32,  2.25it/s]

Epoch: 78	Batch: 200	Avg-Loss: 2961359.7287


100%|██████████| 274/274 [02:07<00:00,  2.14it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.93581199645996


100%|██████████| 68/68 [00:27<00:00,  2.50it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.3176e-04.
Val Loss: 2935629.1140
lr:  0.00013176004723287096


 73%|███████▎  | 200/274 [01:30<00:32,  2.26it/s]

Epoch: 79	Batch: 200	Avg-Loss: 2964555.0275


100%|██████████| 274/274 [02:03<00:00,  2.22it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.56305241584778


100%|██████████| 68/68 [00:27<00:00,  2.49it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.3176e-04.
Val Loss: 2942630.6434
lr:  0.00013176004723287096


 73%|███████▎  | 200/274 [01:30<00:32,  2.30it/s]

Epoch: 80	Batch: 200	Avg-Loss: 2949037.7725


100%|██████████| 274/274 [02:03<00:00,  2.21it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

123.76534175872803


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.2517e-04.
Val Loss: 2937975.8456
lr:  0.0001251720448712274


 73%|███████▎  | 200/274 [01:33<00:32,  2.25it/s]

Epoch: 81	Batch: 200	Avg-Loss: 2955875.7338


100%|██████████| 274/274 [02:07<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.04987120628357


100%|██████████| 68/68 [00:27<00:00,  2.48it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.2517e-04.
Val Loss: 2939934.1765
lr:  0.0001251720448712274


 73%|███████▎  | 200/274 [01:31<00:32,  2.29it/s]

Epoch: 82	Batch: 200	Avg-Loss: 2945626.0525


100%|██████████| 274/274 [02:06<00:00,  2.17it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.07012557983398


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.2517e-04.
Val Loss: 2933266.8971
lr:  0.0001251720448712274


 73%|███████▎  | 200/274 [01:30<00:32,  2.24it/s]

Epoch: 83	Batch: 200	Avg-Loss: 2962044.3675


100%|██████████| 274/274 [02:05<00:00,  2.19it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

125.2986261844635


100%|██████████| 68/68 [00:28<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.1891e-04.
Val Loss: 2938673.5846
lr:  0.00011891344262766602


 73%|███████▎  | 200/274 [01:32<00:32,  2.24it/s]

Epoch: 84	Batch: 200	Avg-Loss: 2961247.9875


100%|██████████| 274/274 [02:06<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.6549563407898


100%|██████████| 68/68 [00:27<00:00,  2.49it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.1891e-04.
Val Loss: 2932798.8566
lr:  0.00011891344262766602


 73%|███████▎  | 200/274 [01:31<00:32,  2.27it/s]

Epoch: 85	Batch: 200	Avg-Loss: 2971861.1400


100%|██████████| 274/274 [02:06<00:00,  2.17it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.10342836380005


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.1891e-04.
Val Loss: 2934679.9669
lr:  0.00011891344262766602


 73%|███████▎  | 200/274 [01:31<00:33,  2.18it/s]

Epoch: 86	Batch: 200	Avg-Loss: 2948118.0838


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.29022359848022


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.1297e-04.
Val Loss: 2934767.3934
lr:  0.00011296777049628272


 73%|███████▎  | 200/274 [01:34<00:33,  2.19it/s]

Epoch: 87	Batch: 200	Avg-Loss: 2962946.2412


100%|██████████| 274/274 [02:09<00:00,  2.11it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.60871291160583


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.1297e-04.
Val Loss: 2932520.2757
lr:  0.00011296777049628272


 73%|███████▎  | 200/274 [01:34<00:35,  2.11it/s]

Epoch: 88	Batch: 200	Avg-Loss: 2959879.8725


100%|██████████| 274/274 [02:08<00:00,  2.13it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.76949286460876


100%|██████████| 68/68 [00:28<00:00,  2.38it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.1297e-04.
Val Loss: 2934541.4081
lr:  0.00011296777049628272


 73%|███████▎  | 200/274 [01:35<00:34,  2.16it/s]

Epoch: 89	Batch: 200	Avg-Loss: 2949623.1463


100%|██████████| 274/274 [02:10<00:00,  2.10it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.3873372077942


100%|██████████| 68/68 [00:27<00:00,  2.44it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.0732e-04.
Val Loss: 2936649.1324
lr:  0.00010731938197146858


 73%|███████▎  | 200/274 [01:35<00:34,  2.17it/s]

Epoch: 90	Batch: 200	Avg-Loss: 2951406.3000


100%|██████████| 274/274 [02:10<00:00,  2.09it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.8388741016388


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.0732e-04.
Val Loss: 2937189.3897
lr:  0.00010731938197146858


 73%|███████▎  | 200/274 [01:35<00:53,  1.38it/s]

Epoch: 91	Batch: 200	Avg-Loss: 2965181.5875


100%|██████████| 274/274 [02:10<00:00,  2.11it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.03464436531067


100%|██████████| 68/68 [00:27<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.0732e-04.
Val Loss: 2936152.8529
lr:  0.00010731938197146858


 73%|███████▎  | 200/274 [01:33<00:33,  2.21it/s]

Epoch: 92	Batch: 200	Avg-Loss: 2943132.2988


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.35442614555359


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.0195e-04.
Val Loss: 2934635.4890
lr:  0.00010195341287289515


 73%|███████▎  | 200/274 [01:35<00:36,  2.02it/s]

Epoch: 93	Batch: 200	Avg-Loss: 2959122.9137


100%|██████████| 274/274 [02:10<00:00,  2.10it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.39563298225403


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.0195e-04.
Val Loss: 2937536.1581
lr:  0.00010195341287289515


 73%|███████▎  | 200/274 [01:35<00:34,  2.17it/s]

Epoch: 94	Batch: 200	Avg-Loss: 2957967.1525


100%|██████████| 274/274 [02:09<00:00,  2.11it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.87379837036133


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.0195e-04.
Val Loss: 2934756.6728
lr:  0.00010195341287289515


 73%|███████▎  | 200/274 [01:32<00:33,  2.21it/s]

Epoch: 95	Batch: 200	Avg-Loss: 2952154.6437


100%|██████████| 274/274 [02:07<00:00,  2.14it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.95985078811646


100%|██████████| 68/68 [00:27<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 9.6856e-05.
Val Loss: 2933977.5846
lr:  9.685574222925039e-05


 73%|███████▎  | 200/274 [01:33<00:33,  2.21it/s]

Epoch: 96	Batch: 200	Avg-Loss: 2967997.1987


100%|██████████| 274/274 [02:08<00:00,  2.14it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.08288478851318


100%|██████████| 68/68 [00:27<00:00,  2.44it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 9.6856e-05.
Val Loss: 2934362.4301
lr:  9.685574222925039e-05


 73%|███████▎  | 200/274 [01:33<00:33,  2.18it/s]

Epoch: 97	Batch: 200	Avg-Loss: 2951868.6313


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.24721813201904


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 9.6856e-05.
Val Loss: 2933066.1507
lr:  9.685574222925039e-05


 73%|███████▎  | 200/274 [01:34<00:33,  2.20it/s]

Epoch: 98	Batch: 200	Avg-Loss: 2942026.8825


100%|██████████| 274/274 [02:09<00:00,  2.12it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.13858938217163


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 9.2013e-05.
Val Loss: 2933733.3640
lr:  9.201295511778786e-05


 73%|███████▎  | 200/274 [01:34<00:36,  2.01it/s]

Epoch: 99	Batch: 200	Avg-Loss: 2963274.7513


100%|██████████| 274/274 [02:08<00:00,  2.13it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.8950002193451


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 9.2013e-05.
Val Loss: 2937192.7757
lr:  9.201295511778786e-05


 73%|███████▎  | 200/274 [01:33<00:34,  2.17it/s]

Epoch: 100	Batch: 200	Avg-Loss: 2954042.5162


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.49952960014343


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 9.2013e-05.
Val Loss: 2936967.3566
lr:  9.201295511778786e-05


 73%|███████▎  | 200/274 [01:33<00:34,  2.16it/s]

Epoch: 101	Batch: 200	Avg-Loss: 2958551.4638


100%|██████████| 274/274 [02:08<00:00,  2.14it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.20417761802673


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 8.7412e-05.
Val Loss: 2936442.2426
lr:  8.741230736189846e-05


 73%|███████▎  | 200/274 [01:33<00:40,  1.81it/s]

Epoch: 102	Batch: 200	Avg-Loss: 2948136.3275


100%|██████████| 274/274 [02:06<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.68212938308716


100%|██████████| 68/68 [00:28<00:00,  2.37it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 8.7412e-05.
Val Loss: 2929688.6029
lr:  8.741230736189846e-05


 73%|███████▎  | 200/274 [01:32<00:34,  2.12it/s]

Epoch: 103	Batch: 200	Avg-Loss: 2959394.3575


100%|██████████| 274/274 [02:08<00:00,  2.13it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.3718581199646


100%|██████████| 68/68 [00:28<00:00,  2.38it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 8.7412e-05.
Val Loss: 2929215.1544
lr:  8.741230736189846e-05


 73%|███████▎  | 200/274 [01:34<00:34,  2.16it/s]

Epoch: 104	Batch: 200	Avg-Loss: 2957171.3225


100%|██████████| 274/274 [02:09<00:00,  2.12it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.55220007896423


100%|██████████| 68/68 [00:28<00:00,  2.37it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 8.3042e-05.
Val Loss: 2934883.6949
lr:  8.304169199380353e-05


 73%|███████▎  | 200/274 [01:33<00:34,  2.12it/s]

Epoch: 105	Batch: 200	Avg-Loss: 2961007.7563


100%|██████████| 274/274 [02:08<00:00,  2.13it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.54194450378418


100%|██████████| 68/68 [00:27<00:00,  2.44it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 8.3042e-05.
Val Loss: 2932242.7684
lr:  8.304169199380353e-05


 73%|███████▎  | 200/274 [01:33<00:33,  2.24it/s]

Epoch: 106	Batch: 200	Avg-Loss: 2957711.8537


100%|██████████| 274/274 [02:09<00:00,  2.11it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.67458033561707


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 8.3042e-05.
Val Loss: 2934479.3088
lr:  8.304169199380353e-05


 73%|███████▎  | 200/274 [01:33<00:33,  2.20it/s]

Epoch: 107	Batch: 200	Avg-Loss: 2951558.1050


100%|██████████| 274/274 [02:08<00:00,  2.13it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.6648452281952


100%|██████████| 68/68 [00:27<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 7.8890e-05.
Val Loss: 2935511.9926
lr:  7.888960739411335e-05


 73%|███████▎  | 200/274 [01:33<00:33,  2.23it/s]

Epoch: 108	Batch: 200	Avg-Loss: 2947955.5750


100%|██████████| 274/274 [02:08<00:00,  2.12it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.97665095329285


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 7.8890e-05.
Val Loss: 2929804.3787
lr:  7.888960739411335e-05


 73%|███████▎  | 200/274 [01:34<00:33,  2.19it/s]

Epoch: 109	Batch: 200	Avg-Loss: 2959170.4337


100%|██████████| 274/274 [02:09<00:00,  2.12it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.2407443523407


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 7.8890e-05.
Val Loss: 2936948.0625
lr:  7.888960739411335e-05


 73%|███████▎  | 200/274 [01:34<00:34,  2.15it/s]

Epoch: 110	Batch: 200	Avg-Loss: 2960279.8213


100%|██████████| 274/274 [02:09<00:00,  2.12it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.176696062088


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 7.4945e-05.
Val Loss: 2929883.0074
lr:  7.494512702440768e-05


 73%|███████▎  | 200/274 [01:36<00:33,  2.21it/s]

Epoch: 111	Batch: 200	Avg-Loss: 2952169.2012


100%|██████████| 274/274 [02:11<00:00,  2.08it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

131.50845336914062


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 7.4945e-05.
Val Loss: 2933977.2316
lr:  7.494512702440768e-05


 73%|███████▎  | 200/274 [01:38<00:35,  2.09it/s]

Epoch: 112	Batch: 200	Avg-Loss: 2961735.5575


100%|██████████| 274/274 [02:14<00:00,  2.03it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

134.73410749435425


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 7.4945e-05.
Val Loss: 2933534.9853
lr:  7.494512702440768e-05


 73%|███████▎  | 200/274 [01:35<00:34,  2.17it/s]

Epoch: 113	Batch: 200	Avg-Loss: 2945944.3075


100%|██████████| 274/274 [02:09<00:00,  2.11it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.67806458473206


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 7.1198e-05.
Val Loss: 2930328.7941
lr:  7.119787067318729e-05


 73%|███████▎  | 200/274 [01:34<00:33,  2.18it/s]

Epoch: 114	Batch: 200	Avg-Loss: 2948349.9125


100%|██████████| 274/274 [02:08<00:00,  2.13it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.41012334823608


100%|██████████| 68/68 [00:27<00:00,  2.44it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 7.1198e-05.
Val Loss: 2936367.3199
lr:  7.119787067318729e-05


 73%|███████▎  | 200/274 [01:33<00:32,  2.24it/s]

Epoch: 115	Batch: 200	Avg-Loss: 2954039.8250


100%|██████████| 274/274 [02:08<00:00,  2.13it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.4669246673584


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 7.1198e-05.
Val Loss: 2930590.9228
lr:  7.119787067318729e-05


 73%|███████▎  | 200/274 [01:37<00:47,  1.54it/s]

Epoch: 116	Batch: 200	Avg-Loss: 2959197.3775


100%|██████████| 274/274 [02:12<00:00,  2.07it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

132.20510840415955


100%|██████████| 68/68 [00:28<00:00,  2.38it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.7638e-05.
Val Loss: 2934433.1912
lr:  6.763797713952792e-05


 73%|███████▎  | 200/274 [01:36<00:35,  2.08it/s]

Epoch: 117	Batch: 200	Avg-Loss: 2949480.6012


100%|██████████| 274/274 [02:12<00:00,  2.07it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

132.27316093444824


100%|██████████| 68/68 [00:28<00:00,  2.38it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.7638e-05.
Val Loss: 2935305.6103
lr:  6.763797713952792e-05


 73%|███████▎  | 200/274 [01:34<00:33,  2.20it/s]

Epoch: 118	Batch: 200	Avg-Loss: 2969727.5063


100%|██████████| 274/274 [02:09<00:00,  2.12it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.40698099136353


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.7638e-05.
Val Loss: 2934261.8897
lr:  6.763797713952792e-05


 73%|███████▎  | 200/274 [01:34<00:33,  2.20it/s]

Epoch: 119	Batch: 200	Avg-Loss: 2955814.2100


100%|██████████| 274/274 [02:11<00:00,  2.09it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

131.04596519470215


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.4256e-05.
Val Loss: 2935257.5772
lr:  6.425607828255152e-05


 73%|███████▎  | 200/274 [01:37<00:34,  2.12it/s]

Epoch: 120	Batch: 200	Avg-Loss: 2959406.3850


100%|██████████| 274/274 [02:15<00:00,  2.02it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

135.8209252357483


100%|██████████| 68/68 [00:29<00:00,  2.34it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.4256e-05.
Val Loss: 2932537.5993
lr:  6.425607828255152e-05


 73%|███████▎  | 200/274 [01:40<00:34,  2.13it/s]

Epoch: 121	Batch: 200	Avg-Loss: 2950994.3413


100%|██████████| 274/274 [02:15<00:00,  2.03it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

135.25427675247192


100%|██████████| 68/68 [00:29<00:00,  2.32it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.4256e-05.
Val Loss: 2931279.7610
lr:  6.425607828255152e-05


 73%|███████▎  | 200/274 [01:39<00:38,  1.90it/s]

Epoch: 122	Batch: 200	Avg-Loss: 2955129.0525


100%|██████████| 274/274 [02:14<00:00,  2.03it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

134.74695205688477


100%|██████████| 68/68 [00:28<00:00,  2.37it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.1043e-05.
Val Loss: 2937084.1618
lr:  6.104327436842394e-05


 73%|███████▎  | 200/274 [01:36<00:33,  2.20it/s]

Epoch: 123	Batch: 200	Avg-Loss: 2957190.9888


100%|██████████| 274/274 [02:10<00:00,  2.10it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.5472011566162


100%|██████████| 68/68 [00:28<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.1043e-05.
Val Loss: 2932877.4228
lr:  6.104327436842394e-05


 73%|███████▎  | 200/274 [01:34<01:07,  1.10it/s]

Epoch: 124	Batch: 200	Avg-Loss: 2950043.5663


100%|██████████| 274/274 [02:09<00:00,  2.12it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.1335711479187


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.1043e-05.
Val Loss: 2936759.0037
lr:  6.104327436842394e-05


 73%|███████▎  | 200/274 [01:33<00:35,  2.09it/s]

Epoch: 125	Batch: 200	Avg-Loss: 2958004.1450


100%|██████████| 274/274 [02:08<00:00,  2.13it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.4921898841858


100%|██████████| 68/68 [00:28<00:00,  2.43it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.7991e-05.
Val Loss: 2935873.0809
lr:  5.799111065000274e-05


 73%|███████▎  | 200/274 [01:34<00:34,  2.17it/s]

Epoch: 126	Batch: 200	Avg-Loss: 2949648.4975


100%|██████████| 274/274 [02:09<00:00,  2.12it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

129.32679677009583


100%|██████████| 68/68 [00:27<00:00,  2.46it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.7991e-05.
Val Loss: 2925299.7978
lr:  5.799111065000274e-05


 73%|███████▎  | 200/274 [01:32<00:33,  2.22it/s]

Epoch: 127	Batch: 200	Avg-Loss: 2951823.6650


100%|██████████| 274/274 [02:06<00:00,  2.16it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

126.91982626914978


100%|██████████| 68/68 [00:27<00:00,  2.44it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.7991e-05.
Val Loss: 2929179.1949
lr:  5.799111065000274e-05


 73%|███████▎  | 200/274 [01:33<00:33,  2.19it/s]

Epoch: 128	Batch: 200	Avg-Loss: 2947586.4050


100%|██████████| 274/274 [02:08<00:00,  2.13it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

128.86837673187256


100%|██████████| 68/68 [00:28<00:00,  2.34it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.5092e-05.
Val Loss: 2936041.2684
lr:  5.5091555117502596e-05


 73%|███████▎  | 200/274 [01:34<00:33,  2.22it/s]

Epoch: 129	Batch: 200	Avg-Loss: 2944612.5925


100%|██████████| 274/274 [02:10<00:00,  2.10it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.2688386440277


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.5092e-05.
Val Loss: 2925663.3309
lr:  5.5091555117502596e-05


 73%|███████▎  | 200/274 [01:33<00:34,  2.17it/s]

Epoch: 130	Batch: 200	Avg-Loss: 2957382.4413


100%|██████████| 274/274 [02:07<00:00,  2.14it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.774982213974


100%|██████████| 68/68 [00:27<00:00,  2.45it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.5092e-05.
Val Loss: 2932403.2904
lr:  5.5091555117502596e-05


 73%|███████▎  | 200/274 [01:35<00:34,  2.13it/s]

Epoch: 131	Batch: 200	Avg-Loss: 2954552.6575


100%|██████████| 274/274 [02:10<00:00,  2.10it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.66354942321777


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.2337e-05.
Val Loss: 2935091.6618
lr:  5.2336977361627463e-05


 73%|███████▎  | 200/274 [01:37<00:34,  2.16it/s]

Epoch: 132	Batch: 200	Avg-Loss: 2943474.0450


100%|██████████| 274/274 [02:11<00:00,  2.08it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

131.938903093338


100%|██████████| 68/68 [00:28<00:00,  2.38it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.2337e-05.
Val Loss: 2934165.0699
lr:  5.2336977361627463e-05


 73%|███████▎  | 200/274 [01:34<00:34,  2.15it/s]

Epoch: 133	Batch: 200	Avg-Loss: 2949208.4575


100%|██████████| 274/274 [02:10<00:00,  2.10it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.20588564872742


100%|██████████| 68/68 [00:28<00:00,  2.37it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.2337e-05.
Val Loss: 2935163.3088
lr:  5.2336977361627463e-05


 73%|███████▎  | 200/274 [01:34<00:34,  2.18it/s]

Epoch: 134	Batch: 200	Avg-Loss: 2950963.1950


100%|██████████| 274/274 [02:10<00:00,  2.11it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

130.16524577140808


100%|██████████| 68/68 [00:28<00:00,  2.37it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.9720e-05.
Val Loss: 2932000.2978
lr:  4.972012849354609e-05


 73%|███████▎  | 200/274 [01:36<00:49,  1.50it/s]

Epoch: 135	Batch: 200	Avg-Loss: 2963077.8825


100%|██████████| 274/274 [02:12<00:00,  2.07it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

132.3241012096405


100%|██████████| 68/68 [00:28<00:00,  2.38it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.9720e-05.
Val Loss: 2934790.8162
lr:  4.972012849354609e-05


 73%|███████▎  | 200/274 [01:36<00:34,  2.14it/s]

Epoch: 136	Batch: 200	Avg-Loss: 2947292.1725


100%|██████████| 274/274 [02:11<00:00,  2.08it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

131.59843468666077


100%|██████████| 68/68 [00:28<00:00,  2.42it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.9720e-05.
Val Loss: 2937220.1507
lr:  4.972012849354609e-05


 73%|███████▎  | 200/274 [01:36<00:35,  2.09it/s]

Epoch: 137	Batch: 200	Avg-Loss: 2967016.1362


100%|██████████| 274/274 [02:11<00:00,  2.09it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

131.2907259464264


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.7234e-05.
Val Loss: 2934404.0368
lr:  4.723412206886878e-05


 73%|███████▎  | 200/274 [01:35<00:36,  2.04it/s]

Epoch: 138	Batch: 200	Avg-Loss: 2946219.9287


100%|██████████| 274/274 [02:11<00:00,  2.09it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

131.08803701400757


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.7234e-05.
Val Loss: 2932414.7243
lr:  4.723412206886878e-05


 73%|███████▎  | 200/274 [01:35<00:33,  2.20it/s]

Epoch: 139	Batch: 200	Avg-Loss: 2952134.4525


100%|██████████| 274/274 [02:11<00:00,  2.09it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

131.17773509025574


100%|██████████| 68/68 [00:28<00:00,  2.41it/s]
  0%|          | 0/274 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.7234e-05.
Val Loss: 2931021.9926
lr:  4.723412206886878e-05


 73%|███████▎  | 200/274 [01:33<00:33,  2.22it/s]

Epoch: 140	Batch: 200	Avg-Loss: 2939011.1550


100%|██████████| 274/274 [02:07<00:00,  2.15it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

127.25381970405579


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]

Adjusting learning rate of group 0 to 4.4872e-05.
Val Loss: 2934961.6581
lr:  4.487241596542534e-05



