In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
import torch.optim as optim
import numpy as np
import pandas as pd
from tqdm import tqdm
torch.manual_seed(1)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
cpu = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


In [2]:
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x

In [3]:
import torch.nn as nn

class DFDCNet(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers, drop_prob=0.5):
        super(DFDCNet, self).__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        
        self.mixnet = timm.create_model("mixnet_s", pretrained=True)
        self.mixnet.classifier = Identity()
#         l = 0
#         for param in self.mixnet.parameters():
#             l = l + 1
#             if l > 200:
#                 param.requires_grad = True
                
        self.lstm = nn.LSTM(1536, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(0.5)
        self.batchnorm = nn.BatchNorm1d(hidden_dim)
        self.elu = nn.ELU()
        self.fc1 = nn.Linear(hidden_dim, 32)
#         self.fc2 = nn.Linear(64, 32)
#         self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(32, output_size)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x, hidden):
        batch_size, seqlen, c, h, w = x.size()
        x = x.reshape(batch_size*seqlen, c, h, w).float()
        x = self.mixnet(x)
        x = x.reshape(batch_size, seqlen, x.shape[1])
        lstm_out, hidden = self.lstm(x, hidden)
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        
        out = self.dropout(lstm_out)
#         out = self.batchnorm(out)
        out = self.fc1(out)
        out = self.elu(out)
#         out = self.fc2(out)
#         out = self.elu(out)
#         out = self.fc3(out)
#         out = self.elu(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        
        out = out.view(batch_size, -1)
        out = out[:,-1]
        return out, hidden
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
                      weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
        return hidden

In [4]:
#  !ls -ln data_images | head -n 50

In [5]:
for p in np.arange(0, 50):
    if p == 0:
        metadata = torch.load('data_images/000metadata_part_' + str(p) + '.pt', map_location = cpu)
    else:
        metadata_p = torch.load('data_images/000metadata_part_' + str(p) + '.pt', map_location = cpu)
        metadata = pd.concat([metadata, metadata_p])

In [6]:
metadata.n_face.value_counts()

1    104891
2      8879
0      5076
3       308
Name: n_face, dtype: int64

In [7]:
metadata.head()

Unnamed: 0,label,original,split,n_face
owxbbpjpch.mp4,FAKE,wynotylpnm.mp4,train,1
vpmyeepbep.mp4,REAL,,train,1
fzvpbrzssi.mp4,REAL,,train,1
htorvhbcae.mp4,FAKE,wclvkepakb.mp4,train,1
fckxaqjbxk.mp4,FAKE,vpmyeepbep.mp4,train,1


In [8]:
X = np.array(metadata.index[metadata.n_face == 1])
Y = np.array(1 * (metadata.label[metadata.n_face == 1] == 'REAL'))

In [9]:
X[0:5]

array(['owxbbpjpch.mp4', 'vpmyeepbep.mp4', 'fzvpbrzssi.mp4',
       'htorvhbcae.mp4', 'fckxaqjbxk.mp4'], dtype=object)

In [10]:
Y[0:5]

array([0, 1, 1, 0, 0])

In [11]:
len(X)

104891

In [12]:
len(Y)

104891

In [13]:
n_videos = len(X)
n_videos_train = int(1000 * 80)
n_videos_val = int(100 * (np.floor((n_videos - n_videos_train)/100)))
print(str(n_videos_train) + ' for training')
print(str(n_videos_val) + ' for validation')

80000 for training
24800 for validation


In [14]:
from torch.utils import data
class Dataset(data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, list_IDs, labels):
        'Initialization'
        self.labels = labels
        self.list_IDs = list_IDs

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]
        # Load data and get label
        X = torch.load('data_images/1face_X_'+ ID + '.pt', map_location = cpu)
        y = self.labels[index]

        return X[[0,2,4,6,8]], y

In [15]:
from torch.utils.data import TensorDataset, DataLoader
dataset = Dataset(X[0:n_videos_train + n_videos_val], Y[0:n_videos_train + n_videos_val])
train_data, val_data = torch.utils.data.random_split(dataset, [n_videos_train, n_videos_val])
train_batch_size = 40
val_batch_size = 25
train_loader = DataLoader(train_data, shuffle=True, batch_size=train_batch_size, num_workers = 8)
val_loader = DataLoader(val_data, shuffle=True, batch_size=val_batch_size, num_workers = 8)

In [16]:
input_size = 512
output_size = 1
hidden_dim = 512
n_layers = 2

model = DFDCNet(input_size, output_size, hidden_dim, n_layers)
model.to(device)

DFDCNet(
  (mixnet): EfficientNet(
    (conv_stem): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): ReLU(inplace=True)
          (conv_pw): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act2): Identity()
        )
      )
      (1): Sequential(
        (0): InvertedResidual(
          (conv_pw): MixedConv2d(
            (0): Conv2d(8, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): Conv2d(8,

In [17]:
param_optimizer = list(model.named_parameters())

train_criterion = nn.BCELoss()
val_criterion = nn.BCELoss()

optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [18]:
# scheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=500, verbose=True)

In [19]:
torch.cuda.empty_cache() 

In [20]:
! rm log_training.log

In [21]:
import logging
logging.basicConfig(filename = 'log_training.log',level = logging.INFO)


In [22]:

from datetime import datetime

now = datetime.now()

current_time = now.strftime("%H:%M:%S")
print("Current Time =", current_time)

Current Time = 23:42:18


In [23]:
epochs = 5
counter = 0
print_every = 1000
clip = .5
valid_loss_min = .15
val_loss = torch.tensor(np.Inf)
model.train()
for i in range(epochs):
    h = model.init_hidden(train_batch_size)
    for inputs, labels in tqdm(train_loader):

        counter += 1
        h = tuple([e.data for e in h])
        inputs, labels = inputs.to(device), labels.to(device)
        model.zero_grad()
        output, h = model(inputs, h)
        loss = train_criterion(output.squeeze(), labels.float())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        torch.cuda.empty_cache() 
#         print(counter)
        if counter % 100 == 0:
            logging.info("Time: {}...".format(datetime.now().strftime("%H:%M:%S")) + 
                         "Epoch: {}/{}...".format(i+1, epochs) +  
                         "Step: {}...".format(counter) +
                         "Loss: {:.6f}...".format(loss.item())) 
        
        if counter%print_every == 0:
            logging.info(str(counter))
            val_h = model.init_hidden(val_batch_size)
            val_losses = []
            model.eval()
            for inp, lab in tqdm(val_loader):
                
                val_h = tuple([each.data for each in val_h])
                inp, lab = inp.to(device), lab.to(device)
                out, val_h = model(inp, val_h)
                val_loss = val_criterion(out.squeeze(), lab.float())
                val_losses.append(val_loss.item())
            model.train()
            print("Time: {}...".format(datetime.now().strftime("%H:%M:%S")) + 
                  "Epoch: {}/{}...".format(i+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))
            logging.info("Time: {}...".format(datetime.now().strftime("%H:%M:%S")) + 
                         "Epoch: {}/{}...".format(i+1, epochs) +  
                         "Step: {}...".format(counter) +
                         "Loss: {:.6f}...".format(loss.item()) +
                         "Val Loss: {:.6f}".format(np.mean(val_losses)))
            if np.mean(val_losses) <= valid_loss_min:
                torch.save(model.state_dict(), './model_1face_unfroze.pt')
                print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,np.mean(val_losses)))
                logging.info('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,np.mean(val_losses)))
                valid_loss_min = np.mean(val_losses)
                torch.cuda.empty_cache() 
    scheduler.step(val_loss.item())


 50%|████▉     | 999/2000 [22:27<13:15,  1.26it/s]  
  0%|          | 0/992 [00:00<?, ?it/s][A
  0%|          | 1/992 [00:10<2:48:30, 10.20s/it][A
  0%|          | 2/992 [00:10<1:58:37,  7.19s/it][A
  0%|          | 3/992 [00:10<1:23:28,  5.06s/it][A
  0%|          | 4/992 [00:10<58:52,  3.58s/it]  [A
  1%|          | 6/992 [00:10<41:28,  2.52s/it][A
  1%|          | 7/992 [00:10<29:31,  1.80s/it][A
  1%|          | 8/992 [00:10<21:09,  1.29s/it][A
  1%|          | 9/992 [00:19<55:38,  3.40s/it][A
  1%|          | 10/992 [00:19<39:30,  2.41s/it][A
  1%|          | 12/992 [00:19<27:57,  1.71s/it][A
  1%|▏         | 14/992 [00:19<20:02,  1.23s/it][A
  2%|▏         | 16/992 [00:19<14:19,  1.14it/s][A
  2%|▏         | 18/992 [00:27<29:02,  1.79s/it][A
  2%|▏         | 20/992 [00:27<20:40,  1.28s/it][A
  2%|▏         | 22/992 [00:27<14:48,  1.09it/s][A
  2%|▏         | 24/992 [00:28<10:44,  1.50it/s][A
  3%|▎         | 26/992 [00:35<25:11,  1.57s/it][A
  3%|▎         | 28/

 25%|██▍       | 245/992 [04:19<11:45,  1.06it/s][A
 25%|██▍       | 247/992 [04:19<08:31,  1.46it/s][A
 25%|██▌       | 249/992 [04:26<19:11,  1.55s/it][A
 25%|██▌       | 250/992 [04:27<15:15,  1.23s/it][A
 25%|██▌       | 252/992 [04:27<11:00,  1.12it/s][A
 26%|██▌       | 254/992 [04:27<07:55,  1.55it/s][A
 26%|██▌       | 256/992 [04:27<06:02,  2.03it/s][A
 26%|██▌       | 257/992 [04:34<29:15,  2.39s/it][A
 26%|██▌       | 258/992 [04:36<26:20,  2.15s/it][A
 26%|██▌       | 260/992 [04:36<18:40,  1.53s/it][A
 26%|██▋       | 262/992 [04:36<13:19,  1.10s/it][A
 27%|██▋       | 264/992 [04:36<09:37,  1.26it/s][A
 27%|██▋       | 265/992 [04:43<32:58,  2.72s/it][A
 27%|██▋       | 266/992 [04:45<28:34,  2.36s/it][A
 27%|██▋       | 268/992 [04:45<20:18,  1.68s/it][A
 27%|██▋       | 270/992 [04:45<14:24,  1.20s/it][A
 27%|██▋       | 272/992 [04:45<10:19,  1.16it/s][A
 28%|██▊       | 274/992 [04:53<21:23,  1.79s/it][A
 28%|██▊       | 276/992 [04:53<15:09,  1.27s/

 54%|█████▍    | 539/992 [10:01<10:44,  1.42s/it][A
 55%|█████▍    | 541/992 [10:01<07:37,  1.01s/it][A
 55%|█████▍    | 543/992 [10:01<05:28,  1.37it/s][A
 55%|█████▍    | 545/992 [10:10<13:29,  1.81s/it][A
 55%|█████▌    | 547/992 [10:10<09:33,  1.29s/it][A
 55%|█████▌    | 548/992 [10:10<06:56,  1.07it/s][A
 55%|█████▌    | 550/992 [10:10<04:59,  1.48it/s][A
 56%|█████▌    | 552/992 [10:10<03:38,  2.02it/s][A
 56%|█████▌    | 554/992 [10:19<12:08,  1.66s/it][A
 56%|█████▌    | 556/992 [10:19<08:37,  1.19s/it][A
 56%|█████▋    | 558/992 [10:19<06:08,  1.18it/s][A
 56%|█████▋    | 560/992 [10:19<04:25,  1.62it/s][A
 57%|█████▋    | 562/992 [10:29<13:55,  1.94s/it][A
 57%|█████▋    | 564/992 [10:29<09:51,  1.38s/it][A
 57%|█████▋    | 566/992 [10:30<07:00,  1.01it/s][A
 57%|█████▋    | 568/992 [10:30<05:02,  1.40it/s][A
 57%|█████▋    | 569/992 [10:40<26:06,  3.70s/it][A
 58%|█████▊    | 571/992 [10:41<18:20,  2.61s/it][A
 58%|█████▊    | 573/992 [10:41<12:58,  1.86s/

 85%|████████▍ | 839/992 [17:39<02:47,  1.09s/it][A
 85%|████████▍ | 839/992 [17:52<02:47,  1.09s/it][A
 85%|████████▍ | 841/992 [17:55<07:56,  3.16s/it][A
 85%|████████▍ | 843/992 [17:55<05:32,  2.23s/it][A
 85%|████████▌ | 845/992 [17:55<03:52,  1.58s/it][A
 85%|████████▌ | 847/992 [17:55<02:43,  1.13s/it][A
 86%|████████▌ | 849/992 [18:12<07:56,  3.33s/it][A
 86%|████████▌ | 851/992 [18:12<05:31,  2.35s/it][A
 86%|████████▌ | 852/992 [18:12<03:54,  1.68s/it][A
 86%|████████▌ | 854/992 [18:12<02:44,  1.19s/it][A
 86%|████████▋ | 856/992 [18:13<01:56,  1.17it/s][A
 86%|████████▋ | 858/992 [18:28<06:33,  2.94s/it][A
 87%|████████▋ | 860/992 [18:28<04:33,  2.07s/it][A
 87%|████████▋ | 862/992 [18:28<03:11,  1.47s/it][A
 87%|████████▋ | 864/992 [18:29<02:14,  1.05s/it][A
 87%|████████▋ | 865/992 [18:44<11:15,  5.32s/it][A
 87%|████████▋ | 867/992 [18:44<07:48,  3.75s/it][A
 88%|████████▊ | 869/992 [18:44<05:24,  2.64s/it][A
 88%|████████▊ | 871/992 [18:44<03:46,  1.87s/

Time: 00:27:34...Epoch: 1/5... Step: 1000... Loss: 0.075605... Val Loss: 0.133982
Validation loss decreased (0.150000 --> 0.133982).  Saving model ...


100%|█████████▉| 1999/2000 [1:32:51<00:01,  1.62s/it]   
  0%|          | 0/992 [00:00<?, ?it/s][A
  0%|          | 1/992 [00:07<2:10:32,  7.90s/it][A
  0%|          | 3/992 [00:08<1:31:35,  5.56s/it][A
  1%|          | 5/992 [00:08<1:04:22,  3.91s/it][A
  1%|          | 6/992 [00:08<45:34,  2.77s/it]  [A
  1%|          | 8/992 [00:08<32:10,  1.96s/it][A
  1%|          | 9/992 [00:14<53:20,  3.26s/it][A
  1%|          | 11/992 [00:14<37:39,  2.30s/it][A
  1%|▏         | 13/992 [00:15<26:42,  1.64s/it][A
  1%|▏         | 14/992 [00:15<19:14,  1.18s/it][A
  2%|▏         | 16/992 [00:15<13:46,  1.18it/s][A
  2%|▏         | 18/992 [00:21<25:33,  1.57s/it][A
  2%|▏         | 20/992 [00:22<18:17,  1.13s/it][A
  2%|▏         | 22/992 [00:22<13:06,  1.23it/s][A
  2%|▏         | 24/992 [00:22<09:39,  1.67it/s][A
  3%|▎         | 26/992 [00:29<22:44,  1.41s/it][A
  3%|▎         | 28/992 [00:29<16:14,  1.01s/it][A
  3%|▎         | 30/992 [00:29<11:42,  1.37it/s][A
  3%|▎        

 26%|██▌       | 253/992 [03:38<10:08,  1.22it/s][A
 26%|██▌       | 255/992 [03:38<07:19,  1.68it/s][A
 26%|██▌       | 257/992 [03:42<12:47,  1.04s/it][A
 26%|██▌       | 258/992 [03:43<13:34,  1.11s/it][A
 26%|██▌       | 259/992 [03:45<13:46,  1.13s/it][A
 26%|██▋       | 261/992 [03:45<09:54,  1.23it/s][A
 27%|██▋       | 263/992 [03:45<07:09,  1.70it/s][A
 27%|██▋       | 265/992 [03:49<12:09,  1.00s/it][A
 27%|██▋       | 266/992 [03:50<12:42,  1.05s/it][A
 27%|██▋       | 267/992 [03:51<13:28,  1.12s/it][A
 27%|██▋       | 269/992 [03:51<09:41,  1.24it/s][A
 27%|██▋       | 271/992 [03:52<07:03,  1.70it/s][A
 28%|██▊       | 273/992 [03:56<11:58,  1.00it/s][A
 28%|██▊       | 274/992 [03:57<13:04,  1.09s/it][A
 28%|██▊       | 275/992 [03:58<13:12,  1.11s/it][A
 28%|██▊       | 277/992 [03:58<09:29,  1.26it/s][A
 28%|██▊       | 279/992 [03:58<06:54,  1.72it/s][A
 28%|██▊       | 281/992 [04:02<11:16,  1.05it/s][A
 28%|██▊       | 282/992 [04:03<13:03,  1.10s/

 48%|████▊     | 476/992 [06:45<10:08,  1.18s/it][A
 48%|████▊     | 478/992 [06:46<07:14,  1.18it/s][A
 48%|████▊     | 480/992 [06:46<05:15,  1.62it/s][A
 48%|████▊     | 481/992 [06:48<09:33,  1.12s/it][A
 49%|████▊     | 482/992 [06:50<11:39,  1.37s/it][A
 49%|████▊     | 483/992 [06:52<13:37,  1.61s/it][A
 49%|████▉     | 484/992 [06:52<09:46,  1.15s/it][A
 49%|████▉     | 486/992 [06:52<06:59,  1.21it/s][A
 49%|████▉     | 488/992 [06:53<05:04,  1.65it/s][A
 49%|████▉     | 489/992 [06:55<09:25,  1.12s/it][A
 49%|████▉     | 490/992 [06:57<10:57,  1.31s/it][A
 49%|████▉     | 491/992 [06:59<13:16,  1.59s/it][A
 50%|████▉     | 493/992 [06:59<09:27,  1.14s/it][A
 50%|████▉     | 494/992 [06:59<06:52,  1.21it/s][A
 50%|█████     | 496/992 [06:59<04:58,  1.66it/s][A
 50%|█████     | 497/992 [07:01<08:35,  1.04s/it][A
 50%|█████     | 498/992 [07:03<10:32,  1.28s/it][A
 50%|█████     | 499/992 [07:05<12:29,  1.52s/it][A
 51%|█████     | 501/992 [07:05<08:55,  1.09s/

 71%|███████   | 705/992 [09:57<04:54,  1.02s/it][A
 71%|███████   | 706/992 [09:59<06:06,  1.28s/it][A
 71%|███████▏  | 707/992 [10:00<06:06,  1.29s/it][A
 71%|███████▏  | 709/992 [10:00<04:20,  1.08it/s][A
 72%|███████▏  | 711/992 [10:00<03:07,  1.50it/s][A
 72%|███████▏  | 713/992 [10:04<04:26,  1.05it/s][A
 72%|███████▏  | 714/992 [10:05<05:28,  1.18s/it][A
 72%|███████▏  | 715/992 [10:07<05:57,  1.29s/it][A
 72%|███████▏  | 717/992 [10:07<04:15,  1.07it/s][A
 72%|███████▏  | 719/992 [10:07<03:03,  1.49it/s][A
 73%|███████▎  | 720/992 [10:07<02:16,  1.99it/s][A
 73%|███████▎  | 721/992 [10:10<05:18,  1.18s/it][A
 73%|███████▎  | 722/992 [10:12<05:47,  1.29s/it][A
 73%|███████▎  | 723/992 [10:13<06:27,  1.44s/it][A
 73%|███████▎  | 725/992 [10:14<04:36,  1.04s/it][A
 73%|███████▎  | 727/992 [10:14<03:17,  1.34it/s][A
 73%|███████▎  | 728/992 [10:14<02:26,  1.80it/s][A
 73%|███████▎  | 729/992 [10:16<05:13,  1.19s/it][A
 74%|███████▎  | 730/992 [10:18<06:04,  1.39s/

 99%|█████████▊| 978/992 [13:49<00:17,  1.28s/it][A
 99%|█████████▉| 980/992 [13:49<00:11,  1.09it/s][A
 99%|█████████▉| 982/992 [13:49<00:06,  1.50it/s][A
 99%|█████████▉| 984/992 [13:49<00:03,  2.05it/s][A
 99%|█████████▉| 986/992 [13:55<00:07,  1.28s/it][A
100%|█████████▉| 988/992 [13:55<00:03,  1.09it/s][A
100%|█████████▉| 990/992 [13:56<00:01,  1.51it/s][A
100%|██████████| 2000/2000 [1:46:48<00:00, 252.20s/it]

Time: 01:29:06...Epoch: 1/5... Step: 2000... Loss: 0.072664... Val Loss: 0.108946
Validation loss decreased (0.133982 --> 0.108946).  Saving model ...



 50%|████▉     | 999/2000 [23:44<15:25,  1.08it/s]  
  0%|          | 0/992 [00:00<?, ?it/s][A
  0%|          | 1/992 [00:09<2:34:06,  9.33s/it][A
  0%|          | 3/992 [00:09<1:48:05,  6.56s/it][A
  1%|          | 5/992 [00:09<1:15:50,  4.61s/it][A
  1%|          | 6/992 [00:09<53:43,  3.27s/it]  [A
  1%|          | 8/992 [00:09<37:51,  2.31s/it][A
  1%|          | 9/992 [00:17<1:03:33,  3.88s/it][A
  1%|          | 11/992 [00:17<44:49,  2.74s/it] [A
  1%|▏         | 13/992 [00:17<31:42,  1.94s/it][A
  1%|▏         | 14/992 [00:17<22:49,  1.40s/it][A
  2%|▏         | 16/992 [00:18<16:19,  1.00s/it][A
  2%|▏         | 17/992 [00:24<41:24,  2.55s/it][A
  2%|▏         | 18/992 [00:24<29:36,  1.82s/it][A
  2%|▏         | 20/992 [00:24<21:19,  1.32s/it][A
  2%|▏         | 22/992 [00:24<15:14,  1.06it/s][A
  2%|▏         | 24/992 [00:24<11:08,  1.45it/s][A
  3%|▎         | 25/992 [00:30<35:55,  2.23s/it][A
  3%|▎         | 26/992 [00:31<26:21,  1.64s/it][A
  3%|▎        

 29%|██▊       | 284/992 [03:58<10:17,  1.15it/s][A
 29%|██▉       | 286/992 [03:58<07:30,  1.57it/s][A
 29%|██▉       | 288/992 [03:58<05:32,  2.12it/s][A
 29%|██▉       | 289/992 [04:03<20:40,  1.76s/it][A
 29%|██▉       | 290/992 [04:04<17:16,  1.48s/it][A
 29%|██▉       | 292/992 [04:04<13:05,  1.12s/it][A
 30%|██▉       | 294/992 [04:04<09:25,  1.23it/s][A
 30%|██▉       | 296/992 [04:05<06:48,  1.70it/s][A
 30%|██▉       | 297/992 [04:10<21:46,  1.88s/it][A
 30%|███       | 298/992 [04:10<18:16,  1.58s/it][A
 30%|███       | 300/992 [04:11<13:14,  1.15s/it][A
 30%|███       | 302/992 [04:11<09:28,  1.21it/s][A
 31%|███       | 304/992 [04:11<06:51,  1.67it/s][A
 31%|███       | 305/992 [04:16<22:25,  1.96s/it][A
 31%|███       | 306/992 [04:17<17:50,  1.56s/it][A
 31%|███       | 308/992 [04:17<13:07,  1.15s/it][A
 31%|███▏      | 310/992 [04:17<09:24,  1.21it/s][A
 31%|███▏      | 312/992 [04:17<06:47,  1.67it/s][A
 32%|███▏      | 313/992 [04:22<22:01,  1.95s/

 55%|█████▍    | 543/992 [07:24<03:23,  2.21it/s][A
 55%|█████▍    | 545/992 [07:29<08:49,  1.19s/it][A
 55%|█████▌    | 547/992 [07:30<06:17,  1.18it/s][A
 55%|█████▌    | 549/992 [07:30<04:39,  1.58it/s][A
 55%|█████▌    | 550/992 [07:30<03:31,  2.09it/s][A
 56%|█████▌    | 552/992 [07:30<02:35,  2.83it/s][A
 56%|█████▌    | 554/992 [07:36<08:24,  1.15s/it][A
 56%|█████▌    | 555/992 [07:36<06:07,  1.19it/s][A
 56%|█████▌    | 556/992 [07:36<04:30,  1.61it/s][A
 56%|█████▋    | 558/992 [07:36<03:18,  2.19it/s][A
 56%|█████▋    | 560/992 [07:37<02:29,  2.89it/s][A
 57%|█████▋    | 562/992 [07:42<07:59,  1.12s/it][A
 57%|█████▋    | 564/992 [07:43<05:44,  1.24it/s][A
 57%|█████▋    | 566/992 [07:43<04:11,  1.70it/s][A
 57%|█████▋    | 568/992 [07:43<03:03,  2.31it/s][A
 57%|█████▋    | 570/992 [07:49<08:36,  1.22s/it][A
 58%|█████▊    | 571/992 [07:49<06:14,  1.12it/s][A
 58%|█████▊    | 573/992 [07:49<04:29,  1.55it/s][A
 58%|█████▊    | 575/992 [07:49<03:18,  2.10it

 85%|████████▍ | 839/992 [11:17<01:10,  2.16it/s][A
 85%|████████▍ | 841/992 [11:23<03:00,  1.19s/it][A
 85%|████████▍ | 843/992 [11:23<02:08,  1.16it/s][A
 85%|████████▌ | 845/992 [11:23<01:31,  1.60it/s][A
 85%|████████▌ | 847/992 [11:23<01:07,  2.16it/s][A
 86%|████████▌ | 849/992 [11:29<02:47,  1.17s/it][A
 86%|████████▌ | 851/992 [11:29<01:58,  1.19it/s][A
 86%|████████▌ | 853/992 [11:29<01:24,  1.64it/s][A
 86%|████████▌ | 855/992 [11:29<01:03,  2.17it/s][A
 86%|████████▋ | 857/992 [11:35<02:34,  1.14s/it][A
 87%|████████▋ | 859/992 [11:35<01:49,  1.21it/s][A
 87%|████████▋ | 861/992 [11:35<01:18,  1.68it/s][A
 87%|████████▋ | 863/992 [11:36<01:03,  2.04it/s][A
 87%|████████▋ | 865/992 [11:41<02:24,  1.13s/it][A
 87%|████████▋ | 867/992 [11:41<01:42,  1.22it/s][A
 88%|████████▊ | 869/992 [11:41<01:12,  1.69it/s][A
 88%|████████▊ | 871/992 [11:42<00:59,  2.04it/s][A
 88%|████████▊ | 873/992 [11:47<02:16,  1.15s/it][A
 88%|████████▊ | 875/992 [11:47<01:36,  1.21it

Time: 02:06:08...Epoch: 2/5... Step: 3000... Loss: 0.010556... Val Loss: 0.111206


100%|█████████▉| 1999/2000 [1:09:11<00:01,  1.39s/it]  
  0%|          | 0/992 [00:00<?, ?it/s][A
  0%|          | 1/992 [00:07<2:02:41,  7.43s/it][A
  0%|          | 2/992 [00:07<1:26:24,  5.24s/it][A
  0%|          | 4/992 [00:07<1:00:47,  3.69s/it][A
  1%|          | 6/992 [00:07<42:52,  2.61s/it]  [A
  1%|          | 8/992 [00:08<30:22,  1.85s/it][A
  1%|          | 9/992 [00:14<51:30,  3.14s/it][A
  1%|          | 11/992 [00:14<36:20,  2.22s/it][A
  1%|          | 12/992 [00:14<26:40,  1.63s/it][A
  1%|▏         | 14/992 [00:14<19:00,  1.17s/it][A
  2%|▏         | 16/992 [00:14<13:35,  1.20it/s][A
  2%|▏         | 18/992 [00:21<25:11,  1.55s/it][A
  2%|▏         | 20/992 [00:21<17:59,  1.11s/it][A
  2%|▏         | 22/992 [00:21<12:56,  1.25it/s][A
  2%|▏         | 24/992 [00:21<09:31,  1.69it/s][A
  3%|▎         | 26/992 [00:28<21:58,  1.36s/it][A
  3%|▎         | 28/992 [00:28<15:42,  1.02it/s][A
  3%|▎         | 30/992 [00:28<11:20,  1.41it/s][A
  3%|▎         

 24%|██▍       | 241/992 [03:29<27:38,  2.21s/it][A
 24%|██▍       | 243/992 [03:29<19:33,  1.57s/it][A
 25%|██▍       | 245/992 [03:29<13:58,  1.12s/it][A
 25%|██▍       | 246/992 [03:29<11:24,  1.09it/s][A
 25%|██▌       | 248/992 [03:30<08:18,  1.49it/s][A
 25%|██▌       | 249/992 [03:36<27:47,  2.24s/it][A
 25%|██▌       | 251/992 [03:36<19:46,  1.60s/it][A
 26%|██▌       | 253/992 [03:36<14:04,  1.14s/it][A
 26%|██▌       | 254/992 [03:36<10:14,  1.20it/s][A
 26%|██▌       | 256/992 [03:36<07:23,  1.66it/s][A
 26%|██▌       | 258/992 [03:42<16:37,  1.36s/it][A
 26%|██▌       | 259/992 [03:42<12:06,  1.01it/s][A
 26%|██▋       | 261/992 [03:43<08:41,  1.40it/s][A
 27%|██▋       | 263/992 [03:43<06:25,  1.89it/s][A
 27%|██▋       | 265/992 [03:49<15:35,  1.29s/it][A
 27%|██▋       | 266/992 [03:49<11:17,  1.07it/s][A
 27%|██▋       | 268/992 [03:49<08:09,  1.48it/s][A
 27%|██▋       | 270/992 [03:49<05:57,  2.02it/s][A
 27%|██▋       | 272/992 [03:49<04:25,  2.71it

 54%|█████▍    | 536/992 [07:26<03:16,  2.32it/s][A
 54%|█████▍    | 538/992 [07:32<08:57,  1.18s/it][A
 54%|█████▍    | 540/992 [07:33<06:25,  1.17it/s][A
 55%|█████▍    | 542/992 [07:33<04:42,  1.59it/s][A
 55%|█████▍    | 544/992 [07:33<03:27,  2.16it/s][A
 55%|█████▌    | 546/992 [07:39<08:51,  1.19s/it][A
 55%|█████▌    | 547/992 [07:39<06:26,  1.15it/s][A
 55%|█████▌    | 549/992 [07:39<04:39,  1.59it/s][A
 56%|█████▌    | 551/992 [07:39<03:23,  2.16it/s][A
 56%|█████▌    | 553/992 [07:45<08:36,  1.18s/it][A
 56%|█████▌    | 555/992 [07:45<06:09,  1.18it/s][A
 56%|█████▌    | 557/992 [07:45<04:25,  1.64it/s][A
 56%|█████▋    | 559/992 [07:45<03:14,  2.23it/s][A
 57%|█████▋    | 561/992 [07:51<08:58,  1.25s/it][A
 57%|█████▋    | 563/992 [07:52<06:26,  1.11it/s][A
 57%|█████▋    | 565/992 [07:52<04:41,  1.51it/s][A
 57%|█████▋    | 567/992 [07:52<03:24,  2.08it/s][A
 57%|█████▋    | 569/992 [07:58<08:44,  1.24s/it][A
 58%|█████▊    | 571/992 [07:58<06:15,  1.12it

 84%|████████▍ | 831/992 [11:27<01:17,  2.09it/s][A
 84%|████████▍ | 833/992 [11:31<02:47,  1.06s/it][A
 84%|████████▍ | 834/992 [11:33<02:58,  1.13s/it][A
 84%|████████▍ | 835/992 [11:33<02:10,  1.20it/s][A
 84%|████████▍ | 837/992 [11:33<01:33,  1.66it/s][A
 85%|████████▍ | 839/992 [11:33<01:08,  2.23it/s][A
 85%|████████▍ | 841/992 [11:38<02:31,  1.01s/it][A
 85%|████████▍ | 842/992 [11:39<02:48,  1.13s/it][A
 85%|████████▌ | 844/992 [11:39<02:00,  1.23it/s][A
 85%|████████▌ | 846/992 [11:40<01:25,  1.70it/s][A
 85%|████████▌ | 847/992 [11:40<01:04,  2.23it/s][A
 86%|████████▌ | 849/992 [11:44<02:23,  1.00s/it][A
 86%|████████▌ | 850/992 [11:46<02:40,  1.13s/it][A
 86%|████████▌ | 852/992 [11:46<01:53,  1.23it/s][A
 86%|████████▌ | 854/992 [11:46<01:21,  1.70it/s][A
 86%|████████▋ | 856/992 [11:46<00:58,  2.31it/s][A
 86%|████████▋ | 858/992 [11:52<02:40,  1.20s/it][A
 87%|████████▋ | 860/992 [11:52<01:53,  1.16it/s][A
 87%|████████▋ | 862/992 [11:52<01:20,  1.60it

Time: 02:52:01...Epoch: 2/5... Step: 4000... Loss: 0.141025... Val Loss: 0.072334
Validation loss decreased (0.108946 --> 0.072334).  Saving model ...


100%|██████████| 2000/2000 [1:22:53<00:00, 247.71s/it]
 50%|████▉     | 999/2000 [24:45<13:18,  1.25it/s]  
  0%|          | 0/992 [00:00<?, ?it/s][A
  0%|          | 1/992 [00:09<2:44:28,  9.96s/it][A
  0%|          | 2/992 [00:10<1:55:34,  7.00s/it][A
  0%|          | 3/992 [00:10<1:24:00,  5.10s/it][A
  0%|          | 4/992 [00:10<59:20,  3.60s/it]  [A
  1%|          | 6/992 [00:10<41:51,  2.55s/it][A
  1%|          | 8/992 [00:11<29:35,  1.80s/it][A
  1%|          | 9/992 [00:17<54:05,  3.30s/it][A
  1%|          | 11/992 [00:19<41:50,  2.56s/it][A
  1%|          | 12/992 [00:19<29:49,  1.83s/it][A
  1%|▏         | 14/992 [00:19<21:11,  1.30s/it][A
  2%|▏         | 15/992 [00:19<15:21,  1.06it/s][A
  2%|▏         | 17/992 [00:25<24:19,  1.50s/it][A
  2%|▏         | 19/992 [00:26<19:54,  1.23s/it][A
  2%|▏         | 20/992 [00:26<14:29,  1.12it/s][A
  2%|▏         | 22/992 [00:26<10:26,  1.55it/s][A
  2%|▏         | 24/992 [00:27<07:41,  2.10it/s][A
  3%|▎         |

 23%|██▎       | 227/992 [03:16<18:10,  1.43s/it][A
 23%|██▎       | 229/992 [03:16<13:02,  1.03s/it][A
 23%|██▎       | 230/992 [03:18<16:14,  1.28s/it][A
 23%|██▎       | 232/992 [03:18<11:34,  1.09it/s][A
 23%|██▎       | 233/992 [03:22<24:34,  1.94s/it][A
 24%|██▎       | 235/992 [03:22<17:24,  1.38s/it][A
 24%|██▍       | 237/992 [03:23<12:39,  1.01s/it][A
 24%|██▍       | 238/992 [03:24<15:36,  1.24s/it][A
 24%|██▍       | 240/992 [03:25<11:07,  1.13it/s][A
 24%|██▍       | 241/992 [03:29<22:31,  1.80s/it][A
 24%|██▍       | 243/992 [03:29<16:01,  1.28s/it][A
 25%|██▍       | 245/992 [03:29<11:35,  1.07it/s][A
 25%|██▍       | 246/992 [03:31<14:11,  1.14s/it][A
 25%|██▌       | 248/992 [03:31<10:10,  1.22it/s][A
 25%|██▌       | 249/992 [03:35<21:35,  1.74s/it][A
 25%|██▌       | 251/992 [03:35<15:22,  1.24s/it][A
 26%|██▌       | 253/992 [03:35<11:11,  1.10it/s][A
 26%|██▌       | 254/992 [03:37<14:27,  1.18s/it][A
 26%|██▌       | 256/992 [03:37<10:23,  1.18it

 46%|████▋     | 461/992 [06:28<09:00,  1.02s/it][A
 47%|████▋     | 462/992 [06:30<10:47,  1.22s/it][A
 47%|████▋     | 464/992 [06:30<07:41,  1.14it/s][A
 47%|████▋     | 465/992 [06:34<16:06,  1.83s/it][A
 47%|████▋     | 466/992 [06:34<11:32,  1.32s/it][A
 47%|████▋     | 467/992 [06:35<08:57,  1.02s/it][A
 47%|████▋     | 469/992 [06:35<06:46,  1.29it/s][A
 47%|████▋     | 470/992 [06:37<09:31,  1.09s/it][A
 48%|████▊     | 472/992 [06:37<06:47,  1.28it/s][A
 48%|████▊     | 473/992 [06:41<14:46,  1.71s/it][A
 48%|████▊     | 475/992 [06:42<11:19,  1.31s/it][A
 48%|████▊     | 477/992 [06:42<08:06,  1.06it/s][A
 48%|████▊     | 478/992 [06:43<10:05,  1.18s/it][A
 48%|████▊     | 480/992 [06:44<07:14,  1.18it/s][A
 48%|████▊     | 481/992 [06:48<15:06,  1.77s/it][A
 49%|████▊     | 483/992 [06:48<11:40,  1.38s/it][A
 49%|████▉     | 485/992 [06:49<08:23,  1.01it/s][A
 49%|████▉     | 486/992 [06:50<09:53,  1.17s/it][A
 49%|████▉     | 488/992 [06:50<07:06,  1.18it

 71%|███████   | 701/992 [09:53<05:02,  1.04s/it][A
 71%|███████   | 702/992 [09:54<05:28,  1.13s/it][A
 71%|███████   | 704/992 [09:54<03:56,  1.22it/s][A
 71%|███████   | 705/992 [09:58<08:44,  1.83s/it][A
 71%|███████▏  | 707/992 [09:59<06:51,  1.44s/it][A
 71%|███████▏  | 709/992 [10:00<04:52,  1.03s/it][A
 72%|███████▏  | 710/992 [10:01<05:11,  1.11s/it][A
 72%|███████▏  | 712/992 [10:01<03:43,  1.25it/s][A
 72%|███████▏  | 713/992 [10:05<08:08,  1.75s/it][A
 72%|███████▏  | 714/992 [10:05<05:49,  1.26s/it][A
 72%|███████▏  | 715/992 [10:07<06:08,  1.33s/it][A
 72%|███████▏  | 717/992 [10:07<04:21,  1.05it/s][A
 72%|███████▏  | 718/992 [10:08<04:24,  1.04it/s][A
 73%|███████▎  | 720/992 [10:08<03:10,  1.43it/s][A
 73%|███████▎  | 721/992 [10:12<07:32,  1.67s/it][A
 73%|███████▎  | 723/992 [10:13<06:19,  1.41s/it][A
 73%|███████▎  | 724/992 [10:13<04:33,  1.02s/it][A
 73%|███████▎  | 726/992 [10:14<03:40,  1.21it/s][A
 73%|███████▎  | 728/992 [10:14<02:39,  1.66it

 95%|█████████▌| 944/992 [13:20<00:34,  1.41it/s][A
 95%|█████████▌| 945/992 [13:23<01:07,  1.43s/it][A
 95%|█████████▌| 947/992 [13:25<01:00,  1.35s/it][A
 96%|█████████▌| 949/992 [13:26<00:41,  1.03it/s][A
 96%|█████████▌| 950/992 [13:26<00:39,  1.07it/s][A
 96%|█████████▌| 952/992 [13:27<00:26,  1.49it/s][A
 96%|█████████▌| 953/992 [13:30<00:55,  1.42s/it][A
 96%|█████████▋| 955/992 [13:32<00:50,  1.36s/it][A
 96%|█████████▋| 957/992 [13:32<00:34,  1.02it/s][A
 97%|█████████▋| 958/992 [13:33<00:33,  1.00it/s][A
 97%|█████████▋| 960/992 [13:34<00:23,  1.39it/s][A
 97%|█████████▋| 961/992 [13:36<00:42,  1.36s/it][A
 97%|█████████▋| 963/992 [13:39<00:38,  1.32s/it][A
 97%|█████████▋| 965/992 [13:39<00:25,  1.06it/s][A
 97%|█████████▋| 966/992 [13:40<00:24,  1.06it/s][A
 98%|█████████▊| 968/992 [13:40<00:16,  1.47it/s][A
 98%|█████████▊| 969/992 [13:43<00:31,  1.37s/it][A
 98%|█████████▊| 971/992 [13:45<00:27,  1.32s/it][A
 98%|█████████▊| 973/992 [13:46<00:17,  1.06it

Time: 03:30:49...Epoch: 3/5... Step: 5000... Loss: 0.211465... Val Loss: 0.075385


100%|█████████▉| 1999/2000 [1:11:16<00:01,  1.23s/it]  
  0%|          | 0/992 [00:00<?, ?it/s][A
  0%|          | 1/992 [00:07<2:01:17,  7.34s/it][A
  0%|          | 2/992 [00:07<1:25:22,  5.17s/it][A
  0%|          | 4/992 [00:07<59:59,  3.64s/it]  [A
  1%|          | 6/992 [00:07<42:17,  2.57s/it][A
  1%|          | 8/992 [00:07<29:57,  1.83s/it][A
  1%|          | 9/992 [00:13<50:24,  3.08s/it][A
  1%|          | 11/992 [00:14<35:34,  2.18s/it][A
  1%|▏         | 13/992 [00:14<25:11,  1.54s/it][A
  2%|▏         | 15/992 [00:14<17:56,  1.10s/it][A
  2%|▏         | 17/992 [00:20<28:38,  1.76s/it][A
  2%|▏         | 19/992 [00:21<20:21,  1.26s/it][A
  2%|▏         | 21/992 [00:21<14:36,  1.11it/s][A
  2%|▏         | 23/992 [00:21<10:34,  1.53it/s][A
  3%|▎         | 25/992 [00:27<22:46,  1.41s/it][A
  3%|▎         | 27/992 [00:27<16:15,  1.01s/it][A
  3%|▎         | 29/992 [00:28<11:42,  1.37it/s][A
  3%|▎         | 31/992 [00:28<08:30,  1.88it/s][A
  3%|▎         | 

 30%|██▉       | 295/992 [04:07<04:23,  2.65it/s][A
 30%|██▉       | 297/992 [04:14<13:58,  1.21s/it][A
 30%|███       | 299/992 [04:14<10:05,  1.14it/s][A
 30%|███       | 301/992 [04:14<07:19,  1.57it/s][A
 31%|███       | 303/992 [04:14<05:23,  2.13it/s][A
 31%|███       | 305/992 [04:20<14:31,  1.27s/it][A
 31%|███       | 307/992 [04:20<10:27,  1.09it/s][A
 31%|███       | 309/992 [04:21<07:31,  1.51it/s][A
 31%|███▏      | 311/992 [04:21<05:32,  2.05it/s][A
 32%|███▏      | 313/992 [04:27<14:51,  1.31s/it][A
 32%|███▏      | 314/992 [04:27<10:46,  1.05it/s][A
 32%|███▏      | 316/992 [04:28<07:47,  1.45it/s][A
 32%|███▏      | 318/992 [04:28<05:40,  1.98it/s][A
 32%|███▏      | 320/992 [04:28<04:16,  2.62it/s][A
 32%|███▏      | 322/992 [04:34<13:34,  1.22s/it][A
 33%|███▎      | 324/992 [04:34<09:42,  1.15it/s][A
 33%|███▎      | 326/992 [04:34<07:01,  1.58it/s][A
 33%|███▎      | 328/992 [04:35<05:08,  2.15it/s][A
 33%|███▎      | 330/992 [04:41<14:10,  1.29s/

 59%|█████▉    | 583/992 [08:15<03:12,  2.13it/s][A
 59%|█████▉    | 585/992 [08:22<08:38,  1.27s/it][A
 59%|█████▉    | 587/992 [08:22<06:08,  1.10it/s][A
 59%|█████▉    | 589/992 [08:22<04:25,  1.52it/s][A
 60%|█████▉    | 591/992 [08:22<03:13,  2.07it/s][A
 60%|█████▉    | 593/992 [08:29<08:46,  1.32s/it][A
 60%|█████▉    | 595/992 [08:29<06:15,  1.06it/s][A
 60%|██████    | 597/992 [08:29<04:31,  1.46it/s][A
 60%|██████    | 599/992 [08:29<03:17,  1.99it/s][A
 61%|██████    | 601/992 [08:35<08:34,  1.32s/it][A
 61%|██████    | 603/992 [08:36<06:06,  1.06it/s][A
 61%|██████    | 605/992 [08:36<04:37,  1.40it/s][A
 61%|██████    | 607/992 [08:36<03:21,  1.91it/s][A
 61%|██████▏   | 609/992 [08:42<08:18,  1.30s/it][A
 62%|██████▏   | 611/992 [08:43<05:56,  1.07it/s][A
 62%|██████▏   | 612/992 [08:43<04:20,  1.46it/s][A
 62%|██████▏   | 613/992 [08:43<03:15,  1.94it/s][A
 62%|██████▏   | 615/992 [08:43<02:25,  2.59it/s][A
 62%|██████▏   | 617/992 [08:49<07:46,  1.24s/

 88%|████████▊ | 876/992 [12:30<01:25,  1.36it/s][A
 89%|████████▊ | 878/992 [12:30<01:00,  1.88it/s][A
 89%|████████▊ | 880/992 [12:30<00:44,  2.52it/s][A
 89%|████████▉ | 882/992 [12:36<02:17,  1.25s/it][A
 89%|████████▉ | 884/992 [12:36<01:37,  1.11it/s][A
 89%|████████▉ | 886/992 [12:37<01:09,  1.53it/s][A
 90%|████████▉ | 888/992 [12:37<00:49,  2.10it/s][A
 90%|████████▉ | 890/992 [12:43<02:08,  1.26s/it][A
 90%|████████▉ | 891/992 [12:43<01:32,  1.10it/s][A
 90%|█████████ | 893/992 [12:43<01:05,  1.52it/s][A
 90%|█████████ | 895/992 [12:43<00:46,  2.07it/s][A
 90%|█████████ | 897/992 [12:49<02:00,  1.27s/it][A
 91%|█████████ | 898/992 [12:50<01:26,  1.09it/s][A
 91%|█████████ | 900/992 [12:50<01:01,  1.51it/s][A
 91%|█████████ | 902/992 [12:50<00:43,  2.05it/s][A
 91%|█████████ | 904/992 [12:50<00:31,  2.76it/s][A
 91%|█████████▏| 906/992 [12:56<01:43,  1.20s/it][A
 92%|█████████▏| 908/992 [12:56<01:12,  1.16it/s][A
 92%|█████████▏| 910/992 [12:57<00:51,  1.59it

Time: 04:17:23...Epoch: 3/5... Step: 6000... Loss: 0.004105... Val Loss: 0.071666
Validation loss decreased (0.072334 --> 0.071666).  Saving model ...



 50%|████▉     | 999/2000 [24:46<13:26,  1.24it/s]  
  0%|          | 0/992 [00:00<?, ?it/s][A
  0%|          | 1/992 [00:09<2:43:36,  9.91s/it][A
  0%|          | 3/992 [00:10<1:54:38,  6.95s/it][A
  1%|          | 5/992 [00:11<1:22:33,  5.02s/it][A
  1%|          | 6/992 [00:11<58:17,  3.55s/it]  [A
  1%|          | 8/992 [00:11<41:08,  2.51s/it][A
  1%|          | 9/992 [00:18<1:05:55,  4.02s/it][A
  1%|          | 11/992 [00:19<46:24,  2.84s/it] [A
  1%|          | 12/992 [00:19<33:15,  2.04s/it][A
  1%|▏         | 13/992 [00:20<27:58,  1.71s/it][A
  2%|▏         | 15/992 [00:20<20:00,  1.23s/it][A
  2%|▏         | 17/992 [00:25<27:40,  1.70s/it][A
  2%|▏         | 18/992 [00:26<21:26,  1.32s/it][A
  2%|▏         | 20/992 [00:26<15:19,  1.06it/s][A
  2%|▏         | 21/992 [00:27<13:26,  1.20it/s][A
  2%|▏         | 23/992 [00:27<09:42,  1.66it/s][A
  3%|▎         | 25/992 [00:32<20:07,  1.25s/it][A
  3%|▎         | 26/992 [00:33<19:28,  1.21s/it][A
  3%|▎        

 26%|██▌       | 260/992 [03:49<13:43,  1.12s/it][A
 26%|██▋       | 262/992 [03:49<09:50,  1.24it/s][A
 27%|██▋       | 263/992 [03:49<07:17,  1.67it/s][A
 27%|██▋       | 264/992 [03:49<05:29,  2.21it/s][A
 27%|██▋       | 265/992 [03:54<23:34,  1.95s/it][A
 27%|██▋       | 266/992 [03:55<17:26,  1.44s/it][A
 27%|██▋       | 268/992 [03:55<12:24,  1.03s/it][A
 27%|██▋       | 269/992 [03:55<09:19,  1.29it/s][A
 27%|██▋       | 271/992 [03:55<06:46,  1.77it/s][A
 27%|██▋       | 272/992 [03:55<05:48,  2.07it/s][A
 28%|██▊       | 273/992 [04:01<24:05,  2.01s/it][A
 28%|██▊       | 274/992 [04:01<18:07,  1.52s/it][A
 28%|██▊       | 276/992 [04:01<12:56,  1.08s/it][A
 28%|██▊       | 277/992 [04:02<09:26,  1.26it/s][A
 28%|██▊       | 279/992 [04:02<06:51,  1.73it/s][A
 28%|██▊       | 280/992 [04:02<06:07,  1.94it/s][A
 28%|██▊       | 281/992 [04:08<24:34,  2.07s/it][A
 28%|██▊       | 282/992 [04:08<18:11,  1.54s/it][A
 29%|██▊       | 284/992 [04:08<12:57,  1.10s/

 53%|█████▎    | 521/992 [07:26<09:11,  1.17s/it][A
 53%|█████▎    | 523/992 [07:26<06:36,  1.18it/s][A
 53%|█████▎    | 525/992 [07:27<06:00,  1.30it/s][A
 53%|█████▎    | 527/992 [07:28<04:23,  1.76it/s][A
 53%|█████▎    | 529/992 [07:33<09:13,  1.20s/it][A
 54%|█████▎    | 531/992 [07:33<06:36,  1.16it/s][A
 54%|█████▎    | 533/992 [07:34<05:49,  1.31it/s][A
 54%|█████▍    | 535/992 [07:34<04:13,  1.81it/s][A
 54%|█████▍    | 537/992 [07:39<08:50,  1.17s/it][A
 54%|█████▍    | 539/992 [07:40<06:20,  1.19it/s][A
 55%|█████▍    | 541/992 [07:41<05:42,  1.32it/s][A
 55%|█████▍    | 542/992 [07:41<04:13,  1.77it/s][A
 55%|█████▍    | 544/992 [07:41<03:05,  2.41it/s][A
 55%|█████▍    | 545/992 [07:46<13:40,  1.84s/it][A
 55%|█████▌    | 547/992 [07:46<09:41,  1.31s/it][A
 55%|█████▌    | 549/992 [07:48<08:13,  1.11s/it][A
 56%|█████▌    | 551/992 [07:48<05:55,  1.24it/s][A
 56%|█████▌    | 553/992 [07:53<09:42,  1.33s/it][A
 56%|█████▌    | 555/992 [07:53<06:56,  1.05it

 79%|███████▉  | 784/992 [11:04<01:38,  2.12it/s][A
 79%|███████▉  | 786/992 [11:11<04:18,  1.25s/it][A
 79%|███████▉  | 788/992 [11:11<03:03,  1.11it/s][A
 80%|███████▉  | 790/992 [11:11<02:11,  1.53it/s][A
 80%|███████▉  | 792/992 [11:11<01:35,  2.09it/s][A
 80%|████████  | 794/992 [11:18<04:23,  1.33s/it][A
 80%|████████  | 795/992 [11:18<03:09,  1.04it/s][A
 80%|████████  | 796/992 [11:18<02:19,  1.40it/s][A
 80%|████████  | 798/992 [11:18<01:40,  1.93it/s][A
 81%|████████  | 800/992 [11:18<01:14,  2.57it/s][A
 81%|████████  | 802/992 [11:25<03:55,  1.24s/it][A
 81%|████████  | 804/992 [11:25<02:47,  1.12it/s][A
 81%|████████  | 805/992 [11:25<02:06,  1.47it/s][A
 81%|████████▏ | 807/992 [11:25<01:31,  2.02it/s][A
 82%|████████▏ | 809/992 [11:30<03:14,  1.06s/it][A
 82%|████████▏ | 810/992 [11:32<04:06,  1.35s/it][A
 82%|████████▏ | 812/992 [11:32<02:54,  1.03it/s][A
 82%|████████▏ | 813/992 [11:32<02:10,  1.37it/s][A
 82%|████████▏ | 815/992 [11:32<01:33,  1.89it

Time: 04:56:17...Epoch: 4/5... Step: 7000... Loss: 0.029471... Val Loss: 0.073481


100%|█████████▉| 1999/2000 [1:11:15<00:01,  1.79s/it]  
  0%|          | 0/992 [00:00<?, ?it/s][A
  0%|          | 1/992 [00:07<2:01:52,  7.38s/it][A
  0%|          | 3/992 [00:07<1:25:36,  5.19s/it][A
  1%|          | 5/992 [00:07<1:00:07,  3.66s/it][A
  1%|          | 7/992 [00:07<42:22,  2.58s/it]  [A
  1%|          | 9/992 [00:14<45:01,  2.75s/it][A
  1%|          | 11/992 [00:14<31:47,  1.94s/it][A
  1%|▏         | 13/992 [00:14<22:45,  1.39s/it][A
  2%|▏         | 15/992 [00:14<16:19,  1.00s/it][A
  2%|▏         | 17/992 [00:21<27:00,  1.66s/it][A
  2%|▏         | 18/992 [00:21<19:28,  1.20s/it][A
  2%|▏         | 20/992 [00:21<13:57,  1.16it/s][A
  2%|▏         | 22/992 [00:21<10:06,  1.60it/s][A
  2%|▏         | 24/992 [00:21<07:23,  2.18it/s][A
  3%|▎         | 26/992 [00:27<20:03,  1.25s/it][A
  3%|▎         | 27/992 [00:28<15:04,  1.07it/s][A
  3%|▎         | 29/992 [00:28<10:50,  1.48it/s][A
  3%|▎         | 31/992 [00:28<07:56,  2.01it/s][A
  3%|▎        

 25%|██▌       | 252/992 [03:38<13:57,  1.13s/it][A
 26%|██▌       | 254/992 [03:38<10:01,  1.23it/s][A
 26%|██▌       | 256/992 [03:38<07:16,  1.69it/s][A
 26%|██▌       | 258/992 [03:44<15:37,  1.28s/it][A
 26%|██▌       | 259/992 [03:44<11:20,  1.08it/s][A
 26%|██▋       | 261/992 [03:44<08:11,  1.49it/s][A
 26%|██▋       | 262/992 [03:44<06:14,  1.95it/s][A
 27%|██▋       | 264/992 [03:44<04:37,  2.62it/s][A
 27%|██▋       | 266/992 [03:50<13:58,  1.16s/it][A
 27%|██▋       | 268/992 [03:50<10:02,  1.20it/s][A
 27%|██▋       | 270/992 [03:51<07:17,  1.65it/s][A
 27%|██▋       | 272/992 [03:51<05:25,  2.21it/s][A
 28%|██▊       | 274/992 [03:57<14:48,  1.24s/it][A
 28%|██▊       | 276/992 [03:57<10:35,  1.13it/s][A
 28%|██▊       | 278/992 [03:57<07:43,  1.54it/s][A
 28%|██▊       | 280/992 [03:57<05:37,  2.11it/s][A
 28%|██▊       | 282/992 [04:03<14:17,  1.21s/it][A
 29%|██▊       | 284/992 [04:03<10:13,  1.15it/s][A
 29%|██▉       | 286/992 [04:04<07:25,  1.59it

 49%|████▉     | 490/992 [06:57<11:24,  1.36s/it][A
 49%|████▉     | 491/992 [06:58<08:44,  1.05s/it][A
 50%|████▉     | 493/992 [06:58<06:16,  1.33it/s][A
 50%|████▉     | 495/992 [06:58<04:33,  1.82it/s][A
 50%|█████     | 497/992 [07:02<08:27,  1.02s/it][A
 50%|█████     | 498/992 [07:04<11:10,  1.36s/it][A
 50%|█████     | 499/992 [07:04<08:05,  1.02it/s][A
 50%|█████     | 500/992 [07:04<05:55,  1.38it/s][A
 51%|█████     | 502/992 [07:05<04:17,  1.91it/s][A
 51%|█████     | 503/992 [07:05<03:18,  2.46it/s][A
 51%|█████     | 505/992 [07:09<07:15,  1.12it/s][A
 51%|█████     | 506/992 [07:11<10:33,  1.30s/it][A
 51%|█████     | 508/992 [07:11<07:36,  1.06it/s][A
 51%|█████▏    | 510/992 [07:11<05:27,  1.47it/s][A
 52%|█████▏    | 512/992 [07:12<04:01,  1.99it/s][A
 52%|█████▏    | 513/992 [07:16<12:37,  1.58s/it][A
 52%|█████▏    | 514/992 [07:18<13:18,  1.67s/it][A
 52%|█████▏    | 516/992 [07:18<09:29,  1.20s/it][A
 52%|█████▏    | 518/992 [07:18<06:47,  1.16it

 74%|███████▍  | 737/992 [10:26<08:53,  2.09s/it][A
 74%|███████▍  | 738/992 [10:26<06:20,  1.50s/it][A
 75%|███████▍  | 740/992 [10:27<04:58,  1.19s/it][A
 75%|███████▍  | 742/992 [10:28<03:33,  1.17it/s][A
 75%|███████▌  | 744/992 [10:28<02:33,  1.62it/s][A
 75%|███████▌  | 745/992 [10:33<08:25,  2.05s/it][A
 75%|███████▌  | 747/992 [10:33<05:57,  1.46s/it][A
 75%|███████▌  | 748/992 [10:34<05:03,  1.24s/it][A
 76%|███████▌  | 750/992 [10:34<03:37,  1.11it/s][A
 76%|███████▌  | 752/992 [10:34<02:37,  1.53it/s][A
 76%|███████▌  | 753/992 [10:40<08:14,  2.07s/it][A
 76%|███████▌  | 755/992 [10:40<05:49,  1.48s/it][A
 76%|███████▌  | 756/992 [10:41<05:16,  1.34s/it][A
 76%|███████▋  | 758/992 [10:41<03:44,  1.04it/s][A
 77%|███████▋  | 760/992 [10:41<02:40,  1.44it/s][A
 77%|███████▋  | 761/992 [10:47<08:05,  2.10s/it][A
 77%|███████▋  | 763/992 [10:47<05:41,  1.49s/it][A
 77%|███████▋  | 764/992 [10:48<05:32,  1.46s/it][A
 77%|███████▋  | 766/992 [10:48<03:55,  1.04s/

 99%|█████████▉| 980/992 [13:53<00:15,  1.31s/it][A
 99%|█████████▉| 982/992 [13:53<00:09,  1.06it/s][A
 99%|█████████▉| 984/992 [13:53<00:05,  1.46it/s][A
 99%|█████████▉| 985/992 [13:58<00:12,  1.74s/it][A
 99%|█████████▉| 986/992 [13:59<00:10,  1.68s/it][A
100%|█████████▉| 988/992 [14:00<00:05,  1.29s/it][A
100%|█████████▉| 990/992 [14:00<00:01,  1.09it/s][A
100%|██████████| 992/992 [14:00<00:00,  1.51it/s][A

Time: 05:42:42...Epoch: 4/5... Step: 8000... Loss: 0.083177... Val Loss: 0.063347


100%|██████████| 2000/2000 [1:25:18<00:00, 254.02s/it]
  0%|          | 0/2000 [00:00<?, ?it/s]

Validation loss decreased (0.071666 --> 0.063347).  Saving model ...


 50%|████▉     | 999/2000 [24:28<15:25,  1.08it/s]  
  0%|          | 0/992 [00:00<?, ?it/s][A
  0%|          | 1/992 [00:10<3:00:25, 10.92s/it][A
  0%|          | 3/992 [00:11<2:06:28,  7.67s/it][A
  0%|          | 4/992 [00:11<1:29:20,  5.43s/it][A
  1%|          | 5/992 [00:11<1:03:06,  3.84s/it][A
  1%|          | 7/992 [00:11<44:29,  2.71s/it]  [A
  1%|          | 9/992 [00:19<49:24,  3.02s/it][A
  1%|          | 10/992 [00:19<38:20,  2.34s/it][A
  1%|          | 12/992 [00:20<27:33,  1.69s/it][A
  1%|▏         | 14/992 [00:20<19:46,  1.21s/it][A
  2%|▏         | 16/992 [00:20<14:08,  1.15it/s][A
  2%|▏         | 17/992 [00:27<44:14,  2.72s/it][A
  2%|▏         | 19/992 [00:27<31:40,  1.95s/it][A
  2%|▏         | 20/992 [00:28<23:14,  1.43s/it][A
  2%|▏         | 22/992 [00:28<16:34,  1.03s/it][A
  2%|▏         | 24/992 [00:28<13:00,  1.24it/s][A
  3%|▎         | 25/992 [00:34<38:02,  2.36s/it][A
  3%|▎         | 27/992 [00:34<26:53,  1.67s/it][A
  3%|▎         |

 23%|██▎       | 224/992 [03:25<11:38,  1.10it/s][A
 23%|██▎       | 225/992 [03:29<21:30,  1.68s/it][A
 23%|██▎       | 227/992 [03:30<16:44,  1.31s/it][A
 23%|██▎       | 229/992 [03:30<12:00,  1.06it/s][A
 23%|██▎       | 230/992 [03:32<16:41,  1.31s/it][A
 23%|██▎       | 232/992 [03:32<11:55,  1.06it/s][A
 23%|██▎       | 233/992 [03:36<22:11,  1.75s/it][A
 24%|██▎       | 235/992 [03:36<16:32,  1.31s/it][A
 24%|██▍       | 237/992 [03:36<11:54,  1.06it/s][A
 24%|██▍       | 238/992 [03:39<16:47,  1.34s/it][A
 24%|██▍       | 240/992 [03:39<12:02,  1.04it/s][A
 24%|██▍       | 241/992 [03:43<22:05,  1.76s/it][A
 24%|██▍       | 243/992 [03:43<16:09,  1.29s/it][A
 25%|██▍       | 245/992 [03:43<11:35,  1.07it/s][A
 25%|██▍       | 246/992 [03:45<16:36,  1.34s/it][A
 25%|██▌       | 248/992 [03:46<11:54,  1.04it/s][A
 25%|██▌       | 249/992 [03:49<22:00,  1.78s/it][A
 25%|██▌       | 251/992 [03:50<16:25,  1.33s/it][A
 26%|██▌       | 253/992 [03:50<11:45,  1.05it

 47%|████▋     | 464/992 [06:51<07:27,  1.18it/s][A
 47%|████▋     | 465/992 [06:54<14:58,  1.70s/it][A
 47%|████▋     | 467/992 [06:55<11:44,  1.34s/it][A
 47%|████▋     | 469/992 [06:56<08:22,  1.04it/s][A
 47%|████▋     | 470/992 [06:57<09:47,  1.13s/it][A
 48%|████▊     | 472/992 [06:57<07:02,  1.23it/s][A
 48%|████▊     | 473/992 [07:01<15:13,  1.76s/it][A
 48%|████▊     | 475/992 [07:02<12:04,  1.40s/it][A
 48%|████▊     | 477/992 [07:02<08:35,  1.00s/it][A
 48%|████▊     | 478/992 [07:04<10:09,  1.19s/it][A
 48%|████▊     | 480/992 [07:04<07:19,  1.16it/s][A
 48%|████▊     | 481/992 [07:08<15:27,  1.82s/it][A
 49%|████▊     | 483/992 [07:09<11:59,  1.41s/it][A
 49%|████▉     | 485/992 [07:09<08:32,  1.01s/it][A
 49%|████▉     | 486/992 [07:11<10:16,  1.22s/it][A
 49%|████▉     | 488/992 [07:11<07:22,  1.14it/s][A
 49%|████▉     | 489/992 [07:15<15:27,  1.84s/it][A
 49%|████▉     | 491/992 [07:16<11:40,  1.40s/it][A
 50%|████▉     | 493/992 [07:16<08:22,  1.01s/

 69%|██████▉   | 685/992 [10:00<04:00,  1.27it/s][A
 69%|██████▉   | 686/992 [10:01<04:27,  1.14it/s][A
 69%|██████▉   | 688/992 [10:02<03:18,  1.53it/s][A
 69%|██████▉   | 689/992 [10:06<09:01,  1.79s/it][A
 70%|██████▉   | 690/992 [10:06<06:28,  1.29s/it][A
 70%|██████▉   | 691/992 [10:07<05:28,  1.09s/it][A
 70%|██████▉   | 693/992 [10:07<03:56,  1.26it/s][A
 70%|██████▉   | 694/992 [10:08<04:27,  1.11it/s][A
 70%|███████   | 696/992 [10:08<03:14,  1.52it/s][A
 70%|███████   | 697/992 [10:13<08:39,  1.76s/it][A
 70%|███████   | 699/992 [10:14<06:36,  1.35s/it][A
 71%|███████   | 701/992 [10:14<04:44,  1.02it/s][A
 71%|███████   | 702/992 [10:15<05:24,  1.12s/it][A
 71%|███████   | 704/992 [10:15<03:51,  1.24it/s][A
 71%|███████   | 705/992 [10:20<08:42,  1.82s/it][A
 71%|███████▏  | 707/992 [10:20<06:39,  1.40s/it][A
 71%|███████▏  | 709/992 [10:21<04:44,  1.00s/it][A
 72%|███████▏  | 710/992 [10:22<05:06,  1.09s/it][A
 72%|███████▏  | 712/992 [10:22<03:39,  1.27it

 92%|█████████▏| 915/992 [13:19<01:56,  1.51s/it][A
 92%|█████████▏| 917/992 [13:19<01:21,  1.08s/it][A
 93%|█████████▎| 918/992 [13:19<01:05,  1.12it/s][A
 93%|█████████▎| 920/992 [13:19<00:46,  1.54it/s][A
 93%|█████████▎| 921/992 [13:26<02:47,  2.36s/it][A
 93%|█████████▎| 923/992 [13:26<01:56,  1.68s/it][A
 93%|█████████▎| 925/992 [13:26<01:20,  1.20s/it][A
 93%|█████████▎| 926/992 [13:27<01:11,  1.08s/it][A
 94%|█████████▎| 928/992 [13:27<00:49,  1.29it/s][A
 94%|█████████▎| 929/992 [13:33<02:21,  2.24s/it][A
 94%|█████████▍| 931/992 [13:33<01:37,  1.59s/it][A
 94%|█████████▍| 933/992 [13:33<01:07,  1.14s/it][A
 94%|█████████▍| 935/992 [13:34<00:50,  1.13it/s][A
 94%|█████████▍| 937/992 [13:39<01:21,  1.48s/it][A
 95%|█████████▍| 939/992 [13:40<00:56,  1.06s/it][A
 95%|█████████▍| 941/992 [13:40<00:39,  1.31it/s][A
 95%|█████████▍| 942/992 [13:40<00:34,  1.44it/s][A
 95%|█████████▌| 944/992 [13:40<00:24,  1.97it/s][A
 95%|█████████▌| 945/992 [13:46<01:38,  2.10s/

Time: 06:21:33...Epoch: 5/5... Step: 9000... Loss: 0.002032... Val Loss: 0.075584


100%|█████████▉| 1999/2000 [1:12:02<00:01,  1.14s/it]  
  0%|          | 0/992 [00:00<?, ?it/s][A
  0%|          | 1/992 [00:07<2:03:53,  7.50s/it][A
  0%|          | 3/992 [00:07<1:26:59,  5.28s/it][A
  1%|          | 5/992 [00:07<1:01:06,  3.71s/it][A
  1%|          | 7/992 [00:07<43:05,  2.62s/it]  [A
  1%|          | 9/992 [00:14<45:31,  2.78s/it][A
  1%|          | 11/992 [00:14<32:39,  2.00s/it][A
  1%|▏         | 13/992 [00:14<23:08,  1.42s/it][A
  2%|▏         | 15/992 [00:14<16:32,  1.02s/it][A
  2%|▏         | 17/992 [00:21<27:05,  1.67s/it][A
  2%|▏         | 19/992 [00:21<19:44,  1.22s/it][A
  2%|▏         | 21/992 [00:21<14:11,  1.14it/s][A
  2%|▏         | 23/992 [00:21<10:14,  1.58it/s][A
  3%|▎         | 25/992 [00:28<22:05,  1.37s/it][A
  3%|▎         | 26/992 [00:28<16:43,  1.04s/it][A
  3%|▎         | 27/992 [00:28<13:05,  1.23it/s][A
  3%|▎         | 29/992 [00:28<09:31,  1.68it/s][A
  3%|▎         | 31/992 [00:28<06:58,  2.29it/s][A
  3%|▎        

 25%|██▍       | 245/992 [03:37<10:53,  1.14it/s][A
 25%|██▍       | 247/992 [03:37<07:53,  1.57it/s][A
 25%|██▌       | 248/992 [03:38<08:40,  1.43it/s][A
 25%|██▌       | 249/992 [03:43<24:05,  1.95s/it][A
 25%|██▌       | 250/992 [03:43<19:05,  1.54s/it][A
 25%|██▌       | 251/992 [03:44<15:31,  1.26s/it][A
 26%|██▌       | 253/992 [03:44<11:07,  1.11it/s][A
 26%|██▌       | 255/992 [03:44<08:01,  1.53it/s][A
 26%|██▌       | 256/992 [03:45<09:00,  1.36it/s][A
 26%|██▌       | 257/992 [07:49<15:04:22, 73.83s/it][A
 26%|██▌       | 258/992 [07:50<10:33:24, 51.78s/it][A
 26%|██▌       | 259/992 [07:50<7:25:13, 36.44s/it] [A
 26%|██▌       | 260/992 [07:50<5:11:36, 25.54s/it][A
 26%|██▋       | 262/992 [07:51<3:37:46, 17.90s/it][A
 27%|██▋       | 264/992 [08:05<2:58:15, 14.69s/it][A
 27%|██▋       | 265/992 [08:27<3:25:57, 17.00s/it][A
 27%|██▋       | 266/992 [08:39<3:04:42, 15.27s/it][A
 27%|██▋       | 267/992 [08:59<3:22:42, 16.78s/it][A
 27%|██▋       | 268/992 

 45%|████▍     | 444/992 [25:32<10:47,  1.18s/it][A
 45%|████▍     | 445/992 [25:32<07:51,  1.16it/s][A
 45%|████▌     | 447/992 [25:32<05:42,  1.59it/s][A
 45%|████▌     | 448/992 [25:34<10:16,  1.13s/it][A
 45%|████▌     | 449/992 [25:35<08:00,  1.13it/s][A
 45%|████▌     | 450/992 [25:36<08:05,  1.12it/s][A
 45%|████▌     | 451/992 [25:38<11:54,  1.32s/it][A
 46%|████▌     | 452/992 [25:39<11:19,  1.26s/it][A
 46%|████▌     | 454/992 [25:39<08:05,  1.11it/s][A
 46%|████▌     | 456/992 [25:41<08:49,  1.01it/s][A
 46%|████▌     | 457/992 [25:42<06:30,  1.37it/s][A
 46%|████▌     | 458/992 [25:43<07:19,  1.21it/s][A
 46%|████▋     | 459/992 [25:45<11:37,  1.31s/it][A
 46%|████▋     | 460/992 [25:46<11:14,  1.27s/it][A
 46%|████▋     | 461/992 [25:46<08:09,  1.08it/s][A
 47%|████▋     | 463/992 [25:47<05:54,  1.49it/s][A
 47%|████▋     | 464/992 [25:48<08:38,  1.02it/s][A
 47%|████▋     | 466/992 [25:50<07:43,  1.13it/s][A
 47%|████▋     | 467/992 [25:52<11:40,  1.33s/

 67%|██████▋   | 666/992 [28:47<04:37,  1.17it/s][A
 67%|██████▋   | 667/992 [28:50<08:34,  1.58s/it][A
 67%|██████▋   | 668/992 [28:51<07:52,  1.46s/it][A
 68%|██████▊   | 670/992 [28:51<05:37,  1.05s/it][A
 68%|██████▊   | 672/992 [28:52<04:50,  1.10it/s][A
 68%|██████▊   | 674/992 [28:53<04:13,  1.26it/s][A
 68%|██████▊   | 675/992 [28:57<08:08,  1.54s/it][A
 68%|██████▊   | 676/992 [28:58<07:25,  1.41s/it][A
 68%|██████▊   | 678/992 [28:58<05:16,  1.01s/it][A
 69%|██████▊   | 680/992 [28:59<04:44,  1.10it/s][A
 69%|██████▉   | 682/992 [29:00<04:03,  1.27it/s][A
 69%|██████▉   | 683/992 [29:03<07:42,  1.50s/it][A
 69%|██████▉   | 684/992 [29:05<07:24,  1.44s/it][A
 69%|██████▉   | 686/992 [29:05<05:16,  1.03s/it][A
 69%|██████▉   | 688/992 [29:06<04:44,  1.07it/s][A
 69%|██████▉   | 689/992 [29:06<03:28,  1.45it/s][A
 70%|██████▉   | 690/992 [29:08<04:17,  1.17it/s][A
 70%|██████▉   | 691/992 [29:11<07:25,  1.48s/it][A
 70%|██████▉   | 692/992 [29:12<07:14,  1.45s/

 91%|█████████ | 898/992 [32:15<01:34,  1.01s/it][A
 91%|█████████ | 899/992 [32:18<02:32,  1.64s/it][A
 91%|█████████ | 900/992 [32:18<01:51,  1.21s/it][A
 91%|█████████ | 902/992 [32:19<01:18,  1.14it/s][A
 91%|█████████ | 904/992 [32:22<01:32,  1.05s/it][A
 91%|█████████▏| 906/992 [32:22<01:13,  1.17it/s][A
 91%|█████████▏| 907/992 [32:25<02:03,  1.46s/it][A
 92%|█████████▏| 909/992 [32:25<01:26,  1.04s/it][A
 92%|█████████▏| 910/992 [32:25<01:02,  1.32it/s][A
 92%|█████████▏| 912/992 [32:28<01:16,  1.05it/s][A
 92%|█████████▏| 914/992 [32:29<01:01,  1.26it/s][A
 92%|█████████▏| 915/992 [32:32<01:44,  1.35s/it][A
 92%|█████████▏| 917/992 [32:32<01:12,  1.03it/s][A
 93%|█████████▎| 919/992 [32:32<00:51,  1.42it/s][A
 93%|█████████▎| 920/992 [32:35<01:38,  1.37s/it][A
 93%|█████████▎| 922/992 [32:36<01:14,  1.06s/it][A
 93%|█████████▎| 923/992 [32:38<01:48,  1.58s/it][A
 93%|█████████▎| 925/992 [32:39<01:15,  1.13s/it][A
 93%|█████████▎| 927/992 [32:39<00:52,  1.23it

Time: 07:28:26...Epoch: 5/5... Step: 10000... Loss: 0.083125... Val Loss: 0.062692
Validation loss decreased (0.063347 --> 0.062692).  Saving model ...





# Reference
* https://github.com/ronghanghu/pytorch-gve-lrcn/blob/master/models/pretrained_models.py