In [1]:
#run
# clinc
import collections

import numpy as np
import pandas as pd
from openprompt.data_utils.utils import InputExample
import os
import json, csv
from abc import ABC, abstractmethod
from collections import defaultdict, Counter
from typing import List, Dict, Callable

from openprompt.utils.logging import logger
from openprompt.data_utils.data_processor import DataProcessor


class ClincProcessor(DataProcessor):
    """
    # TODO citation

    Examples:

    .. code-block:: python

        from openprompt.data_utils.conditional_generation_dataset import PROCESSORS

        base_path = "datasets/CondGen"

        dataset_name = "webnlg_2017"
        dataset_path = os.path.join(base_path, dataset_name)
        processor = PROCESSORS[dataset_name.lower()]()
        train_dataset = processor.get_train_examples(dataset_path)
        valid_dataset = processor.get_train_examples(dataset_path)
        test_dataset = processor.get_test_examples(dataset_path)

        assert len(train_dataset) == 18025
        assert len(valid_dataset) == 18025
        assert len(test_dataset) == 4928
        assert test_dataset[0].text_a == " | Abilene_Regional_Airport : cityServed : Abilene,_Texas"
        assert test_dataset[0].text_b == ""
        assert test_dataset[0].tgt_text == "Abilene, Texas is served by the Abilene regional airport."
    """

    def __init__(self, is_id_only=True, mode=None):
        super().__init__()
        self.labels = None
        self.is_id_only = is_id_only
        self.monitor_mode = 'label' if mode == 150 else 'domain'

    def get_examples(self, data_dir: str, split: str, frac: int = 0) -> List[InputExample]:
        examples = []
        if split == 'train' and frac:
            path = os.path.join(data_dir, "{}_{}.csv".format(split, frac))
        else:
            path = os.path.join(data_dir, "{}.csv".format(split))
        data = pd.read_csv(path)
        intents = list(data["intent"])
        domian_index = list(data['domain_index'])
        indexs = list(data["index"])
        utts = list(data["utt"])
        id_utt = [utts[i] for i in range(len(utts)) if indexs[i] != -1]
        # id_intent = [intents[i] for i in range(len(utts)) if indexs[i] != -1]
        ood_utt = [utts[i] for i in range(len(utts)) if indexs[i] == -1]
        if self.is_id_only:
            utts = id_utt
            is_oods = [0 for _ in range(len(utts))]
            new_indexs = [indexs[i] for i in range(len(indexs)) if indexs[i] != -1]
            new_domian_index = [domian_index[i] for i in range(len(domian_index)) if domian_index[i] != -1]
        else:
            utts = id_utt + ood_utt
            is_oods = [0 for _ in range(len(id_utt))] + [1 for _ in range(len(ood_utt))]
            new_indexs = [indexs[i] for i in range(len(indexs)) if indexs[i] != -1] + [-1 for _ in range(len(ood_utt))]
            new_domian_index = [domian_index[i] for i in range(len(domian_index)) if domian_index[i] != -1] + [-1 for _
                                                                                                               in range(
                    len(ood_utt))]
            assert len(new_indexs) == len(utts)
            assert len(new_domian_index) == len(utts)
        if self.monitor_mode == 'label':
            monitor = new_indexs
        elif self.monitor_mode == 'domain':
            monitor = new_domian_index
        for i, (tgt, is_ood, intent) in enumerate(zip(utts, is_oods, monitor)):
            example = InputExample(guid=str(i), text_a="", tgt_text=tgt, meta={"is_ood": is_ood},
                                   label=intent)  # label=intent  tgt_text="the intent is"
            examples.append(example)

        return examples

    def get_src_tgt_len_ratio(self, ):
        pass

    def get_label_words(self, data_dir):
        path = os.path.join(data_dir, "train.csv")
        data = pd.read_csv(path)
        intents = list(data["intent"])
        domian_index = list(data['index'])
        result = collections.defaultdict(str)
        for intent, index in zip(intents, domian_index):
            result[index] = intent
        result = sorted(result.items(), key=lambda x: x[0])
        result = [[b] for a, b in result]
        return result



In [2]:
#run
# 数据集处理
# imdb
import collections

import numpy as np
import pandas as pd
from openprompt.data_utils.utils import InputExample
import os
import json, csv
from abc import ABC, abstractmethod
from collections import defaultdict, Counter
from typing import List, Dict, Callable

from openprompt.utils.logging import logger
from openprompt.data_utils.data_processor import DataProcessor

class IMDBProcessor(DataProcessor):
    """
    # TODO citation

    Examples:

    .. code-block:: python

        from openprompt.data_utils.conditional_generation_dataset import PROCESSORS

        base_path = "datasets/CondGen"

        dataset_name = "webnlg_2017"
        dataset_path = os.path.join(base_path, dataset_name)
        processor = PROCESSORS[dataset_name.lower()]()
        train_dataset = processor.get_train_examples(dataset_path)
        valid_dataset = processor.get_train_examples(dataset_path)
        test_dataset = processor.get_test_examples(dataset_path)

        assert len(train_dataset) == 18025
        assert len(valid_dataset) == 18025
        assert len(test_dataset) == 4928
        assert test_dataset[0].text_a == " | Abilene_Regional_Airport : cityServed : Abilene,_Texas"
        assert test_dataset[0].text_b == ""
        assert test_dataset[0].tgt_text == "Abilene, Texas is served by the Abilene regional airport."
    """

    def __init__(self, is_id_only=True, mode=None):
        super().__init__()
        self.labels = None
        self.is_id_only = is_id_only
        self.monitor_mode = 'label'

    def get_examples(self, data_dir: str, split: str, frac: int = 0) -> List[InputExample]:
        examples = []
        if split == 'train' and frac:
            path = os.path.join(data_dir, "{}_{}.csv".format(split, frac))
        else:
            path = os.path.join(data_dir, "{}.csv".format(split))
        data = pd.read_csv(path)
        intents = list(data["intent"])
        # domian_index = list(data['domain_index'])
        indexs = list(data["index"])
        utts = list(data["utt"])
        id_utt = [utts[i] for i in range(len(utts)) if indexs[i] != -1]
        # id_intent = [intents[i] for i in range(len(utts)) if indexs[i] != -1]
        ood_utt = [utts[i] for i in range(len(utts)) if indexs[i] == -1]
        if self.is_id_only:
            utts = id_utt
            is_oods = [0 for _ in range(len(utts))]
            new_indexs = [indexs[i] for i in range(len(indexs)) if indexs[i] != -1]
            # new_domian_index = [domian_index[i] for i in range(len(domian_index)) if domian_index[i] != -1]
        else:
            utts = id_utt + ood_utt
            is_oods = [0 for _ in range(len(id_utt))] + [1 for _ in range(len(ood_utt))]
            new_indexs = [indexs[i] for i in range(len(indexs)) if indexs[i] != -1] + [-1 for _ in range(len(ood_utt))]
            # new_domian_index = [domian_index[i] for i in range(len(domian_index)) if domian_index[i] != -1] + [-1 for _ in range(len(ood_utt))]
            assert len(new_indexs) == len(utts)
            # assert len(new_domian_index) == len(utts)
        if self.monitor_mode == 'label':
            monitor = new_indexs
        # elif self.monitor_mode == 'domain':
        #     monitor = new_domian_index
        for i, (tgt, is_ood, intent) in enumerate(zip(utts, is_oods, new_indexs)):
            example = InputExample(guid=str(i), text_a="", tgt_text=tgt, meta={"is_ood": is_ood},
                                   label=intent if is_ood == 0 else -1)  # label=intent  tgt_text="the intent is"
            examples.append(example)
        return examples

    def get_src_tgt_len_ratio(self, ):
        pass

    def get_label_words(self, data_dir):
        path = os.path.join(data_dir, "train.csv")
        data = pd.read_csv(path)
        intents = list(data["intent"])
        domian_index = list(data['index'])
        result = collections.defaultdict(str)
        for intent, index in zip(intents, domian_index):
            result[index] = intent
        result = sorted(result.items(), key=lambda x: x[0])
        result = [[b] for a, b in result]
        return result

In [3]:
# run
import yaml
def load_parameters_from_yaml(file_path):
    with open(file_path, 'r') as file:
        parameters = yaml.safe_load(file)
    return parameters
yaml_file_path = 'paras copy.yml'
loaded_parameters = load_parameters_from_yaml(yaml_file_path)
# print("Loaded Parameters:")
# print(loaded_parameters['dataset']['image_size'])
config = loaded_parameters

In [4]:
# run
if config['dataset'] == 'IMDB':
    OOD_DataProcessor = IMDBProcessor
    datasets_dir = "./datasets/imdb_yelp"
    max_seq_length = 256
    batch_size = config['batch_size']

elif config['dataset'] == 'clinc':
    OOD_DataProcessor = ClincProcessor 
    datasets_dir = "./datasets/clinc150/"
    max_seq_length = 128
    batch_size = config['batch_size']


In [5]:
# run
dataset = {}
dataset['train'] = OOD_DataProcessor(True).get_examples(datasets_dir, "train")
# dataset['val'] = OOD_DataProcessor(True).get_examples(datasets_dir, "valid")

dataset['val'] = OOD_DataProcessor(True).get_examples(datasets_dir, "test")

dataset["val_ood"] = OOD_DataProcessor(False).get_examples(datasets_dir, "valid")
dataset['test'] = OOD_DataProcessor(False).get_examples(datasets_dir, "test")
# print(dataset['val'][0])

In [6]:
# run
# dataloader
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, AutoTokenizer, AutoModelForCausalLM
from typing import List, Dict

class TextClassificationDataset(Dataset):
    def __init__(self, data: List[Dict[str, str]], tokenizer: AutoTokenizer, max_length: int = 128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data[idx]["tgt_text"]
        label = self.data[idx]["label"]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding="max_length",
            # padding=True, 
            truncation=True,
            return_tensors="pt"
        )
        
        return {
            "input_ids": encoding["input_ids"].flatten(),
            "attention_mask": encoding["attention_mask"].flatten(),
            "label": torch.tensor(label, dtype=torch.long)
        }

def create_data_loader(data: List[Dict[str, str]], tokenizer: AutoTokenizer, batch_size: int = 32, max_length: int = 128):
    dataset = TextClassificationDataset(data, tokenizer, max_length)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return data_loader

def create_test_data_loader(data: List[Dict[str, str]], tokenizer: AutoTokenizer, batch_size: int = 32, max_length: int = 128):
    dataset = TextClassificationDataset(data, tokenizer, max_length)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    return data_loader

In [7]:
# run
# 评价指标
import numpy as np
import sklearn
from sklearn import metrics


def compute_all_metrics(conf, label, pred):
    np.set_printoptions(precision=3)
    recall = 0.95
    auroc, aupr_in, aupr_out, fpr = auc_and_fpr_recall(conf, label, recall)

    accuracy = acc(pred, label)

    results = [fpr, auroc, aupr_in, aupr_out, accuracy]

    return results


# accuracy
def acc(pred, label):
    ind_pred = pred[label != -1] #id acc
    ind_label = label[label != -1]

    # ind_pred = pred #all acc
    # ind_label = label

    num_tp = np.sum(ind_pred == ind_label)
    acc = num_tp / len(ind_label)

    return acc


# fpr_recall
def fpr_recall(conf, label, tpr):
    gt = np.ones_like(label)
    gt[label == -1] = 0

    fpr_list, tpr_list, threshold_list = metrics.roc_curve(gt, conf)
    fpr = fpr_list[np.argmax(tpr_list >= tpr)]
    thresh = threshold_list[np.argmax(tpr_list >= tpr)]
    return fpr, thresh


# auc
def auc_and_fpr_recall(conf, label, tpr_th):
    # following convention in ML we treat OOD as positive
    ood_indicator = np.zeros_like(label)
    ood_indicator[label == -1] = 1

    # in the postprocessor we assume ID samples will have larger
    # "conf" values than OOD samples
    # therefore here we need to negate the "conf" values
    
    fpr_list, tpr_list, thresholds = metrics.roc_curve(ood_indicator, -conf)
    fpr = fpr_list[np.argmax(tpr_list >= tpr_th)]

    precision_in, recall_in, thresholds_in \
        = metrics.precision_recall_curve(1 - ood_indicator, conf)

    precision_out, recall_out, thresholds_out \
        = metrics.precision_recall_curve(ood_indicator, -conf)

    auroc = metrics.auc(fpr_list, tpr_list)
    aupr_in = metrics.auc(recall_in, precision_in)
    aupr_out = metrics.auc(recall_out, precision_out)

    return auroc, aupr_in, aupr_out, fpr


# ccr_fpr
def ccr_fpr(conf, fpr, pred, label):
    ind_conf = conf[label != -1]
    ind_pred = pred[label != -1]
    ind_label = label[label != -1]

    ood_conf = conf[label == -1]

    num_ind = len(ind_conf)
    num_ood = len(ood_conf)

    fp_num = int(np.ceil(fpr * num_ood))
    thresh = np.sort(ood_conf)[-fp_num]
    num_tp = np.sum((ind_conf > thresh) * (ind_pred == ind_label))
    ccr = num_tp / num_ind

    return ccr


def detection(ind_confidences,
              ood_confidences,
              n_iter=100000,
              return_data=False):
    # calculate the minimum detection error
    Y1 = ood_confidences
    X1 = ind_confidences

    start = np.min([np.min(X1), np.min(Y1)])
    end = np.max([np.max(X1), np.max(Y1)])
    gap = (end - start) / n_iter

    best_error = 1.0
    best_delta = None
    all_thresholds = []
    all_errors = []
    for delta in np.arange(start, end, gap):
        tpr = np.sum(np.sum(X1 < delta)) / np.float(len(X1))
        error2 = np.sum(np.sum(Y1 > delta)) / np.float(len(Y1))
        detection_error = (tpr + error2) / 2.0

        if return_data:
            all_thresholds.append(delta)
            all_errors.append(detection_error)

        if detection_error < best_error:
            best_error = np.minimum(best_error, detection_error)
            best_delta = delta

    if return_data:
        return best_error, best_delta, all_errors, all_thresholds
    else:
        return best_error, best_delta


In [8]:
# run
from transformers import BertTokenizer, BertForSequenceClassification, GPT2ForSequenceClassification, AutoTokenizer, AutoModelForSequenceClassification, GPT2Model, GPT2Config, GPT2Tokenizer
import torch
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

tokenizer = GPT2Tokenizer.from_pretrained("/10TB_7/GROD/gpt2", padding=True, return_tensors='pt', do_lower_case=True)
tokenizer.pad_token = tokenizer.eos_token

model = GPT2ForSequenceClassification.from_pretrained("/10TB_7/GROD/gpt2", problem_type="multi_label_classification", num_labels=config['K'])
model.config.pad_token_id = model.config.eos_token_id
model.score = nn.Linear(model.config.hidden_size, config['K'], bias=True)

train_data = [{"tgt_text": example.tgt_text, "label": example.label} for example in dataset['train']]
val_data = [{"tgt_text": example.tgt_text, "label": example.label} for example in dataset['val']]
test_data = [{"tgt_text": example.tgt_text, "label": example.label} for example in dataset['test']]

train_dataloader = create_data_loader(train_data, tokenizer, batch_size=config['batch_size'], max_length=max_seq_length)
val_dataloader = create_test_data_loader(val_data, tokenizer, batch_size=config['batch_size'], max_length=max_seq_length)
test_dataloader = create_test_data_loader(test_data, tokenizer, batch_size=config['batch_size'], max_length=max_seq_length)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at /10TB_7/GROD/gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
model = GPT2ForSequenceClassification.from_pretrained("/10TB_7/GROD/gpt2", problem_type="multi_label_classification", num_labels=config['K'])
model.config.pad_token_id = model.config.eos_token_id
model.score = nn.Linear(model.config.hidden_size, config['K'], bias=True)

# 训练模型
# import torch
# import torch.distributed as dist
# from torch.nn.parallel import DistributedDataParallel as DDP

device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')
# dist.init_process_group(backend="nccl", init_method="env://")
# device = torch.distributed.get_rank()
# torch.cuda.set_device(device)
if torch.cuda.device_count() > 1:
    print("Using", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
model = model.to(device)
# model = DDP(model, device_ids=[device])

# print(model)

# feature = (net.module.transformer(input_ids, attention_mask)[0]).clip(max=self.percentile)
# output = net(input_ids, attention_mask).logits

In [9]:
# run
import torch
import torch.nn as nn
import torch.nn.functional as F

class GRODNet(nn.Module):
    def __init__(self, backbone, feat_dim, num_classes):
        super(GRODNet, self).__init__()
        
        self.backbone = backbone
        if hasattr(self.backbone, 'fc'):
            # remove fc otherwise ddp will
            # report unused params
            self.backbone.fc = nn.Identity()

        self.lda = LDA(n_components=feat_dim)
        self.pca = PCA(n_components=feat_dim)

        self.n_cls = num_classes
        self.head1 = nn.Linear(768, 2 * num_classes)
        self.head = nn.Linear(768, self.n_cls + 1)
        self.k = nn.Parameter(torch.tensor([0.1], dtype=torch.float32, requires_grad=True))

    def forward(self, x, y, attention): #x:data feature, y:label
        
        hidden_states = self.backbone.transformer(input_ids=x, attention_mask=attention)[0]
        
        feat = hidden_states[torch.arange(x.size(0), device=hidden_states.device), -1].squeeze()
        
        self.lda.fit(feat, y)
        X_lda = self.lda.transform(feat) #(b, feat_dim)
        
        self.pca.fit(feat)
        X_pca = self.pca.transform(feat)

        return feat, X_lda, X_pca
    def intermediate_forward(self, x, attention):
        hidden_states = self.backbone.transformer(input_ids=x, attention_mask=attention)[0]
        
        feat = hidden_states[torch.arange(x.size(0), device=hidden_states.device), -1].squeeze()
        
        output = self.head(feat)
        score = torch.softmax(output, dim=1)
        score0 = output[:,:-1]
        # score0 = torch.softmax(output[:,:-1], dim=1)
        conf = torch.max(score, dim=1)
        pred = torch.argmax(score, dim=1)
        conf0 = torch.max(score0, dim=1)
        pred0 = torch.argmax(score0, dim=1)
        for i in range(pred.size(0)):
            if pred[i] == output.size(1) - 1:
                # conf[i] = 0.1
                # pred[i] = 1
                score0[i] = 0.5 * torch.ones(score0.size(1)).to(x.device)
            # else:
                # conf[i] = conf0[i]   
        # return score0
        return torch.softmax(score0, dim=1)



class LDA(nn.Module):
    def __init__(self, n_components):
        super(LDA, self).__init__()
        self.n_components = n_components

    def fit(self, X, y):
        try:
            n_samples, n_features = X.shape
        except:
            n_features = X.shape[0]
        classes = torch.unique(y)
        n_classes = len(classes)
        
        means = torch.zeros(n_classes, n_features).to(X.device)
        for i, c in enumerate(classes):
            try:
                means[i] = torch.mean(X[y==c], dim=0)
            except:
                X = torch.unsqueeze(X, dim=0)
                means[i] = torch.mean(X[y==c], dim=0)
        
        overall_mean = torch.mean(X, dim=0)
        
        within_class_scatter = torch.zeros(n_features, n_features).to(X.device)
        for i, c in enumerate(classes):
            class_samples = X[y==c]
            deviation = class_samples - means[i]
            within_class_scatter += torch.mm(deviation.t(), deviation)
        
        between_class_scatter = torch.zeros(n_features, n_features).to(X.device)
        for i, c in enumerate(classes):
            n = len(X[y==c])
            mean_diff = (means[i] - overall_mean).unsqueeze(1)
            between_class_scatter += n * torch.mm(mean_diff, mean_diff.t())

        # torch.backends.cuda.preferred_linalg_library('magma')
        # print((torch.inverse(within_class_scatter) @ between_class_scatter).size()) #(768,768)
        eigenvalues, eigenvectors = torch.linalg.eigh(
        torch.inverse(within_class_scatter @ between_class_scatter  + 1e-2 * torch.eye((within_class_scatter @ between_class_scatter).size(0)).to(X.device)
        ))
        _, top_indices = torch.topk(eigenvalues, k=self.n_components, largest=True)
        self.components = eigenvectors[:, top_indices]

    def transform(self, X):
        return torch.mm(X, self.components)

class PCA(nn.Module):
    def __init__(self, n_components):
        super(PCA, self).__init__()
        self.n_components = n_components

    def fit(self, X):
        try:
            n_samples, n_features = X.shape
        except:
            n_samples = 1
        
        self.mean = torch.mean(X, dim=0)
        X_centered = X - self.mean
        
        covariance_matrix = torch.mm(X_centered.t(), X_centered) / max((n_samples - 1),1)
        
        eigenvalues, eigenvectors = torch.linalg.eigh(covariance_matrix)
        _, top_indices = torch.topk(eigenvalues, k=self.n_components, largest=True)
        self.components = eigenvectors[:, top_indices]

    def transform(self, X):
        X_centered = X - self.mean
        return torch.mm(X_centered, self.components)


In [10]:
# run grod v2
import faiss.contrib.torch_utils
import math
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import MultivariateNormal
from torch.utils.data import DataLoader, Dataset, Subset, TensorDataset
from tqdm import tqdm
from einops import repeat

torch.autograd.set_detect_anomaly(True)
class GRODTrainer_Soft_Label:
    def __init__(self, net: nn.Module, train_loader: DataLoader,
                 config) -> None:
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.net = net.to(self.device)  
        self.train_loader = train_loader
        self.config = config

        self.n_cls = config['dataset']['num_classes']


        self.optimizer = torch.optim.AdamW(
            params=net.parameters(),
            lr=config['optimizer']['lr'],
            weight_decay=config['optimizer']['weight_decay'],
        )

        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max = 10
        )

        self.head = self.net.head
        self.head1 = self.net.head1
        self.alpha = config['trainer']['alpha']
        self.nums_rounded = config['trainer']['nums_rounded']
        self.gamma = config['trainer']['gamma']
        self.stat_smooth = 0.3
        self.batch_size = config['dataset']['batch_size']
        self.threshold = 20
        
        self.k = self.net.k
        
        self.best_accuracy = 0.0  # Update best accuracy
        self.best_model_state = None  # Save current model state    

    def train(self, epochs):
        # adjust_learning_rate(self.config, self.optimizer, epoch_idx - 1)
        # self.net = nn.DataParallel(self.net)
        self.net.train()
        self.net.to(self.device)
        for epoch_idx in range(epochs):
            loss_avg = 0.0
            train_dataiter = iter(self.train_loader)
    
            sub_datasets_in_mu = torch.zeros((self.n_cls, 768)).to(self.device) #(K,f)
            dataset_in_mu = torch.zeros(768).to(self.device) #(f)
            dataset_in_cov = torch.zeros(768, 768).to(self.device)
            sub_datasets_in_cov = torch.zeros((self.n_cls, 768, 768)).to(self.device)
            sub_datasets_in_distances = torch.zeros(self.n_cls).to(self.device)
            
            # torch.autograd.detect_anomaly(True)        
            
            #### Warmup: first x step without lda ood, compute mu and cov for each class instead ###
            warmup = int(self.threshold * self.n_cls / self.batch_size)
            data_warmup = None
            print("Warmup...")
            if warmup == 0:
                pass
            else:
                for train_step in tqdm(range(1,
                                            warmup + 1),
                                    desc='Epoch {:03d}: '.format(epoch_idx),
                                    position=0,
                                    leave=True):
                    with torch.no_grad():
                        batch = next(train_dataiter)
                        data = batch['input_ids'].to(self.device)
                        target = batch['label'].to(self.device)        
                        attention_mask = batch['attention_mask'].to(self.device)  
                        
                        data_in, feat_lda, feat_pca = self.net(data, target, attention_mask)             
                        
                        if train_step == 1:
                            data_warmup = data_in
                        else:    
                            data_warmup = torch.cat((data_warmup, data_in), dim=0) 
            
            if warmup == 0:
                pass
            else:        
                dataset_in_mu = torch.mean(data_warmup, dim = 0)
                cov0 = torch.tensor(self.calculate_covariance_matrix(data_warmup).detach() + 1e-4 * torch.eye(dataset_in_mu.size(0)).to(self.device).detach(), dtype = torch.double)
                L = torch.linalg.cholesky(cov0).detach()
                L_inv = torch.linalg.inv(L).detach()
                dataset_in_cov = torch.tensor(torch.mm(L_inv.t(), L_inv).unsqueeze(0).detach(), dtype=torch.float)
                
                sub_datasets_in = [Subset(data_warmup, torch.where(target == i)[0]) for i in range(self.n_cls)]      
                    

                for i in range(len(sub_datasets_in)):
                    dataloader = DataLoader(sub_datasets_in[i], batch_size=int(self.threshold * self.n_cls), shuffle=False)
                    for batch in dataloader:
                        tensor_data_in = batch
                    
                        mean =  torch.mean(tensor_data_in, dim = 0)
                        cov0 = (self.calculate_covariance_matrix(tensor_data_in)+1e-4 * torch.eye(mean.size(0)).to(self.device)).detach()
                        L = torch.linalg.cholesky(cov0).detach()
                        L_inv = torch.linalg.inv(L).detach()

                        # Solve the inverse of a symmetric positive definite matrix A using the inverse of a lower triangular matrix
                        cov = torch.mm(L_inv.t(), L_inv)

                        sub_datasets_in_cov[i,:,:] = cov.detach()
                        sub_datasets_in_mu[i,:] = mean.detach()      
                        sub_datasets_in_distances[i] = torch.max(self.mahalanobis(tensor_data_in, sub_datasets_in_mu.clone(), sub_datasets_in_cov.clone())[:,i]).detach()                                              
            #### Warmup: first x step without lda ood, compute mu and cov for each class instead ###     
            
            self.net.train()
            for train_step in tqdm(range(warmup + 1,
                                        len(train_dataiter)),
                                desc='Epoch {:03d}: '.format(epoch_idx),
                                position=0,
                                leave=True):

                batch = next(train_dataiter)
                data = batch['input_ids'].to(self.device)
                target = batch['label'].to(self.device)        
                attention_mask = batch['attention_mask'].to(self.device)  
                
                data_in, feat_lda, feat_pca = self.net(data, target, attention_mask)    
                
                
                data = data_in
                data_in = data_in.detach()
                feat_lda = feat_lda.detach()
                feat_pca = feat_pca.detach()

                # generate rounded ood data
                sub_datasets_in = [Subset(data_in, torch.where(target == i)[0]) for i in range(self.n_cls)]
                sub_datasets_lda = [Subset(feat_lda, torch.where(target == i)[0]) for i in range(self.n_cls)]   
                
                # Count the number of samples in each sub-dataset
                dataset_lengths = torch.tensor([len(subset) for subset in sub_datasets_lda])
                mask = dataset_lengths > 2
                lda_class = len(dataset_lengths[mask])
                
                
                reshaped_rounded_data, dataset_in_mu = self.grod_generate_pca(data_in, feat_lda, feat_pca, train_step, dataset_in_mu)
                
                data = torch.cat((data, reshaped_rounded_data), dim = 0)
                
                if lda_class > 0:
                    reshaped_rounded_data, sub_datasets_in_mu, sub_datasets_in_cov, sub_datasets_in_distances = self.grod_generate_lda(feat_lda, sub_datasets_in, sub_datasets_lda, sub_datasets_in_mu, sub_datasets_in_cov, sub_datasets_in_distances, lda_class)
            
                    data = torch.cat((data, reshaped_rounded_data), dim = 0)
            
                data = torch.cat((data, reshaped_rounded_data), dim = 0)
                # print(reshaped_rounded_data.size())

                    

                data_add = data[data_in.size(0):]   
                # print(data_add.size())
                    
                
                distances = self.mahalanobis(data_add, sub_datasets_in_mu, sub_datasets_in_cov).to(self.device) #(n,k)
                
                # Calculate the minimum distance and corresponding category index of each sample point
                min_distances, min_distances_clas = torch.min(distances, dim=1)                       
                # Get the sub-dataset distance corresponding to each sample point
                sub_distances = sub_datasets_in_distances[min_distances_clas.to(self.device)]
                
                ### soft label of outliers ###
                target_add = torch.zeros((data_add.size(0), self.n_cls + 1)).to(self.device) #(n, K+1)
                extend = (sub_datasets_in_distances / (distances + 1e-3)).detach()
                extend = torch.where(torch.isnan(extend), torch.tensor(1e-5, dtype=torch.float32), extend)
                extend = torch.clamp(extend, -80, 80)
                target_add[:,:-1] = torch.exp(- (1 - extend))
                # extend_ood = torch.gather(extend, 1, min_distances_clas.unsqueeze(1)).squeeze(1).to(self.device)
                
                extend_ood, _ = torch.max(extend, dim=1)
                extend_ood = torch.clamp(extend_ood, -80, 80)
                # print(extend)
                assert not torch.isnan(extend).any(), "NaN values found in `extend`"
                assert not torch.isinf(extend).any(), "Inf values found in `extend`"
                assert not torch.isnan(min_distances_clas).any(), "NaN values found in `min_distances_clas`"
                target_add[:,-1] = torch.clamp(torch.exp(1 - extend_ood), -1e5, 1e5)
                # print(target_add[:,:-1], target_add[:,-1])
                # target_add1 = target_add.clone()
                # for i in range(target_add.size(0)):
                #     target_add1[i, :] = (target_add[i, :]-torch.min(target_add[i, :])) / (torch.max(target_add[i, :]) - torch.min(target_add[i, :]))
                ### soft label of outliers ###
                
                
                
                k_init = (torch.mean(min_distances / sub_distances) - 1) * 10
                
                mask = min_distances > (1 + k_init * self.k.to(self.device)[0]) * sub_distances
                # Use Boolean indexing to remove data points that meet a condition
                cleaned_data_add = data_add[mask.to(self.device)]
                cleaned_target_add = target_add[mask.to(self.device)]
                    
                if cleaned_data_add.size(0) > data_in.size(0) // self.n_cls + 2:
                    indices = torch.randperm(cleaned_data_add.size(0))[:(data_in.size(0) // self.n_cls + 2)].to(self.device)
                    cleaned_data_add_de = cleaned_data_add[indices]
                    cleaned_target_add_de = cleaned_target_add[indices]
                else: 
                    cleaned_data_add_de = cleaned_data_add
                    cleaned_target_add_de = cleaned_target_add
                    
                data = torch.cat((data[:data_in.size(0)], cleaned_data_add_de), dim = 0)
                
                target = F.one_hot(target, num_classes=self.n_cls + 1)
                
                target = torch.cat((target, cleaned_target_add_de), dim = 0)
                # print(target.size())
                
                    

                output = self.head(data)
                # print(output.size())
                # output = F.normalize(output, dim=1)
                loss1 = F.cross_entropy(output, target)

                label_matrix = output
                biclas = torch.zeros(label_matrix.size(0), 2)
                biclas[:,-1] = label_matrix[:,-1]
                biclas[:,0] = torch.sum(label_matrix[:,:-1],-1)
                
                label_biclas = torch.zeros(target.size(0), 2)
                label_biclas[:,-1] = target[:,-1]
                label_biclas[:,0] = torch.sum(target[:,:-1],-1)
                
                loss2 = F.cross_entropy(biclas.to(self.device), label_biclas.to(self.device))
                # print(loss1, loss2, cleaned_target_add_de) 
                loss = (1 - self.gamma) * loss1 + self.gamma * loss2 
                print(loss1, loss2)
                # loss = torch.where(torch.isnan(loss), torch.tensor(1e-5, dtype=torch.float32), loss)
                # loss = torch.clamp(loss, -1e5, 1e5)
                # backward
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                self.scheduler.step()

                # exponential moving average, show smooth values
                with torch.no_grad():
                    loss_avg = loss_avg * 0.8 + float(loss) * 0.2

            print(f'Epoch {epoch_idx + 1}/{len(train_dataiter)}, Average Training Loss: {loss_avg:.4f}')
            # accuracy = correct / total
            # print(f'Accuracy on validation set: {accuracy:.4f}')
            accuracy = self.test_model()  # Test model after each epoch
            if accuracy > self.best_accuracy or accuracy == self.best_accuracy:  # If current accuracy is better than best
                self.best_accuracy = accuracy  # Update best accuracy
                self.best_model_state = self.net.state_dict()  # Save current model state                
                
    def test_model(self):
        self.net.eval()  # Switch to evaluation mode
        self.net.to(self.device)
        correct = 0
        total = 0
        val_dataiter = iter(self.train_loader)
        with torch.no_grad():
            for train_step in tqdm(range(1, len(val_dataiter) + 1),
                                    position=0,
                                    leave=True):
                batch = next(val_dataiter)
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch["attention_mask"].to(self.device)
                labels = batch['label'].to(self.device)
                
                data_in, feat_lda, feat_pca = self.net(input_ids, labels, attention_mask)
                outputs = self.head(data_in)
                # outputs = self.net.backbone(input_ids, attention_mask).logits
                predicted = torch.argmax(outputs[:,:-1], dim=1)
                # print(predicted, labels)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = correct / total
        print(f'Accuracy on validation set: {accuracy:.4f}')
        return accuracy    

    def save_best_model(self, filename):
        if self.best_model_state is not None:
            torch.save(self.best_model_state, filename)

    
    def grod_generate_pca(self, data_in, feat_lda, feat_pca, train_step, dataset_in_mu):
        # generate PCA ood data
        argmax = torch.zeros(feat_pca.size()[1])
        argmax = torch.argmax(feat_pca,dim=0) #feat_dim
        
        argmin = torch.zeros(feat_pca.size()[1])
        argmin = torch.argmin(feat_pca,dim=0) #feat_dim

        for j in range(feat_pca.size()[1]):
            if j==0:
                pcadata_rounded_category = data_in[int(argmax[j].item())].unsqueeze(0)
                pcadata_rounded_category_1 = data_in[int(argmin[j].item())].unsqueeze(0)
            else:
                
                pcadata_rounded_category = torch.cat((pcadata_rounded_category, data_in[int(argmax[j].item())].unsqueeze(0)),dim=0)
                pcadata_rounded_category_1 = torch.cat((pcadata_rounded_category_1, data_in[int(argmin[j].item())].unsqueeze(0)),dim=0)
        
        ### mu and std smoothing ###
        if train_step == 1:
            dataset_in_mu = torch.mean(data_in, dim = 0)
        else:
            dataset_in_mu = (1 - self.stat_smooth) * torch.mean(data_in.detach().clone(), dim = 0) + self.stat_smooth * dataset_in_mu.detach().clone() 
        ### mu and std smoothing ###
        
        # dataset_in_mu = torch.mean(data_in.detach().clone(), dim = 0)
        
        dataset_in_mu =  repeat(dataset_in_mu.squeeze(), "f -> b f", 
                                        f = data_in.size(1), b = feat_lda.size()[1])
        # print(data_rounded_category.size())
        B = pcadata_rounded_category.detach()
        B_1 = pcadata_rounded_category_1.detach()
        # print(A.size())
        pcavector = F.normalize(B.clone() - dataset_in_mu, dim = 1)
        pcavector_1 = F.normalize(B_1.clone() - dataset_in_mu, dim = 1)
        B = torch.add(B, self.alpha * pcavector).detach() #(feat_dim, 768)
        B_1 = torch.add(B_1, self.alpha * pcavector_1).detach() #(feat_dim, 768)
        mean_matrix_0 = B
        mean_matrix_1 = B_1
        # print(A.size())
        mean_matrix = torch.cat((mean_matrix_0, mean_matrix_1), dim = 0)
        # mean_matrix = mean_matrix_0
        std = 1 / 3 * self.alpha
        mu = mean_matrix.T.unsqueeze(2).to(self.device) 
        rand_data = torch.randn(mean_matrix.size(1), self.nums_rounded).to(self.device) 
        gaussian_data = mu + std * rand_data.unsqueeze(1) #(768, num, nums_rounded)
        # print(gaussian_data.size())
        nums = gaussian_data.size(1)
        nums_rounded = gaussian_data.size(2)
        reshaped_rounded_data = gaussian_data.permute(1, 2, 0).contiguous().view(nums * nums_rounded, mean_matrix.size(1)) # (num* nums_rounded, 768)
        # print(reshaped_rounded_data.size(),data.size())
        return reshaped_rounded_data, dataset_in_mu
        
    def grod_generate_lda(self, feat_lda, sub_datasets_in, sub_datasets_lda, sub_datasets_in_mu, sub_datasets_in_cov, sub_datasets_in_distances, lda_class):   
        dataset_lengths = torch.tensor([len(subset) for subset in sub_datasets_lda])
        # Get the index of sub-datasets with the largest amount of data
        top_indices = sorted(range(len(dataset_lengths)), key=lambda i: dataset_lengths[i], reverse=True)[:lda_class]

        
        arg_max = torch.zeros((len(sub_datasets_lda), feat_lda.size()[1]))
        arg_min = torch.zeros((len(sub_datasets_lda), feat_lda.size()[1]))
        k = 0
        for i in top_indices:
            k = k + 1
            dataloader = DataLoader(sub_datasets_lda[i], batch_size=64, shuffle=False)
            for batch in dataloader:
                tensor_data_lda = batch
            dataloader = DataLoader(sub_datasets_in[i], batch_size=64, shuffle=False)
            for batch in dataloader:
                tensor_data_in = batch
            arg_max[i] = torch.argmax(tensor_data_lda, dim=0) #feat_dim                   
            arg_min[i] = torch.argmin(tensor_data_lda, dim=0) #feat_dim

            for j in range(feat_lda.size()[1]):
                # print(argmax[i][j].item())
                if k == 1 and j==0:
                    data_rounded_category = tensor_data_in[int(arg_max[i][j].item())].unsqueeze(0)
                    data_rounded_category_1 = tensor_data_in[int(arg_min[i][j].item())].unsqueeze(0)
                else:
                    data_rounded_category = torch.cat((data_rounded_category, tensor_data_in[int(arg_max[i][j].item())].unsqueeze(0)),dim=0)
                    data_rounded_category_1 = torch.cat((data_rounded_category_1, tensor_data_in[int(arg_min[i][j].item())].unsqueeze(0)),dim=0)

            if tensor_data_in.size(0) > 1:
                
                mean =  torch.mean(tensor_data_in, dim = 0)
                cov0 = (self.calculate_covariance_matrix(tensor_data_in)+1e-4 * torch.eye(mean.size(0)).to(self.device)).detach()
                L = torch.linalg.cholesky(cov0).detach()
                L_inv = torch.linalg.inv(L).detach()

                # Solve the inverse of a symmetric positive definite matrix A using the inverse of a lower triangular matrix
                cov = torch.mm(L_inv.t(), L_inv)
                ### mu and std smoothing ###
                if torch.max(torch.abs(sub_datasets_in_mu[i,:]))<1e-7:
                    sub_datasets_in_cov[i,:,:] = cov.detach()
                    sub_datasets_in_mu[i,:] = mean.detach()                        
                    sub_datasets_in_distances[i] = torch.max(self.mahalanobis(tensor_data_in, sub_datasets_in_mu.clone(), sub_datasets_in_cov.clone())[:,i]).detach()                                                                     
                else:
                    sub_datasets_in_cov[i,:,:] = (1 - self.stat_smooth) * cov.detach().clone().to(self.device) + self.stat_smooth * sub_datasets_in_cov[i,:,:].detach().clone()
                    sub_datasets_in_mu[i,:] = (1 - self.stat_smooth) * mean.detach().clone().to(self.device) + self.stat_smooth * sub_datasets_in_mu[i,:].detach().clone()
                    dists = self.mahalanobis(tensor_data_in, sub_datasets_in_mu.clone(), sub_datasets_in_cov.clone())[:,i]
                    dist = torch.max(dists)
                    sub_datasets_in_distances[i] = (1 - self.stat_smooth) * dist.to(self.device).detach().clone() + self.stat_smooth * sub_datasets_in_distances[i].detach().clone()
                ### mu and std smoothing ###
            
            
            sub_datasets_in_mean =  repeat(sub_datasets_in_mu.clone()[i,:], "f -> b f", 
                                        f = tensor_data_in.size(1), b = feat_lda.size()[1])
            
            A = data_rounded_category[-feat_lda.size()[1]:].detach()
            A_1 = data_rounded_category_1[- feat_lda.size()[1]:].detach()
            vector = F.normalize(A.to(self.device) - sub_datasets_in_mean.to(self.device), dim = 1)
            vector_1 = F.normalize(A_1.to(self.device) - sub_datasets_in_mean.to(self.device), dim = 1)
            A = A + self.alpha * vector.detach().to(self.device) #(feat_dim, 768)
            A_1 = A_1 + self.alpha * vector_1.detach().to(self.device) #(feat_dim, 768)
            if k == 1:
                mean_matrix_0 = A
                mean_matrix_1 = A_1
            else:
                mean_matrix_0 = torch.cat((mean_matrix_0, A), dim = 0) #(num, 768)
                mean_matrix_1 = torch.cat((mean_matrix_1, A_1), dim = 0) #(num, 768)
            mean_matrix = torch.cat((mean_matrix_0, mean_matrix_1), dim = 0)
            # print(mean_matrix.size())
            std = 1 / 3 * self.alpha
            mu = mean_matrix.T.unsqueeze(2).to(self.device) #(768,num,1)
            rand_data = torch.randn(mean_matrix.size(1), self.nums_rounded).to(self.device) #(768,nums_rounded)
            gaussian_data = mu + std * rand_data.unsqueeze(1) #(768, num, nums_rounded)
            # print(gaussian_data.size())
            nums = gaussian_data.size(1)
            nums_rounded = gaussian_data.size(2)
            reshaped_rounded_data = gaussian_data.permute(1, 2, 0).contiguous().view(nums * nums_rounded, mean_matrix.size(1)) # (num* nums_rounded, 768)
            
            return reshaped_rounded_data, sub_datasets_in_mu, sub_datasets_in_cov, sub_datasets_in_distances
    
    def mahalanobis(self, x, support_mean, inv_covmat): #(n,d), (k,d), (k,d,d)
        n = x.size(0)
        d = x.size(1)

        x = x.to(inv_covmat.device)
        support_mean = support_mean.to(inv_covmat.device)

        maha_dists = []
        for i in range(inv_covmat.size(0)):
            class_inv_cov = inv_covmat[i].detach()
            support_class = support_mean[i].detach()
        
            x_mu = x - support_class.unsqueeze(0).expand(n, d)            
            class_inv_cov = class_inv_cov.to(inv_covmat.device)

            # Mahalanobis distances
            left = torch.matmul(x_mu, class_inv_cov)
            # print(x_mu.size(), class_inv_cov.size(), left.size())
            mahal = torch.matmul(left, x_mu.t()).diagonal()
            maha_dists.append(mahal)

        return torch.stack(maha_dists).t()
    
    def calculate_covariance_matrix(self, data):
        mean = torch.mean(data, dim=0)
        mean = mean.unsqueeze(0).expand(data.size(0), data.size(1))
        centered_data = data - mean

        covariance_matrix = torch.mm(centered_data.t(), centered_data) / (centered_data.size(0) - 1 + 1e-7)

        return covariance_matrix


In [11]:
yaml_file_path = 'grod copy.yml'
loaded_parameters = load_parameters_from_yaml(yaml_file_path)
config_grod = loaded_parameters
        
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


# if torch.cuda.device_count() > 1:
#     print("Using", torch.cuda.device_count(), "GPUs!")
#     model = torch.nn.DataParallel(model)
    # model = torch.nn.DataParallel(model)
    
model_grod = GRODNet(model, 1, 10).to(device)

trainer = GRODTrainer_Soft_Label(model_grod, train_dataloader, config_grod)
trainer.train(config_grod['optimizer']['num_epochs']) 



# Save the best model state
trainer.save_best_model('best_model_grod2_clinc_GPT_0.001.ckpt') 

Warmup...


Epoch 000: 100%|██████████| 3/3 [00:00<00:00,  7.29it/s]
  cov0 = torch.tensor(self.calculate_covariance_matrix(data_warmup).detach() + 1e-4 * torch.eye(dataset_in_mu.size(0)).to(self.device).detach(), dtype = torch.double)
  dataset_in_cov = torch.tensor(torch.mm(L_inv.t(), L_inv).unsqueeze(0).detach(), dtype=torch.float)
Epoch 000:   0%|          | 0/231 [00:00<?, ?it/s]

tensor(5.7240, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0174, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   1%|          | 2/231 [00:00<01:49,  2.10it/s]

tensor(5.3531, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0195, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   1%|▏         | 3/231 [00:01<01:42,  2.22it/s]

tensor(4.3963, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0601, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   2%|▏         | 4/231 [00:01<01:39,  2.28it/s]

tensor(3.4005, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0910, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.9192, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0727, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   3%|▎         | 6/231 [00:02<01:36,  2.33it/s]

tensor(2.9548, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1257, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   3%|▎         | 7/231 [00:03<01:35,  2.35it/s]

tensor(2.8533, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1130, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   3%|▎         | 8/231 [00:03<01:34,  2.37it/s]

tensor(2.7051, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1373, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   4%|▍         | 9/231 [00:03<01:33,  2.36it/s]

tensor(2.6435, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1241, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   4%|▍         | 10/231 [00:04<01:33,  2.36it/s]

tensor(2.6651, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1231, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   5%|▍         | 11/231 [00:04<01:33,  2.36it/s]

tensor(2.6345, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1038, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   5%|▌         | 12/231 [00:05<01:32,  2.36it/s]

tensor(2.7494, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1078, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   6%|▌         | 13/231 [00:05<01:32,  2.37it/s]

tensor(2.8541, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1083, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   6%|▌         | 14/231 [00:06<01:31,  2.37it/s]

tensor(2.6566, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1309, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   6%|▋         | 15/231 [00:06<01:30,  2.39it/s]

tensor(2.5283, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1327, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   7%|▋         | 16/231 [00:06<01:29,  2.39it/s]

tensor(2.4713, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1372, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   7%|▋         | 17/231 [00:07<01:30,  2.38it/s]

tensor(2.3677, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1462, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   8%|▊         | 18/231 [00:07<01:29,  2.38it/s]

tensor(2.3794, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1457, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   8%|▊         | 19/231 [00:08<01:29,  2.38it/s]

tensor(2.4860, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1530, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   9%|▊         | 20/231 [00:08<01:28,  2.37it/s]

tensor(2.3920, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1696, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:   9%|▉         | 21/231 [00:08<01:28,  2.37it/s]

tensor(2.4208, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1537, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  10%|▉         | 22/231 [00:09<01:28,  2.37it/s]

tensor(2.3681, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1648, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  10%|▉         | 23/231 [00:09<01:27,  2.37it/s]

tensor(2.3418, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1462, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  10%|█         | 24/231 [00:10<01:27,  2.38it/s]

tensor(2.3207, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1496, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  11%|█         | 25/231 [00:10<01:26,  2.37it/s]

tensor(2.2960, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1401, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  11%|█▏        | 26/231 [00:11<01:26,  2.37it/s]

tensor(2.3341, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1319, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  12%|█▏        | 27/231 [00:11<01:26,  2.37it/s]

tensor(2.4277, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1363, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  12%|█▏        | 28/231 [00:11<01:25,  2.39it/s]

tensor(2.3165, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1277, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  13%|█▎        | 29/231 [00:12<01:24,  2.40it/s]

tensor(2.3581, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1375, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  13%|█▎        | 30/231 [00:12<01:24,  2.39it/s]

tensor(2.3592, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1255, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.3436, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1338, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  14%|█▍        | 32/231 [00:13<01:23,  2.38it/s]

tensor(2.3455, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1296, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  14%|█▍        | 33/231 [00:14<01:22,  2.39it/s]

tensor(2.4128, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1403, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.3227, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1303, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  15%|█▌        | 35/231 [00:14<01:22,  2.38it/s]

tensor(2.3548, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1287, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  16%|█▌        | 36/231 [00:15<01:21,  2.39it/s]

tensor(2.3494, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1242, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.3145, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1189, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  16%|█▌        | 37/231 [00:15<01:22,  2.35it/s]

tensor(2.3713, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1207, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  16%|█▋        | 38/231 [00:16<01:22,  2.35it/s]

tensor(2.3644, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1153, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  17%|█▋        | 40/231 [00:16<01:21,  2.35it/s]

tensor(2.3464, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1102, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.3389, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  18%|█▊        | 42/231 [00:17<01:20,  2.36it/s]

tensor(2.3313, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0969, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  19%|█▊        | 43/231 [00:18<01:19,  2.35it/s]

tensor(2.3474, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0958, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.3316, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0876, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  19%|█▉        | 45/231 [00:19<01:19,  2.33it/s]

tensor(2.3453, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0888, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  20%|█▉        | 46/231 [00:19<01:19,  2.34it/s]

tensor(2.3307, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0880, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  20%|██        | 47/231 [00:19<01:18,  2.34it/s]

tensor(2.3086, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0856, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  21%|██        | 48/231 [00:20<01:18,  2.34it/s]

tensor(2.3802, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0789, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  21%|██        | 49/231 [00:20<01:17,  2.36it/s]

tensor(2.3175, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0801, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.3356, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0881, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  22%|██▏       | 51/231 [00:21<01:16,  2.37it/s]

tensor(2.3356, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0766, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  23%|██▎       | 52/231 [00:22<01:15,  2.36it/s]

tensor(2.3340, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0791, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  23%|██▎       | 53/231 [00:22<01:15,  2.37it/s]

tensor(2.3557, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0795, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  23%|██▎       | 54/231 [00:22<01:14,  2.37it/s]

tensor(2.3671, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0766, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  24%|██▍       | 55/231 [00:23<01:14,  2.37it/s]

tensor(2.3513, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0847, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  24%|██▍       | 56/231 [00:23<01:13,  2.37it/s]

tensor(2.3444, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0778, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  25%|██▍       | 57/231 [00:24<01:13,  2.37it/s]

tensor(2.3631, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0774, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  25%|██▌       | 58/231 [00:24<01:13,  2.36it/s]

tensor(2.3376, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0717, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  26%|██▌       | 59/231 [00:25<01:12,  2.36it/s]

tensor(2.3239, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0769, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  26%|██▌       | 60/231 [00:25<01:12,  2.36it/s]

tensor(2.3282, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0744, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  26%|██▋       | 61/231 [00:25<01:12,  2.36it/s]

tensor(2.3414, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0685, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  27%|██▋       | 62/231 [00:26<01:11,  2.36it/s]

tensor(2.3262, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0729, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  27%|██▋       | 63/231 [00:26<01:11,  2.36it/s]

tensor(2.3164, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0678, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  28%|██▊       | 64/231 [00:27<01:10,  2.36it/s]

tensor(2.3094, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0670, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  28%|██▊       | 65/231 [00:27<01:10,  2.36it/s]

tensor(2.3013, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0632, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  29%|██▊       | 66/231 [00:28<01:09,  2.37it/s]

tensor(2.3289, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0617, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  29%|██▉       | 67/231 [00:28<01:09,  2.36it/s]

tensor(2.3152, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0583, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.2880, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0618, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  30%|██▉       | 69/231 [00:29<01:15,  2.15it/s]

tensor(2.3316, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0640, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  30%|███       | 70/231 [00:29<01:12,  2.23it/s]

tensor(2.3343, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0637, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  31%|███       | 71/231 [00:30<01:10,  2.26it/s]

tensor(2.2963, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0590, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  31%|███       | 72/231 [00:30<01:09,  2.29it/s]

tensor(2.3283, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0623, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  32%|███▏      | 73/231 [00:31<01:08,  2.31it/s]

tensor(2.3041, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0576, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  32%|███▏      | 74/231 [00:31<01:06,  2.35it/s]

tensor(2.3224, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0575, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  32%|███▏      | 75/231 [00:31<01:06,  2.36it/s]

tensor(2.2978, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0821, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  33%|███▎      | 76/231 [00:32<01:05,  2.36it/s]

tensor(2.3128, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0592, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  33%|███▎      | 77/231 [00:32<01:05,  2.36it/s]

tensor(2.2918, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0581, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  34%|███▍      | 78/231 [00:33<01:04,  2.36it/s]

tensor(2.2803, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0568, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  34%|███▍      | 79/231 [00:33<01:04,  2.35it/s]

tensor(2.3105, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0553, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  35%|███▍      | 80/231 [00:34<01:04,  2.36it/s]

tensor(2.3002, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0546, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  35%|███▌      | 81/231 [00:34<01:03,  2.36it/s]

tensor(2.3265, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0515, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  35%|███▌      | 82/231 [00:34<01:03,  2.36it/s]

tensor(2.3016, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0488, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  36%|███▌      | 83/231 [00:35<01:02,  2.36it/s]

tensor(2.3309, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0529, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.3054, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0467, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  37%|███▋      | 85/231 [00:36<01:02,  2.35it/s]

tensor(2.2731, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0412, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  37%|███▋      | 86/231 [00:36<01:01,  2.35it/s]

tensor(2.2725, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0400, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.2846, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0426, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  38%|███▊      | 87/231 [00:37<01:01,  2.35it/s]

tensor(2.3064, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0434, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  38%|███▊      | 88/231 [00:37<01:00,  2.35it/s]

tensor(2.2684, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0398, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  39%|███▉      | 90/231 [00:38<01:00,  2.34it/s]

tensor(2.3016, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0435, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  39%|███▉      | 91/231 [00:38<00:59,  2.34it/s]

tensor(2.2780, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0415, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  40%|███▉      | 92/231 [00:39<00:59,  2.34it/s]

tensor(2.2906, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0420, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.3124, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0405, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  41%|████      | 94/231 [00:40<00:58,  2.35it/s]

tensor(2.2716, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0426, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  41%|████      | 95/231 [00:40<00:57,  2.35it/s]

tensor(2.3114, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0430, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  42%|████▏     | 96/231 [00:40<00:57,  2.35it/s]

tensor(2.2815, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0426, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  42%|████▏     | 97/231 [00:41<00:56,  2.36it/s]

tensor(2.2807, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0398, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  42%|████▏     | 98/231 [00:41<00:56,  2.36it/s]

tensor(2.2815, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0392, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  43%|████▎     | 99/231 [00:42<00:55,  2.36it/s]

tensor(2.2361, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0377, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  43%|████▎     | 100/231 [00:42<00:55,  2.36it/s]

tensor(2.2420, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0374, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  44%|████▎     | 101/231 [00:42<00:55,  2.36it/s]

tensor(2.2807, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0372, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  44%|████▍     | 102/231 [00:43<00:54,  2.36it/s]

tensor(2.2678, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0343, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.1995, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0339, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  45%|████▍     | 103/231 [00:43<00:54,  2.35it/s]

tensor(2.2628, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0323, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  45%|████▌     | 105/231 [00:44<00:53,  2.35it/s]

tensor(2.2574, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0336, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  46%|████▌     | 106/231 [00:45<00:53,  2.35it/s]

tensor(2.2239, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0332, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  46%|████▋     | 107/231 [00:45<00:52,  2.35it/s]

tensor(2.2131, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0322, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  47%|████▋     | 108/231 [00:45<00:52,  2.36it/s]

tensor(2.2383, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0395, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.2418, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0333, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  47%|████▋     | 109/231 [00:46<00:51,  2.35it/s]

tensor(2.2507, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0331, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  48%|████▊     | 111/231 [00:47<00:51,  2.35it/s]

tensor(2.2389, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0316, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.2217, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0293, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  49%|████▉     | 113/231 [00:48<00:50,  2.35it/s]

tensor(2.2109, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0314, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  49%|████▉     | 114/231 [00:48<00:49,  2.35it/s]

tensor(2.2238, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0326, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  50%|████▉     | 115/231 [00:48<00:49,  2.35it/s]

tensor(2.2791, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0308, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  50%|█████     | 116/231 [00:49<00:48,  2.35it/s]

tensor(2.2117, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0325, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.1943, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0283, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  51%|█████     | 118/231 [00:50<00:48,  2.35it/s]

tensor(2.2367, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0304, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  52%|█████▏    | 119/231 [00:50<00:47,  2.35it/s]

tensor(2.1615, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0295, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  52%|█████▏    | 120/231 [00:51<00:47,  2.35it/s]

tensor(2.1745, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0264, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  52%|█████▏    | 121/231 [00:51<00:46,  2.34it/s]

tensor(2.2082, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0288, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  53%|█████▎    | 122/231 [00:51<00:46,  2.36it/s]

tensor(2.1844, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0280, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  53%|█████▎    | 123/231 [00:52<00:45,  2.35it/s]

tensor(2.1630, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0244, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  54%|█████▎    | 124/231 [00:52<00:45,  2.35it/s]

tensor(2.1266, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0241, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  54%|█████▍    | 125/231 [00:53<00:44,  2.36it/s]

tensor(2.1161, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0266, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.1985, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0316, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  55%|█████▍    | 126/231 [00:53<00:44,  2.35it/s]

tensor(2.1629, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0209, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  55%|█████▍    | 127/231 [00:54<00:44,  2.35it/s]

tensor(2.1722, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0237, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  56%|█████▌    | 129/231 [00:54<00:43,  2.36it/s]

tensor(2.1016, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0229, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  56%|█████▋    | 130/231 [00:55<00:42,  2.36it/s]

tensor(2.1714, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0233, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.1503, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0215, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  57%|█████▋    | 131/231 [00:55<00:42,  2.36it/s]

tensor(2.0992, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0254, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  57%|█████▋    | 132/231 [00:56<00:42,  2.36it/s]

tensor(2.1894, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0293, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  58%|█████▊    | 133/231 [00:56<00:41,  2.36it/s]

tensor(2.0670, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0233, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  58%|█████▊    | 135/231 [00:57<00:40,  2.35it/s]

tensor(2.1528, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0232, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  59%|█████▉    | 136/231 [00:57<00:40,  2.36it/s]

tensor(2.1293, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0200, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  59%|█████▉    | 137/231 [00:58<00:39,  2.36it/s]

tensor(2.1397, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0206, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  60%|█████▉    | 138/231 [00:58<00:39,  2.36it/s]

tensor(2.1632, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0213, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  60%|██████    | 139/231 [00:59<00:38,  2.37it/s]

tensor(2.1249, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0180, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  61%|██████    | 140/231 [00:59<00:38,  2.37it/s]

tensor(2.0678, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0154, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.0925, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0168, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  61%|██████▏   | 142/231 [01:00<00:37,  2.36it/s]

tensor(2.0500, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0141, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  62%|██████▏   | 143/231 [01:00<00:37,  2.36it/s]

tensor(2.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0115, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.0545, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0099, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  62%|██████▏   | 144/231 [01:01<00:36,  2.36it/s]

tensor(1.9571, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0081, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  63%|██████▎   | 146/231 [01:02<00:36,  2.36it/s]

tensor(2.0136, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0066, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  64%|██████▎   | 147/231 [01:02<00:35,  2.36it/s]

tensor(1.9879, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0081, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  64%|██████▍   | 148/231 [01:02<00:35,  2.36it/s]

tensor(2.0471, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0058, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.1415, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0078, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  65%|██████▍   | 149/231 [01:03<00:34,  2.35it/s]

tensor(2.0711, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0079, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  65%|██████▍   | 150/231 [01:03<00:34,  2.35it/s]

tensor(2.0657, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0142, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  65%|██████▌   | 151/231 [01:04<00:34,  2.35it/s]

tensor(2.0160, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0191, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  66%|██████▌   | 153/231 [01:05<00:33,  2.35it/s]

tensor(1.9702, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0092, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  67%|██████▋   | 154/231 [01:05<00:32,  2.35it/s]

tensor(1.9070, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0178, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  67%|██████▋   | 155/231 [01:05<00:32,  2.36it/s]

tensor(2.0641, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0096, device='cuda:0', grad_fn=<DivBackward1>)
tensor(2.0124, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0081, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  68%|██████▊   | 156/231 [01:06<00:31,  2.36it/s]

tensor(1.9871, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0143, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  68%|██████▊   | 157/231 [01:06<00:31,  2.35it/s]

tensor(2.0832, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0090, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  68%|██████▊   | 158/231 [01:07<00:31,  2.35it/s]

tensor(2.0790, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0105, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  69%|██████▉   | 160/231 [01:08<00:29,  2.38it/s]

tensor(1.9128, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0093, device='cuda:0', grad_fn=<DivBackward1>)
tensor(1.9306, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0084, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  70%|███████   | 162/231 [01:08<00:29,  2.37it/s]

tensor(1.9559, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  71%|███████   | 163/231 [01:09<00:28,  2.39it/s]

tensor(1.8670, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0077, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  71%|███████   | 164/231 [01:09<00:27,  2.40it/s]

tensor(1.8273, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0065, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  71%|███████▏  | 165/231 [01:10<00:27,  2.38it/s]

tensor(1.8690, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0082, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  72%|███████▏  | 166/231 [01:10<00:27,  2.37it/s]

tensor(1.9469, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  72%|███████▏  | 167/231 [01:10<00:26,  2.39it/s]

tensor(1.9633, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>)
tensor(1.9178, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0062, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  73%|███████▎  | 168/231 [01:11<00:26,  2.40it/s]

tensor(2.0459, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  73%|███████▎  | 169/231 [01:11<00:25,  2.40it/s]

tensor(1.9328, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0069, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  74%|███████▎  | 170/231 [01:12<00:25,  2.39it/s]

tensor(1.8832, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0062, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  74%|███████▍  | 171/231 [01:12<00:25,  2.37it/s]

tensor(1.9122, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0046, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  74%|███████▍  | 172/231 [01:13<00:24,  2.37it/s]

tensor(1.9045, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0072, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  75%|███████▌  | 174/231 [01:13<00:23,  2.40it/s]

tensor(1.9215, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0075, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  76%|███████▌  | 175/231 [01:14<00:23,  2.39it/s]

tensor(1.9969, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0062, device='cuda:0', grad_fn=<DivBackward1>)
tensor(1.8142, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0059, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  76%|███████▌  | 176/231 [01:14<00:23,  2.38it/s]

tensor(1.8202, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0053, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  77%|███████▋  | 178/231 [01:15<00:22,  2.36it/s]

tensor(1.8683, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0043, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  77%|███████▋  | 179/231 [01:16<00:22,  2.36it/s]

tensor(1.8108, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  78%|███████▊  | 180/231 [01:16<00:21,  2.38it/s]

tensor(1.7884, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0054, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  78%|███████▊  | 181/231 [01:16<00:20,  2.39it/s]

tensor(1.8557, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  79%|███████▉  | 182/231 [01:17<00:20,  2.40it/s]

tensor(1.7319, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0059, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  79%|███████▉  | 183/231 [01:17<00:19,  2.41it/s]

tensor(1.6986, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  80%|███████▉  | 184/231 [01:18<00:19,  2.42it/s]

tensor(1.6521, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0114, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  80%|████████  | 185/231 [01:18<00:18,  2.42it/s]

tensor(1.5467, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0081, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  81%|████████  | 186/231 [01:18<00:18,  2.42it/s]

tensor(1.5710, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  81%|████████  | 187/231 [01:19<00:18,  2.43it/s]

tensor(1.6025, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0132, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  81%|████████▏ | 188/231 [01:19<00:17,  2.43it/s]

tensor(1.5298, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>)
tensor(1.5778, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0073, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  82%|████████▏ | 189/231 [01:20<00:17,  2.40it/s]

tensor(1.6277, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0060, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  82%|████████▏ | 190/231 [01:20<00:17,  2.40it/s]

tensor(1.6582, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0149, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  83%|████████▎ | 192/231 [01:21<00:16,  2.40it/s]

tensor(1.7085, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0059, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  84%|████████▎ | 193/231 [01:21<00:15,  2.40it/s]

tensor(1.6110, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0093, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  84%|████████▍ | 194/231 [01:22<00:15,  2.40it/s]

tensor(1.6227, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0065, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  84%|████████▍ | 195/231 [01:22<00:15,  2.38it/s]

tensor(1.5160, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  85%|████████▍ | 196/231 [01:23<00:14,  2.40it/s]

tensor(1.5153, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0195, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  85%|████████▌ | 197/231 [01:23<00:14,  2.41it/s]

tensor(1.6312, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0037, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  86%|████████▌ | 198/231 [01:23<00:13,  2.41it/s]

tensor(1.6829, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0062, device='cuda:0', grad_fn=<DivBackward1>)
tensor(1.6520, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0044, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  87%|████████▋ | 200/231 [01:24<00:12,  2.40it/s]

tensor(1.5530, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0068, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  87%|████████▋ | 201/231 [01:25<00:12,  2.40it/s]

tensor(1.3838, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0514, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  87%|████████▋ | 202/231 [01:25<00:12,  2.41it/s]

tensor(1.4608, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0076, device='cuda:0', grad_fn=<DivBackward1>)
tensor(1.2791, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0138, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  88%|████████▊ | 204/231 [01:26<00:11,  2.41it/s]

tensor(1.5355, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0061, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  89%|████████▊ | 205/231 [01:26<00:10,  2.41it/s]

tensor(1.3838, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0059, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  89%|████████▉ | 206/231 [01:27<00:10,  2.42it/s]

tensor(1.3466, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0096, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  90%|████████▉ | 207/231 [01:27<00:09,  2.42it/s]

tensor(1.2705, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0079, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  90%|█████████ | 208/231 [01:28<00:09,  2.43it/s]

tensor(1.3203, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>)
tensor(1.2391, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0116, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  91%|█████████ | 210/231 [01:28<00:08,  2.41it/s]

tensor(1.1211, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0048, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  91%|█████████▏| 211/231 [01:29<00:08,  2.42it/s]

tensor(1.3560, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0216, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  92%|█████████▏| 212/231 [01:29<00:07,  2.43it/s]

tensor(1.3108, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0064, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  92%|█████████▏| 213/231 [01:30<00:07,  2.42it/s]

tensor(1.2821, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  93%|█████████▎| 214/231 [01:30<00:07,  2.42it/s]

tensor(1.3111, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0057, device='cuda:0', grad_fn=<DivBackward1>)
tensor(1.1163, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  94%|█████████▎| 216/231 [01:31<00:06,  2.42it/s]

tensor(1.0430, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0078, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  94%|█████████▍| 217/231 [01:31<00:06,  2.20it/s]

tensor(1.1793, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0091, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  94%|█████████▍| 218/231 [01:32<00:05,  2.25it/s]

tensor(1.0283, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0126, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  95%|█████████▍| 219/231 [01:32<00:05,  2.30it/s]

tensor(0.7988, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  95%|█████████▌| 220/231 [01:33<00:04,  2.33it/s]

tensor(1.1043, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0144, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  96%|█████████▌| 221/231 [01:33<00:04,  2.35it/s]

tensor(0.8251, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0679, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  96%|█████████▌| 222/231 [01:34<00:03,  2.36it/s]

tensor(0.8472, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  97%|█████████▋| 223/231 [01:34<00:03,  2.39it/s]

tensor(0.7466, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0087, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  97%|█████████▋| 224/231 [01:34<00:02,  2.40it/s]

tensor(0.9878, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0090, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.8936, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  98%|█████████▊| 226/231 [01:35<00:02,  2.41it/s]

tensor(0.9005, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0141, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  98%|█████████▊| 227/231 [01:36<00:01,  2.41it/s]

tensor(0.6042, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0102, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  99%|█████████▊| 228/231 [01:36<00:01,  2.42it/s]

tensor(0.7330, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000:  99%|█████████▉| 229/231 [01:36<00:00,  2.41it/s]

tensor(0.6122, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0185, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000: 100%|█████████▉| 230/231 [01:37<00:00,  2.42it/s]

tensor(0.8250, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0173, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.5845, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0361, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 000: 100%|██████████| 231/231 [01:37<00:00,  2.36it/s]


Epoch 1/235, Average Training Loss: 0.6853


100%|██████████| 235/235 [00:21<00:00, 10.88it/s]


Accuracy on validation set: 0.8245
Warmup...


Epoch 001: 100%|██████████| 3/3 [00:00<00:00, 10.64it/s]
Epoch 001:   0%|          | 1/231 [00:00<01:35,  2.40it/s]

tensor(0.6327, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0043, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   1%|          | 2/231 [00:00<01:34,  2.42it/s]

tensor(0.8402, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0315, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   1%|▏         | 3/231 [00:01<01:34,  2.41it/s]

tensor(0.9997, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0499, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   2%|▏         | 4/231 [00:01<01:34,  2.41it/s]

tensor(0.6534, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0288, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.7451, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0178, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   2%|▏         | 5/231 [00:02<01:33,  2.41it/s]

tensor(0.9336, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0656, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   3%|▎         | 7/231 [00:02<01:33,  2.38it/s]

tensor(0.7708, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.1601, device='cuda:0', grad_fn=<DivBackward1>)
tensor(1.0467, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0151, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   4%|▍         | 9/231 [00:03<01:32,  2.39it/s]

tensor(0.6057, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.7532, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0027, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   4%|▍         | 10/231 [00:04<01:32,  2.40it/s]

tensor(0.7868, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0107, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   5%|▍         | 11/231 [00:04<01:31,  2.40it/s]

tensor(0.8564, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   5%|▌         | 12/231 [00:05<01:31,  2.41it/s]

tensor(0.6663, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0042, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   6%|▌         | 14/231 [00:05<01:29,  2.41it/s]

tensor(0.6491, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0103, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.5687, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   7%|▋         | 16/231 [00:06<01:29,  2.40it/s]

tensor(0.6368, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0055, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.5492, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   7%|▋         | 17/231 [00:07<01:29,  2.40it/s]

tensor(0.4448, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   8%|▊         | 19/231 [00:07<01:28,  2.38it/s]

tensor(0.5928, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0206, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.5851, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:   9%|▉         | 21/231 [00:08<01:27,  2.41it/s]

tensor(0.4680, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.5884, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0299, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  10%|▉         | 22/231 [00:09<01:26,  2.41it/s]

tensor(0.4941, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  10%|█         | 24/231 [00:09<01:25,  2.42it/s]

tensor(0.5974, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.5372, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  11%|█▏        | 26/231 [00:10<01:24,  2.42it/s]

tensor(0.3630, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  12%|█▏        | 27/231 [00:11<01:24,  2.43it/s]

tensor(0.5756, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  12%|█▏        | 28/231 [00:11<01:23,  2.42it/s]

tensor(0.4282, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  13%|█▎        | 29/231 [00:12<01:23,  2.42it/s]

tensor(0.6992, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  13%|█▎        | 30/231 [00:12<01:22,  2.42it/s]

tensor(0.5956, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.4433, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  13%|█▎        | 31/231 [00:12<01:23,  2.40it/s]

tensor(0.4688, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  14%|█▍        | 32/231 [00:13<01:22,  2.40it/s]

tensor(0.5336, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  15%|█▍        | 34/231 [00:14<01:22,  2.39it/s]

tensor(0.6645, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  15%|█▌        | 35/231 [00:14<01:21,  2.40it/s]

tensor(0.4501, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  16%|█▌        | 36/231 [00:14<01:21,  2.41it/s]

tensor(0.2675, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  16%|█▌        | 37/231 [00:15<01:20,  2.40it/s]

tensor(0.5229, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  16%|█▋        | 38/231 [00:15<01:20,  2.39it/s]

tensor(0.3138, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  17%|█▋        | 39/231 [00:16<01:19,  2.41it/s]

tensor(0.3490, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  17%|█▋        | 40/231 [00:16<01:19,  2.41it/s]

tensor(0.4312, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.4353, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  18%|█▊        | 42/231 [00:17<01:18,  2.42it/s]

tensor(0.6299, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.4849, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  19%|█▊        | 43/231 [00:17<01:17,  2.42it/s]

tensor(0.4978, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.6063e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  19%|█▉        | 44/231 [00:18<01:17,  2.42it/s]

tensor(0.2864, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  19%|█▉        | 45/231 [00:18<01:16,  2.42it/s]

tensor(0.5025, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  20%|██        | 47/231 [00:19<01:15,  2.43it/s]

tensor(0.2494, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  21%|██        | 48/231 [00:19<01:15,  2.43it/s]

tensor(0.4169, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.3235, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  21%|██        | 49/231 [00:20<01:15,  2.42it/s]

tensor(0.5014, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  22%|██▏       | 50/231 [00:20<01:14,  2.42it/s]

tensor(0.4229, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  22%|██▏       | 51/231 [00:21<01:14,  2.41it/s]

tensor(0.4955, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  23%|██▎       | 52/231 [00:21<01:14,  2.42it/s]

tensor(0.6109, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  23%|██▎       | 54/231 [00:22<01:13,  2.42it/s]

tensor(0.5823, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  24%|██▍       | 55/231 [00:22<01:12,  2.42it/s]

tensor(0.3915, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7558e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  24%|██▍       | 56/231 [00:23<01:12,  2.43it/s]

tensor(0.3605, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  25%|██▍       | 57/231 [00:23<01:11,  2.43it/s]

tensor(0.3658, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  25%|██▌       | 58/231 [00:24<01:11,  2.43it/s]

tensor(0.4425, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2364, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0040, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  26%|██▌       | 59/231 [00:24<01:10,  2.43it/s]

tensor(0.2503, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  26%|██▋       | 61/231 [00:25<01:10,  2.43it/s]

tensor(0.3640, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.0079e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  27%|██▋       | 62/231 [00:25<01:09,  2.43it/s]

tensor(0.5408, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.3278, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  27%|██▋       | 63/231 [00:26<01:09,  2.42it/s]

tensor(0.2274, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  28%|██▊       | 65/231 [00:26<01:08,  2.41it/s]

tensor(0.5709, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  29%|██▊       | 66/231 [00:27<01:08,  2.42it/s]

tensor(0.2546, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.3498, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  29%|██▉       | 68/231 [00:28<01:07,  2.42it/s]

tensor(0.2352, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  30%|██▉       | 69/231 [00:28<01:06,  2.43it/s]

tensor(0.4204, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  30%|███       | 70/231 [00:29<01:06,  2.42it/s]

tensor(0.3396, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  31%|███       | 71/231 [00:29<01:05,  2.43it/s]

tensor(0.5210, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8047e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  31%|███       | 72/231 [00:29<01:05,  2.42it/s]

tensor(0.3198, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  32%|███▏      | 73/231 [00:30<01:05,  2.40it/s]

tensor(0.3313, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  32%|███▏      | 74/231 [00:30<01:05,  2.41it/s]

tensor(0.2990, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  32%|███▏      | 75/231 [00:31<01:04,  2.41it/s]

tensor(0.3442, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0256, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.3774, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  33%|███▎      | 76/231 [00:31<01:04,  2.41it/s]

tensor(0.3757, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0312, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  34%|███▍      | 78/231 [00:32<01:03,  2.42it/s]

tensor(0.5210, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1759, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0061, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  34%|███▍      | 79/231 [00:32<01:03,  2.40it/s]

tensor(0.3056, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0105, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  35%|███▌      | 81/231 [00:33<01:02,  2.38it/s]

tensor(0.3474, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  35%|███▌      | 82/231 [00:34<01:01,  2.41it/s]

tensor(0.3927, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  36%|███▌      | 83/231 [00:34<01:01,  2.40it/s]

tensor(0.2907, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  36%|███▋      | 84/231 [00:34<01:00,  2.41it/s]

tensor(0.2917, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  37%|███▋      | 85/231 [00:35<01:00,  2.41it/s]

tensor(0.4050, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  37%|███▋      | 86/231 [00:35<01:00,  2.40it/s]

tensor(0.2218, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0054, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  38%|███▊      | 87/231 [00:36<00:59,  2.41it/s]

tensor(0.2295, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  38%|███▊      | 88/231 [00:36<00:59,  2.40it/s]

tensor(0.2790, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0042, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  39%|███▊      | 89/231 [00:36<00:59,  2.38it/s]

tensor(0.2862, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0052, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  39%|███▉      | 90/231 [00:37<00:59,  2.37it/s]

tensor(0.1147, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  39%|███▉      | 91/231 [00:37<00:58,  2.38it/s]

tensor(0.2251, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  40%|███▉      | 92/231 [00:38<00:58,  2.38it/s]

tensor(0.3482, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  40%|████      | 93/231 [00:38<00:58,  2.36it/s]

tensor(0.3204, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  41%|████      | 94/231 [00:39<01:03,  2.16it/s]

tensor(0.2809, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  41%|████      | 95/231 [00:39<01:01,  2.21it/s]

tensor(0.2432, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.3181e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  42%|████▏     | 96/231 [00:40<01:00,  2.25it/s]

tensor(0.1288, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3172e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  42%|████▏     | 97/231 [00:40<00:58,  2.28it/s]

tensor(0.3727, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0038e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2774, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.1138e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  43%|████▎     | 99/231 [00:41<00:56,  2.35it/s]

tensor(0.2787, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0530, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  43%|████▎     | 100/231 [00:41<00:55,  2.35it/s]

tensor(0.5336, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  44%|████▎     | 101/231 [00:42<00:55,  2.36it/s]

tensor(0.3667, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  44%|████▍     | 102/231 [00:42<00:54,  2.36it/s]

tensor(0.2288, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  45%|████▍     | 103/231 [00:42<00:53,  2.38it/s]

tensor(0.2648, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  45%|████▌     | 104/231 [00:43<00:53,  2.39it/s]

tensor(0.2528, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3934e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  45%|████▌     | 105/231 [00:43<00:52,  2.38it/s]

tensor(0.1064, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0089, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  46%|████▌     | 106/231 [00:44<00:52,  2.37it/s]

tensor(0.2061, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  46%|████▋     | 107/231 [00:44<00:52,  2.37it/s]

tensor(0.3032, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  47%|████▋     | 108/231 [00:45<00:52,  2.36it/s]

tensor(0.2851, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  47%|████▋     | 109/231 [00:45<00:51,  2.36it/s]

tensor(0.1574, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0073, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  48%|████▊     | 110/231 [00:45<00:51,  2.35it/s]

tensor(0.3681, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0345, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  48%|████▊     | 111/231 [00:46<00:51,  2.35it/s]

tensor(0.3380, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  48%|████▊     | 112/231 [00:46<00:50,  2.36it/s]

tensor(0.2865, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  49%|████▉     | 113/231 [00:47<00:49,  2.36it/s]

tensor(0.1935, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  49%|████▉     | 114/231 [00:47<00:49,  2.36it/s]

tensor(0.1572, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  50%|████▉     | 115/231 [00:48<00:49,  2.36it/s]

tensor(0.2535, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0037, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  50%|█████     | 116/231 [00:48<00:48,  2.36it/s]

tensor(0.4016, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  51%|█████     | 117/231 [00:48<00:48,  2.36it/s]

tensor(0.2684, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  51%|█████     | 118/231 [00:49<00:47,  2.35it/s]

tensor(0.3345, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  52%|█████▏    | 119/231 [00:49<00:47,  2.36it/s]

tensor(0.2645, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  52%|█████▏    | 120/231 [00:50<00:47,  2.35it/s]

tensor(0.3122, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  52%|█████▏    | 121/231 [00:50<00:46,  2.36it/s]

tensor(0.1263, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  53%|█████▎    | 122/231 [00:51<00:46,  2.36it/s]

tensor(0.3533, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  53%|█████▎    | 123/231 [00:51<00:45,  2.35it/s]

tensor(0.1394, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  54%|█████▎    | 124/231 [00:51<00:45,  2.36it/s]

tensor(0.1297, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  54%|█████▍    | 125/231 [00:52<00:45,  2.35it/s]

tensor(0.1405, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  55%|█████▍    | 126/231 [00:52<00:44,  2.35it/s]

tensor(0.2357, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.0942e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  55%|█████▍    | 127/231 [00:53<00:44,  2.35it/s]

tensor(0.1988, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  55%|█████▌    | 128/231 [00:53<00:43,  2.36it/s]

tensor(0.1068, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  56%|█████▌    | 129/231 [00:53<00:42,  2.38it/s]

tensor(0.1561, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  56%|█████▋    | 130/231 [00:54<00:42,  2.39it/s]

tensor(0.1843, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  57%|█████▋    | 131/231 [00:54<00:41,  2.41it/s]

tensor(0.1749, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  57%|█████▋    | 132/231 [00:55<00:41,  2.41it/s]

tensor(0.1540, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1782, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.4902e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  58%|█████▊    | 134/231 [00:56<00:40,  2.42it/s]

tensor(0.0668, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.5116e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  58%|█████▊    | 135/231 [00:56<00:39,  2.42it/s]

tensor(0.3821, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  59%|█████▉    | 136/231 [00:56<00:39,  2.42it/s]

tensor(0.2596, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1819, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  59%|█████▉    | 137/231 [00:57<00:39,  2.41it/s]

tensor(0.2923, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  60%|██████    | 139/231 [00:58<00:38,  2.42it/s]

tensor(0.3444, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.1139e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2931, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0027, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  61%|██████    | 141/231 [00:58<00:37,  2.42it/s]

tensor(0.0646, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  61%|██████▏   | 142/231 [00:59<00:36,  2.42it/s]

tensor(0.6068, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  62%|██████▏   | 143/231 [00:59<00:36,  2.42it/s]

tensor(0.2738, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  62%|██████▏   | 144/231 [01:00<00:35,  2.42it/s]

tensor(0.0943, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  63%|██████▎   | 145/231 [01:00<00:35,  2.43it/s]

tensor(0.2443, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.1844e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  63%|██████▎   | 146/231 [01:01<00:34,  2.43it/s]

tensor(0.2563, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  64%|██████▎   | 147/231 [01:01<00:34,  2.43it/s]

tensor(0.1410, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  64%|██████▍   | 148/231 [01:01<00:34,  2.43it/s]

tensor(0.2357, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.4674, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  65%|██████▍   | 150/231 [01:02<00:33,  2.42it/s]

tensor(0.2712, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2285, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  66%|██████▌   | 152/231 [01:03<00:32,  2.40it/s]

tensor(0.2499, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  66%|██████▌   | 153/231 [01:03<00:32,  2.39it/s]

tensor(0.1471, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.4549e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  67%|██████▋   | 154/231 [01:04<00:32,  2.38it/s]

tensor(0.2917, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1432, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  68%|██████▊   | 156/231 [01:05<00:31,  2.36it/s]

tensor(0.2116, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.1610e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.3195, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  68%|██████▊   | 158/231 [01:06<00:31,  2.35it/s]

tensor(0.3935, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  69%|██████▉   | 159/231 [01:06<00:30,  2.35it/s]

tensor(0.2398, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2705, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  70%|██████▉   | 161/231 [01:07<00:29,  2.35it/s]

tensor(0.1364, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2046, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  71%|███████   | 163/231 [01:08<00:28,  2.36it/s]

tensor(0.1557, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  71%|███████   | 164/231 [01:08<00:28,  2.36it/s]

tensor(0.2715, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2072, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0058, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  71%|███████▏  | 165/231 [01:09<00:28,  2.36it/s]

tensor(0.2012, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0098, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  72%|███████▏  | 166/231 [01:09<00:27,  2.35it/s]

tensor(0.2622, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  72%|███████▏  | 167/231 [01:09<00:27,  2.35it/s]

tensor(0.3632, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  73%|███████▎  | 168/231 [01:10<00:26,  2.34it/s]

tensor(0.1990, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  74%|███████▎  | 170/231 [01:11<00:25,  2.35it/s]

tensor(0.2150, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.2786e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  74%|███████▍  | 171/231 [01:11<00:25,  2.35it/s]

tensor(0.4035, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  74%|███████▍  | 172/231 [01:12<00:24,  2.37it/s]

tensor(0.1110, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.5864e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  75%|███████▍  | 173/231 [01:12<00:24,  2.37it/s]

tensor(0.2545, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.4836, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  75%|███████▌  | 174/231 [01:12<00:24,  2.35it/s]

tensor(0.1680, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3666e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  76%|███████▌  | 176/231 [01:13<00:23,  2.34it/s]

tensor(0.1877, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0631e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  77%|███████▋  | 177/231 [01:14<00:22,  2.35it/s]

tensor(0.2085, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4701e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  77%|███████▋  | 178/231 [01:14<00:22,  2.35it/s]

tensor(0.1271, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7804e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  77%|███████▋  | 179/231 [01:14<00:22,  2.35it/s]

tensor(0.2548, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  78%|███████▊  | 180/231 [01:15<00:21,  2.36it/s]

tensor(0.1869, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  78%|███████▊  | 181/231 [01:15<00:21,  2.36it/s]

tensor(0.2864, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.6002e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  79%|███████▉  | 182/231 [01:16<00:20,  2.35it/s]

tensor(0.2488, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.9899e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  79%|███████▉  | 183/231 [01:16<00:20,  2.36it/s]

tensor(0.2222, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.3186, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.4783e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  80%|████████  | 185/231 [01:17<00:19,  2.36it/s]

tensor(0.2391, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0345e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  81%|████████  | 186/231 [01:17<00:19,  2.36it/s]

tensor(0.2314, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2215, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  81%|████████▏ | 188/231 [01:18<00:18,  2.35it/s]

tensor(0.2289, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  82%|████████▏ | 189/231 [01:19<00:17,  2.38it/s]

tensor(0.1138, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.9802e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  82%|████████▏ | 190/231 [01:19<00:17,  2.39it/s]

tensor(0.1948, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  83%|████████▎ | 191/231 [01:20<00:16,  2.41it/s]

tensor(0.1506, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.6295e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  83%|████████▎ | 192/231 [01:20<00:16,  2.42it/s]

tensor(0.2927, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8224e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  84%|████████▎ | 193/231 [01:20<00:15,  2.42it/s]

tensor(0.2210, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  84%|████████▍ | 194/231 [01:21<00:15,  2.42it/s]

tensor(0.0998, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  84%|████████▍ | 195/231 [01:21<00:14,  2.43it/s]

tensor(0.1207, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  85%|████████▍ | 196/231 [01:22<00:14,  2.43it/s]

tensor(0.1257, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5042e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  85%|████████▌ | 197/231 [01:22<00:13,  2.43it/s]

tensor(0.2015, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.9961e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  86%|████████▌ | 198/231 [01:22<00:13,  2.43it/s]

tensor(0.0719, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  86%|████████▌ | 199/231 [01:23<00:13,  2.42it/s]

tensor(0.1542, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  87%|████████▋ | 200/231 [01:23<00:12,  2.42it/s]

tensor(0.0764, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  87%|████████▋ | 201/231 [01:24<00:12,  2.42it/s]

tensor(0.1666, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  87%|████████▋ | 202/231 [01:24<00:11,  2.42it/s]

tensor(0.3415, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  88%|████████▊ | 203/231 [01:24<00:11,  2.43it/s]

tensor(0.1165, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  88%|████████▊ | 204/231 [01:25<00:11,  2.43it/s]

tensor(0.1251, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  89%|████████▊ | 205/231 [01:25<00:10,  2.43it/s]

tensor(0.1415, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2518, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  90%|████████▉ | 207/231 [01:26<00:09,  2.41it/s]

tensor(0.2223, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9175e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0998, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  90%|█████████ | 209/231 [01:27<00:09,  2.42it/s]

tensor(0.1817, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1597, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.1563e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  91%|█████████ | 210/231 [01:27<00:08,  2.42it/s]

tensor(0.2924, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.8372e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  92%|█████████▏| 212/231 [01:28<00:07,  2.43it/s]

tensor(0.2314, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  92%|█████████▏| 213/231 [01:29<00:07,  2.42it/s]

tensor(0.2704, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  93%|█████████▎| 214/231 [01:29<00:07,  2.41it/s]

tensor(0.2572, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  93%|█████████▎| 215/231 [01:29<00:06,  2.41it/s]

tensor(0.1550, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  94%|█████████▎| 216/231 [01:30<00:06,  2.42it/s]

tensor(0.1584, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4807e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  94%|█████████▍| 217/231 [01:30<00:05,  2.42it/s]

tensor(0.2436, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  94%|█████████▍| 218/231 [01:31<00:05,  2.43it/s]

tensor(0.2030, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.6801e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  95%|█████████▍| 219/231 [01:31<00:04,  2.43it/s]

tensor(0.1545, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  95%|█████████▌| 220/231 [01:32<00:04,  2.43it/s]

tensor(0.1734, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.2361e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  96%|█████████▌| 221/231 [01:32<00:04,  2.43it/s]

tensor(0.2680, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2563, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  97%|█████████▋| 223/231 [01:33<00:03,  2.42it/s]

tensor(0.2891, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4132e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  97%|█████████▋| 224/231 [01:33<00:02,  2.42it/s]

tensor(0.2366, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2482e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  97%|█████████▋| 225/231 [01:34<00:02,  2.42it/s]

tensor(0.2085, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  98%|█████████▊| 226/231 [01:34<00:02,  2.42it/s]

tensor(0.4093, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2192, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  99%|█████████▊| 228/231 [01:35<00:01,  2.40it/s]

tensor(0.0891, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2252, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001:  99%|█████████▉| 229/231 [01:35<00:00,  2.40it/s]

tensor(0.1417, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.5470e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001: 100%|██████████| 231/231 [01:36<00:00,  2.41it/s]

tensor(0.2269, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 001: 100%|██████████| 231/231 [01:36<00:00,  2.39it/s]


Epoch 2/235, Average Training Loss: 0.1895


100%|██████████| 235/235 [00:21<00:00, 10.98it/s]


Accuracy on validation set: 0.9623
Warmup...


Epoch 002: 100%|██████████| 3/3 [00:00<00:00, 10.26it/s]
Epoch 002:   0%|          | 1/231 [00:00<01:34,  2.42it/s]

tensor(0.1073, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   1%|          | 2/231 [00:00<01:34,  2.41it/s]

tensor(0.1845, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   1%|▏         | 3/231 [00:01<01:34,  2.42it/s]

tensor(0.2312, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   2%|▏         | 4/231 [00:01<01:33,  2.42it/s]

tensor(0.2475, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   2%|▏         | 5/231 [00:02<01:33,  2.43it/s]

tensor(0.1452, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   3%|▎         | 6/231 [00:02<01:33,  2.39it/s]

tensor(0.0989, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   3%|▎         | 7/231 [00:03<01:42,  2.18it/s]

tensor(0.1006, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   3%|▎         | 8/231 [00:03<01:38,  2.26it/s]

tensor(0.0863, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.2448e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   4%|▍         | 9/231 [00:03<01:35,  2.32it/s]

tensor(0.2196, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   4%|▍         | 10/231 [00:04<01:33,  2.35it/s]

tensor(0.2851, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   5%|▍         | 11/231 [00:04<01:32,  2.38it/s]

tensor(0.2925, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   5%|▌         | 12/231 [00:05<01:31,  2.39it/s]

tensor(0.2339, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   6%|▌         | 13/231 [00:05<01:31,  2.38it/s]

tensor(0.1940, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   6%|▌         | 14/231 [00:05<01:30,  2.39it/s]

tensor(0.0532, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1547, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   7%|▋         | 16/231 [00:06<01:29,  2.41it/s]

tensor(0.1554, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   7%|▋         | 17/231 [00:07<01:28,  2.42it/s]

tensor(0.2541, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   8%|▊         | 18/231 [00:07<01:28,  2.40it/s]

tensor(0.1592, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   8%|▊         | 19/231 [00:08<01:28,  2.39it/s]

tensor(0.1457, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   9%|▊         | 20/231 [00:08<01:27,  2.40it/s]

tensor(0.3151, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:   9%|▉         | 21/231 [00:08<01:26,  2.42it/s]

tensor(0.1740, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  10%|▉         | 22/231 [00:09<01:26,  2.43it/s]

tensor(0.2578, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  10%|▉         | 23/231 [00:09<01:25,  2.45it/s]

tensor(0.1866, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6372e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  10%|█         | 24/231 [00:10<01:25,  2.42it/s]

tensor(0.1096, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  11%|█         | 25/231 [00:10<01:24,  2.43it/s]

tensor(0.1344, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  11%|█▏        | 26/231 [00:10<01:24,  2.44it/s]

tensor(0.1929, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0139, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  12%|█▏        | 27/231 [00:11<01:23,  2.43it/s]

tensor(0.1709, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.4521e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  12%|█▏        | 28/231 [00:11<01:23,  2.44it/s]

tensor(0.1368, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2966e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  13%|█▎        | 29/231 [00:12<01:23,  2.43it/s]

tensor(0.2303, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  13%|█▎        | 30/231 [00:12<01:22,  2.43it/s]

tensor(0.1295, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  13%|█▎        | 31/231 [00:12<01:22,  2.43it/s]

tensor(0.0849, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  14%|█▍        | 32/231 [00:13<01:21,  2.43it/s]

tensor(0.1537, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  14%|█▍        | 33/231 [00:13<01:21,  2.44it/s]

tensor(0.1716, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  15%|█▍        | 34/231 [00:14<01:21,  2.43it/s]

tensor(0.1433, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  15%|█▌        | 35/231 [00:14<01:21,  2.42it/s]

tensor(0.2188, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  16%|█▌        | 36/231 [00:15<01:21,  2.40it/s]

tensor(0.0698, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.2196e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  16%|█▌        | 37/231 [00:15<01:21,  2.39it/s]

tensor(0.1190, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  16%|█▋        | 38/231 [00:15<01:20,  2.40it/s]

tensor(0.2372, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0043, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  17%|█▋        | 39/231 [00:16<01:19,  2.42it/s]

tensor(0.1682, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0164, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  17%|█▋        | 40/231 [00:16<01:18,  2.43it/s]

tensor(0.1579, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  18%|█▊        | 41/231 [00:17<01:17,  2.44it/s]

tensor(0.2522, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  18%|█▊        | 42/231 [00:17<01:17,  2.44it/s]

tensor(0.0864, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  19%|█▊        | 43/231 [00:17<01:17,  2.42it/s]

tensor(0.1214, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  19%|█▉        | 44/231 [00:18<01:17,  2.40it/s]

tensor(0.2198, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  19%|█▉        | 45/231 [00:18<01:17,  2.39it/s]

tensor(0.1793, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0102, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  20%|█▉        | 46/231 [00:19<01:16,  2.41it/s]

tensor(0.0606, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.0492e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  20%|██        | 47/231 [00:19<01:15,  2.43it/s]

tensor(0.0675, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  21%|██        | 48/231 [00:19<01:15,  2.44it/s]

tensor(0.0759, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  21%|██        | 49/231 [00:20<01:15,  2.41it/s]

tensor(0.0714, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  22%|██▏       | 50/231 [00:20<01:15,  2.40it/s]

tensor(0.0660, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  22%|██▏       | 51/231 [00:21<01:15,  2.39it/s]

tensor(0.1154, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.1670e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  23%|██▎       | 52/231 [00:21<01:14,  2.39it/s]

tensor(0.0384, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  23%|██▎       | 53/231 [00:22<01:14,  2.38it/s]

tensor(0.0618, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.5441e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  23%|██▎       | 54/231 [00:22<01:14,  2.38it/s]

tensor(0.1040, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  24%|██▍       | 55/231 [00:22<01:13,  2.39it/s]

tensor(0.0571, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  24%|██▍       | 56/231 [00:23<01:12,  2.41it/s]

tensor(0.2978, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  25%|██▍       | 57/231 [00:23<01:11,  2.42it/s]

tensor(0.0475, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  25%|██▌       | 58/231 [00:24<01:11,  2.43it/s]

tensor(0.1063, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.8801e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  26%|██▌       | 59/231 [00:24<01:10,  2.43it/s]

tensor(0.1711, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  26%|██▌       | 60/231 [00:24<01:10,  2.41it/s]

tensor(0.0837, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.9058e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  26%|██▋       | 61/231 [00:25<01:10,  2.41it/s]

tensor(0.2994, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  27%|██▋       | 62/231 [00:25<01:09,  2.42it/s]

tensor(0.0478, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  27%|██▋       | 63/231 [00:26<01:09,  2.43it/s]

tensor(0.1409, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  28%|██▊       | 64/231 [00:26<01:08,  2.43it/s]

tensor(0.3777, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  28%|██▊       | 65/231 [00:27<01:07,  2.44it/s]

tensor(0.0317, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  29%|██▊       | 66/231 [00:27<01:07,  2.44it/s]

tensor(0.0903, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0038, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  29%|██▉       | 67/231 [00:27<01:06,  2.45it/s]

tensor(0.2942, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  29%|██▉       | 68/231 [00:28<01:07,  2.43it/s]

tensor(0.2224, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  30%|██▉       | 69/231 [00:28<01:06,  2.44it/s]

tensor(0.2060, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  30%|███       | 70/231 [00:29<01:05,  2.44it/s]

tensor(0.1122, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  31%|███       | 71/231 [00:29<01:05,  2.44it/s]

tensor(0.1182, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  31%|███       | 72/231 [00:29<01:05,  2.43it/s]

tensor(0.0432, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0092, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  32%|███▏      | 73/231 [00:30<01:04,  2.44it/s]

tensor(0.1492, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  32%|███▏      | 74/231 [00:30<01:04,  2.43it/s]

tensor(0.1843, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.5121e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  32%|███▏      | 75/231 [00:31<01:04,  2.44it/s]

tensor(0.0633, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  33%|███▎      | 76/231 [00:31<01:03,  2.43it/s]

tensor(0.1658, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  33%|███▎      | 77/231 [00:31<01:03,  2.41it/s]

tensor(0.1060, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  34%|███▍      | 78/231 [00:32<01:03,  2.42it/s]

tensor(0.0949, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  34%|███▍      | 79/231 [00:32<01:03,  2.41it/s]

tensor(0.0852, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  35%|███▍      | 80/231 [00:33<01:02,  2.42it/s]

tensor(0.1202, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  35%|███▌      | 81/231 [00:33<01:01,  2.42it/s]

tensor(0.1011, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  35%|███▌      | 82/231 [00:34<01:01,  2.43it/s]

tensor(0.1439, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  36%|███▌      | 83/231 [00:34<01:00,  2.44it/s]

tensor(0.2819, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  36%|███▋      | 84/231 [00:34<01:00,  2.45it/s]

tensor(0.1521, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  37%|███▋      | 85/231 [00:35<00:59,  2.44it/s]

tensor(0.1336, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  37%|███▋      | 86/231 [00:35<00:59,  2.45it/s]

tensor(0.2853, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1104, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  38%|███▊      | 88/231 [00:36<00:58,  2.43it/s]

tensor(0.2501, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  39%|███▊      | 89/231 [00:36<00:58,  2.43it/s]

tensor(0.1181, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  39%|███▉      | 90/231 [00:37<00:57,  2.44it/s]

tensor(0.1368, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  39%|███▉      | 91/231 [00:37<00:57,  2.43it/s]

tensor(0.0310, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7542e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  40%|███▉      | 92/231 [00:38<00:57,  2.43it/s]

tensor(0.0760, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  40%|████      | 93/231 [00:38<00:56,  2.43it/s]

tensor(0.0516, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.3354e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  41%|████      | 94/231 [00:38<00:56,  2.44it/s]

tensor(0.0738, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.8325e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  41%|████      | 95/231 [00:39<00:55,  2.44it/s]

tensor(0.0620, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0011e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  42%|████▏     | 96/231 [00:39<00:55,  2.44it/s]

tensor(0.1026, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  42%|████▏     | 97/231 [00:40<00:54,  2.45it/s]

tensor(0.0429, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  42%|████▏     | 98/231 [00:40<00:54,  2.42it/s]

tensor(0.1071, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  43%|████▎     | 99/231 [00:40<00:54,  2.43it/s]

tensor(0.0994, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  43%|████▎     | 100/231 [00:41<00:53,  2.43it/s]

tensor(0.0700, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  44%|████▎     | 101/231 [00:41<00:53,  2.44it/s]

tensor(0.0572, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  44%|████▍     | 102/231 [00:42<00:52,  2.44it/s]

tensor(0.0751, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0246, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  45%|████▍     | 103/231 [00:42<00:52,  2.45it/s]

tensor(0.0481, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  45%|████▌     | 104/231 [00:43<00:52,  2.44it/s]

tensor(0.1635, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  45%|████▌     | 105/231 [00:43<00:52,  2.42it/s]

tensor(0.0913, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  46%|████▌     | 106/231 [00:43<00:51,  2.41it/s]

tensor(0.1546, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0054, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  46%|████▋     | 107/231 [00:44<00:51,  2.42it/s]

tensor(0.1017, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  47%|████▋     | 108/231 [00:44<00:51,  2.40it/s]

tensor(0.1590, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0690, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.7789e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  48%|████▊     | 110/231 [00:45<00:50,  2.37it/s]

tensor(0.0256, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  48%|████▊     | 111/231 [00:45<00:50,  2.39it/s]

tensor(0.0707, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  48%|████▊     | 112/231 [00:46<00:49,  2.39it/s]

tensor(0.0559, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  49%|████▉     | 113/231 [00:46<00:49,  2.41it/s]

tensor(0.1335, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  49%|████▉     | 114/231 [00:47<00:48,  2.42it/s]

tensor(0.0886, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0110, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  50%|████▉     | 115/231 [00:47<00:47,  2.43it/s]

tensor(0.0820, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  50%|█████     | 116/231 [00:48<00:47,  2.43it/s]

tensor(0.0267, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  51%|█████     | 117/231 [00:48<00:46,  2.44it/s]

tensor(0.0783, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2649e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  51%|█████     | 118/231 [00:48<00:46,  2.44it/s]

tensor(0.1474, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  52%|█████▏    | 119/231 [00:49<00:46,  2.42it/s]

tensor(0.1837, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  52%|█████▏    | 120/231 [00:49<00:45,  2.43it/s]

tensor(0.1094, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.1437e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  52%|█████▏    | 121/231 [00:50<00:45,  2.43it/s]

tensor(0.1378, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  53%|█████▎    | 122/231 [00:50<00:44,  2.43it/s]

tensor(0.1326, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  53%|█████▎    | 123/231 [00:50<00:44,  2.42it/s]

tensor(0.1241, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  54%|█████▎    | 124/231 [00:51<00:44,  2.41it/s]

tensor(0.1612, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  54%|█████▍    | 125/231 [00:51<00:43,  2.42it/s]

tensor(0.1379, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  55%|█████▍    | 126/231 [00:52<00:43,  2.42it/s]

tensor(0.1531, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  55%|█████▍    | 127/231 [00:52<00:42,  2.43it/s]

tensor(0.0709, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.2503e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  55%|█████▌    | 128/231 [00:52<00:42,  2.43it/s]

tensor(0.0875, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0037, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  56%|█████▌    | 129/231 [00:53<00:41,  2.43it/s]

tensor(0.1072, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.4752e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  56%|█████▋    | 130/231 [00:53<00:41,  2.44it/s]

tensor(0.1058, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0528e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  57%|█████▋    | 131/231 [00:54<00:41,  2.43it/s]

tensor(0.1790, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  57%|█████▋    | 132/231 [00:54<00:40,  2.44it/s]

tensor(0.0619, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.7486e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  58%|█████▊    | 133/231 [00:55<00:40,  2.44it/s]

tensor(0.0673, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  58%|█████▊    | 134/231 [00:55<00:39,  2.45it/s]

tensor(0.0998, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  58%|█████▊    | 135/231 [00:55<00:39,  2.45it/s]

tensor(0.0826, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.0579e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  59%|█████▉    | 136/231 [00:56<00:38,  2.45it/s]

tensor(0.1235, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  59%|█████▉    | 137/231 [00:56<00:38,  2.44it/s]

tensor(0.0849, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  60%|█████▉    | 138/231 [00:57<00:38,  2.44it/s]

tensor(0.2444, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  60%|██████    | 139/231 [00:57<00:37,  2.44it/s]

tensor(0.0411, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  61%|██████    | 140/231 [00:57<00:37,  2.44it/s]

tensor(0.0851, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  61%|██████    | 141/231 [00:58<00:36,  2.44it/s]

tensor(0.1719, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.5756e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  61%|██████▏   | 142/231 [00:58<00:36,  2.45it/s]

tensor(0.2846, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9624e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  62%|██████▏   | 143/231 [00:59<00:36,  2.43it/s]

tensor(0.0589, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  62%|██████▏   | 144/231 [00:59<00:35,  2.43it/s]

tensor(0.0794, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0995, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  63%|██████▎   | 146/231 [01:00<00:35,  2.42it/s]

tensor(0.0970, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  64%|██████▎   | 147/231 [01:00<00:34,  2.40it/s]

tensor(0.0771, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  64%|██████▍   | 148/231 [01:01<00:34,  2.39it/s]

tensor(0.0353, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0037, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  65%|██████▍   | 149/231 [01:01<00:34,  2.41it/s]

tensor(0.0780, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  65%|██████▍   | 150/231 [01:02<00:33,  2.41it/s]

tensor(0.0537, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  65%|██████▌   | 151/231 [01:02<00:33,  2.42it/s]

tensor(0.0996, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  66%|██████▌   | 152/231 [01:02<00:32,  2.40it/s]

tensor(0.0622, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  66%|██████▌   | 153/231 [01:03<00:32,  2.39it/s]

tensor(0.1214, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0057, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  67%|██████▋   | 154/231 [01:03<00:32,  2.38it/s]

tensor(0.1608, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  67%|██████▋   | 155/231 [01:04<00:34,  2.18it/s]

tensor(0.1012, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.8765e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  68%|██████▊   | 156/231 [01:04<00:33,  2.25it/s]

tensor(0.0725, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  68%|██████▊   | 157/231 [01:05<00:32,  2.30it/s]

tensor(0.0558, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0081, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  68%|██████▊   | 158/231 [01:05<00:31,  2.34it/s]

tensor(0.1240, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.6915e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  69%|██████▉   | 159/231 [01:05<00:30,  2.37it/s]

tensor(0.1472, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  69%|██████▉   | 160/231 [01:06<00:29,  2.39it/s]

tensor(0.1168, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  70%|██████▉   | 161/231 [01:06<00:29,  2.41it/s]

tensor(0.0696, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0058, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  70%|███████   | 162/231 [01:07<00:28,  2.42it/s]

tensor(0.1573, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  71%|███████   | 163/231 [01:07<00:28,  2.39it/s]

tensor(0.0569, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  71%|███████   | 164/231 [01:07<00:27,  2.41it/s]

tensor(0.1043, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0024e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  71%|███████▏  | 165/231 [01:08<00:27,  2.40it/s]

tensor(0.1463, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.0530e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  72%|███████▏  | 166/231 [01:08<00:27,  2.38it/s]

tensor(0.0210, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0040, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  72%|███████▏  | 167/231 [01:09<00:26,  2.37it/s]

tensor(0.1374, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.4714e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  73%|███████▎  | 168/231 [01:09<00:26,  2.38it/s]

tensor(0.0824, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  73%|███████▎  | 169/231 [01:10<00:26,  2.37it/s]

tensor(0.1064, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0064, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  74%|███████▎  | 170/231 [01:10<00:25,  2.37it/s]

tensor(0.0579, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.5224e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  74%|███████▍  | 171/231 [01:10<00:25,  2.39it/s]

tensor(0.0612, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  74%|███████▍  | 172/231 [01:11<00:24,  2.38it/s]

tensor(0.0701, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  75%|███████▍  | 173/231 [01:11<00:24,  2.39it/s]

tensor(0.0356, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  75%|███████▌  | 174/231 [01:12<00:23,  2.41it/s]

tensor(0.1187, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3794e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  76%|███████▌  | 175/231 [01:12<00:23,  2.42it/s]

tensor(0.0278, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7420e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  76%|███████▌  | 176/231 [01:12<00:22,  2.43it/s]

tensor(0.0470, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  77%|███████▋  | 177/231 [01:13<00:22,  2.44it/s]

tensor(0.0348, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  77%|███████▋  | 178/231 [01:13<00:21,  2.44it/s]

tensor(0.1016, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.5494e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  77%|███████▋  | 179/231 [01:14<00:21,  2.44it/s]

tensor(0.0529, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  78%|███████▊  | 180/231 [01:14<00:20,  2.44it/s]

tensor(0.0426, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  78%|███████▊  | 181/231 [01:15<00:20,  2.41it/s]

tensor(0.0853, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  79%|███████▉  | 182/231 [01:15<00:20,  2.42it/s]

tensor(0.0504, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  79%|███████▉  | 183/231 [01:15<00:19,  2.43it/s]

tensor(0.0681, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  80%|███████▉  | 184/231 [01:16<00:19,  2.43it/s]

tensor(0.2063, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0028, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  80%|████████  | 185/231 [01:16<00:18,  2.42it/s]

tensor(0.0859, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  81%|████████  | 186/231 [01:17<00:18,  2.42it/s]

tensor(0.0650, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.0985e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  81%|████████  | 187/231 [01:17<00:18,  2.42it/s]

tensor(0.1056, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  81%|████████▏ | 188/231 [01:17<00:17,  2.42it/s]

tensor(0.0754, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  82%|████████▏ | 189/231 [01:18<00:17,  2.41it/s]

tensor(0.0239, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0662, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  83%|████████▎ | 191/231 [01:19<00:16,  2.41it/s]

tensor(0.1475, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  83%|████████▎ | 192/231 [01:19<00:16,  2.41it/s]

tensor(0.1649, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.7485e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  84%|████████▎ | 193/231 [01:19<00:15,  2.41it/s]

tensor(0.0942, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  84%|████████▍ | 194/231 [01:20<00:15,  2.41it/s]

tensor(0.0534, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  84%|████████▍ | 195/231 [01:20<00:14,  2.41it/s]

tensor(0.1612, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0081, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  85%|████████▍ | 196/231 [01:21<00:14,  2.40it/s]

tensor(0.1868, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.0874e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0639, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  86%|████████▌ | 198/231 [01:22<00:13,  2.40it/s]

tensor(0.1397, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3972e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  86%|████████▌ | 199/231 [01:22<00:13,  2.40it/s]

tensor(0.1941, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0423, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  87%|████████▋ | 201/231 [01:23<00:12,  2.39it/s]

tensor(0.0735, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  87%|████████▋ | 202/231 [01:23<00:12,  2.40it/s]

tensor(0.2123, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  88%|████████▊ | 203/231 [01:24<00:11,  2.41it/s]

tensor(0.2234, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1275, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  89%|████████▊ | 205/231 [01:25<00:10,  2.39it/s]

tensor(0.0304, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0156, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  89%|████████▉ | 206/231 [01:25<00:10,  2.40it/s]

tensor(0.0770, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.0061e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  90%|████████▉ | 207/231 [01:25<00:10,  2.39it/s]

tensor(0.2261, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  90%|█████████ | 208/231 [01:26<00:09,  2.38it/s]

tensor(0.1611, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0069, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0733, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.0389e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  91%|█████████ | 210/231 [01:27<00:08,  2.40it/s]

tensor(0.1642, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.3832e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  91%|█████████▏| 211/231 [01:27<00:08,  2.40it/s]

tensor(0.1089, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0272, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  92%|█████████▏| 213/231 [01:28<00:07,  2.41it/s]

tensor(0.0650, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.8842e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1501, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3313e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  93%|█████████▎| 215/231 [01:29<00:06,  2.41it/s]

tensor(0.0519, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  94%|█████████▎| 216/231 [01:29<00:06,  2.39it/s]

tensor(0.0329, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3283e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  94%|█████████▍| 217/231 [01:30<00:05,  2.39it/s]

tensor(0.1257, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3183e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  94%|█████████▍| 218/231 [01:30<00:05,  2.38it/s]

tensor(0.0573, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.3235e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  95%|█████████▍| 219/231 [01:30<00:05,  2.37it/s]

tensor(0.0532, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  95%|█████████▌| 220/231 [01:31<00:04,  2.36it/s]

tensor(0.0383, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  96%|█████████▌| 221/231 [01:31<00:04,  2.38it/s]

tensor(0.0582, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  96%|█████████▌| 222/231 [01:32<00:03,  2.40it/s]

tensor(0.0733, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  97%|█████████▋| 223/231 [01:32<00:03,  2.38it/s]

tensor(0.0870, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  97%|█████████▋| 224/231 [01:32<00:02,  2.40it/s]

tensor(0.0353, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  97%|█████████▋| 225/231 [01:33<00:02,  2.41it/s]

tensor(0.1200, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  98%|█████████▊| 226/231 [01:33<00:02,  2.41it/s]

tensor(0.1759, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  98%|█████████▊| 227/231 [01:34<00:01,  2.41it/s]

tensor(0.0647, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  99%|█████████▊| 228/231 [01:34<00:01,  2.42it/s]

tensor(0.1552, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002:  99%|█████████▉| 229/231 [01:35<00:00,  2.42it/s]

tensor(0.1050, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002: 100%|█████████▉| 230/231 [01:35<00:00,  2.40it/s]

tensor(0.2460, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0291, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0795, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9822e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 002: 100%|██████████| 231/231 [01:35<00:00,  2.41it/s]


Epoch 3/235, Average Training Loss: 0.1098


100%|██████████| 235/235 [00:21<00:00, 10.95it/s]


Accuracy on validation set: 0.9878
Warmup...


Epoch 003: 100%|██████████| 3/3 [00:00<00:00, 10.59it/s]
Epoch 003:   0%|          | 0/231 [00:00<?, ?it/s]

tensor(0.0906, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   1%|          | 2/231 [00:00<01:40,  2.28it/s]

tensor(0.0912, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0145, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   1%|▏         | 3/231 [00:01<01:38,  2.31it/s]

tensor(0.0637, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.9294e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   2%|▏         | 4/231 [00:01<01:37,  2.34it/s]

tensor(0.0223, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   2%|▏         | 5/231 [00:02<01:36,  2.34it/s]

tensor(0.0636, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   3%|▎         | 6/231 [00:02<01:35,  2.35it/s]

tensor(0.0551, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2389, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   3%|▎         | 7/231 [00:03<01:35,  2.35it/s]

tensor(0.0684, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   3%|▎         | 8/231 [00:03<01:34,  2.37it/s]

tensor(0.0318, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8645e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   4%|▍         | 10/231 [00:04<01:33,  2.37it/s]

tensor(0.0751, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.3899e-06, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1543, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5372e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   5%|▌         | 12/231 [00:05<01:32,  2.36it/s]

tensor(0.0523, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2877e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   6%|▌         | 13/231 [00:05<01:32,  2.36it/s]

tensor(0.1154, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2064, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.2480e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   6%|▌         | 14/231 [00:05<01:32,  2.35it/s]

tensor(0.1282, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7265e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   6%|▋         | 15/231 [00:06<01:31,  2.35it/s]

tensor(0.0555, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   7%|▋         | 17/231 [00:07<01:31,  2.35it/s]

tensor(0.0425, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.7504e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   8%|▊         | 18/231 [00:07<01:30,  2.35it/s]

tensor(0.0793, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4411e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1061, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.5168e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   9%|▊         | 20/231 [00:08<01:28,  2.39it/s]

tensor(0.0520, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:   9%|▉         | 21/231 [00:08<01:27,  2.39it/s]

tensor(0.0717, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0432, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  10%|▉         | 23/231 [00:09<01:27,  2.37it/s]

tensor(0.1021, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.7196e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0835, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.5559e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  10%|█         | 24/231 [00:10<01:27,  2.36it/s]

tensor(0.0449, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  11%|█         | 25/231 [00:10<01:27,  2.36it/s]

tensor(0.0072, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  12%|█▏        | 27/231 [00:11<01:26,  2.36it/s]

tensor(0.0264, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4428e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  12%|█▏        | 28/231 [00:11<01:26,  2.36it/s]

tensor(0.0125, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  13%|█▎        | 29/231 [00:12<01:25,  2.36it/s]

tensor(0.0785, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  13%|█▎        | 30/231 [00:12<01:25,  2.36it/s]

tensor(0.0068, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.1387e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0313, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  14%|█▍        | 32/231 [00:13<01:24,  2.36it/s]

tensor(0.2250, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  14%|█▍        | 33/231 [00:14<01:24,  2.36it/s]

tensor(0.0325, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8608e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  15%|█▍        | 34/231 [00:14<01:23,  2.36it/s]

tensor(0.0810, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  15%|█▌        | 35/231 [00:14<01:22,  2.37it/s]

tensor(0.0538, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  16%|█▌        | 36/231 [00:15<01:21,  2.39it/s]

tensor(0.0727, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  16%|█▌        | 37/231 [00:15<01:21,  2.37it/s]

tensor(0.0795, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4883e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  16%|█▋        | 38/231 [00:16<01:21,  2.37it/s]

tensor(0.0168, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.9003e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  17%|█▋        | 39/231 [00:16<01:20,  2.38it/s]

tensor(0.0612, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  17%|█▋        | 40/231 [00:16<01:20,  2.36it/s]

tensor(0.0408, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  18%|█▊        | 41/231 [00:17<01:20,  2.35it/s]

tensor(0.0290, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0510, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  19%|█▊        | 43/231 [00:18<01:22,  2.27it/s]

tensor(0.0796, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.1053e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  19%|█▉        | 44/231 [00:18<01:21,  2.30it/s]

tensor(0.0434, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  19%|█▉        | 45/231 [00:19<01:19,  2.33it/s]

tensor(0.1010, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.6591e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  20%|█▉        | 46/231 [00:19<01:18,  2.35it/s]

tensor(0.1215, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  20%|██        | 47/231 [00:19<01:17,  2.38it/s]

tensor(0.0533, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.3606e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  21%|██        | 48/231 [00:20<01:16,  2.39it/s]

tensor(0.0486, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  21%|██        | 49/231 [00:20<01:15,  2.41it/s]

tensor(0.0544, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  22%|██▏       | 50/231 [00:21<01:14,  2.42it/s]

tensor(0.0600, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0869, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6686e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  23%|██▎       | 52/231 [00:22<01:13,  2.43it/s]

tensor(0.0891, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  23%|██▎       | 53/231 [00:22<01:13,  2.42it/s]

tensor(0.0681, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.2442e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  23%|██▎       | 54/231 [00:22<01:14,  2.39it/s]

tensor(0.1037, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.4515e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  24%|██▍       | 55/231 [00:23<01:13,  2.41it/s]

tensor(0.0979, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3894e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  24%|██▍       | 56/231 [00:23<01:20,  2.16it/s]

tensor(0.0546, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3220e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  25%|██▍       | 57/231 [00:24<01:17,  2.24it/s]

tensor(0.0550, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3448e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  25%|██▌       | 58/231 [00:24<01:15,  2.30it/s]

tensor(0.0462, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9335e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  26%|██▌       | 59/231 [00:25<01:13,  2.35it/s]

tensor(0.0533, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  26%|██▌       | 60/231 [00:25<01:11,  2.38it/s]

tensor(0.0929, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  26%|██▋       | 61/231 [00:25<01:10,  2.40it/s]

tensor(0.0549, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.8223e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  27%|██▋       | 62/231 [00:26<01:09,  2.41it/s]

tensor(0.0171, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  27%|██▋       | 63/231 [00:26<01:09,  2.42it/s]

tensor(0.1115, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  28%|██▊       | 64/231 [00:27<01:09,  2.41it/s]

tensor(0.0787, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3100e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  28%|██▊       | 65/231 [00:27<01:08,  2.42it/s]

tensor(0.0517, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  29%|██▊       | 66/231 [00:27<01:08,  2.42it/s]

tensor(0.1262, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.8836e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  29%|██▉       | 67/231 [00:28<01:07,  2.43it/s]

tensor(0.0252, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.0669e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0507, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  30%|██▉       | 69/231 [00:29<01:06,  2.43it/s]

tensor(0.0844, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  30%|███       | 70/231 [00:29<01:06,  2.43it/s]

tensor(0.0791, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  31%|███       | 71/231 [00:30<01:05,  2.43it/s]

tensor(0.0502, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.3114e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  31%|███       | 72/231 [00:30<01:05,  2.43it/s]

tensor(0.0958, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  32%|███▏      | 73/231 [00:30<01:04,  2.44it/s]

tensor(0.2305, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  32%|███▏      | 74/231 [00:31<01:04,  2.43it/s]

tensor(0.0851, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  32%|███▏      | 75/231 [00:31<01:04,  2.43it/s]

tensor(0.1610, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  33%|███▎      | 76/231 [00:32<01:03,  2.43it/s]

tensor(0.0255, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  33%|███▎      | 77/231 [00:32<01:03,  2.44it/s]

tensor(0.0593, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  34%|███▍      | 78/231 [00:32<01:02,  2.44it/s]

tensor(0.0986, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  34%|███▍      | 79/231 [00:33<01:02,  2.44it/s]

tensor(0.0647, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  35%|███▍      | 80/231 [00:33<01:01,  2.44it/s]

tensor(0.0681, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0320, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.8638e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  35%|███▌      | 82/231 [00:34<01:01,  2.44it/s]

tensor(0.1471, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  36%|███▌      | 83/231 [00:34<01:00,  2.44it/s]

tensor(0.0460, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1146, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  37%|███▋      | 85/231 [00:35<01:00,  2.42it/s]

tensor(0.0285, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  37%|███▋      | 86/231 [00:36<00:59,  2.42it/s]

tensor(0.0218, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9436e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  38%|███▊      | 87/231 [00:36<00:59,  2.42it/s]

tensor(0.0260, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  38%|███▊      | 88/231 [00:36<00:58,  2.42it/s]

tensor(0.1424, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.6104e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  39%|███▊      | 89/231 [00:37<00:58,  2.42it/s]

tensor(0.0419, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9861e-06, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1435, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  39%|███▉      | 90/231 [00:37<00:58,  2.40it/s]

tensor(0.2114, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.4177e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  39%|███▉      | 91/231 [00:38<00:58,  2.39it/s]

tensor(0.2376, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0585e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  40%|███▉      | 92/231 [00:38<00:58,  2.38it/s]

tensor(0.0856, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.3356e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  41%|████      | 94/231 [00:39<00:57,  2.40it/s]

tensor(0.0885, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  41%|████      | 95/231 [00:39<00:56,  2.40it/s]

tensor(0.0828, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  42%|████▏     | 96/231 [00:40<00:56,  2.40it/s]

tensor(0.0520, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.2614e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  42%|████▏     | 97/231 [00:40<00:55,  2.42it/s]

tensor(0.0967, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.6188e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0385, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  43%|████▎     | 99/231 [00:41<00:54,  2.43it/s]

tensor(0.0854, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.4296e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  43%|████▎     | 100/231 [00:41<00:54,  2.42it/s]

tensor(0.0456, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  44%|████▎     | 101/231 [00:42<00:53,  2.42it/s]

tensor(0.0576, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7652e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0344, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  45%|████▍     | 103/231 [00:43<00:52,  2.42it/s]

tensor(0.0135, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.7589e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  45%|████▌     | 104/231 [00:43<00:52,  2.41it/s]

tensor(0.0800, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0650, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.5442e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  45%|████▌     | 105/231 [00:44<00:52,  2.41it/s]

tensor(0.0739, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.1914e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  46%|████▋     | 107/231 [00:44<00:51,  2.42it/s]

tensor(0.0490, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  47%|████▋     | 108/231 [00:45<00:50,  2.42it/s]

tensor(0.0272, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  47%|████▋     | 109/231 [00:45<00:50,  2.40it/s]

tensor(0.0904, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0696, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.1478e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  48%|████▊     | 110/231 [00:46<00:50,  2.38it/s]

tensor(0.0368, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  48%|████▊     | 111/231 [00:46<00:50,  2.38it/s]

tensor(0.0688, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  48%|████▊     | 112/231 [00:46<00:50,  2.38it/s]

tensor(0.0509, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  49%|████▉     | 113/231 [00:47<00:49,  2.38it/s]

tensor(0.0619, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  49%|████▉     | 114/231 [00:47<00:49,  2.38it/s]

tensor(0.0257, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  50%|████▉     | 115/231 [00:48<00:48,  2.39it/s]

tensor(0.0324, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  50%|█████     | 116/231 [00:48<00:48,  2.39it/s]

tensor(0.0261, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0030, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  51%|█████     | 118/231 [00:49<00:46,  2.41it/s]

tensor(0.0325, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0202, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  52%|█████▏    | 119/231 [00:49<00:46,  2.38it/s]

tensor(0.1118, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  52%|█████▏    | 120/231 [00:50<00:47,  2.36it/s]

tensor(0.0735, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.2460e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  52%|█████▏    | 121/231 [00:50<00:47,  2.30it/s]

tensor(0.0428, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  53%|█████▎    | 122/231 [00:51<00:47,  2.30it/s]

tensor(0.0464, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0066, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  54%|█████▎    | 124/231 [00:52<00:45,  2.36it/s]

tensor(0.0317, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  54%|█████▍    | 125/231 [00:52<00:44,  2.37it/s]

tensor(0.0793, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  55%|█████▍    | 126/231 [00:52<00:43,  2.39it/s]

tensor(0.0947, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.1206e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  55%|█████▍    | 127/231 [00:53<00:43,  2.40it/s]

tensor(0.1299, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.8490e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  55%|█████▌    | 128/231 [00:53<00:43,  2.37it/s]

tensor(0.0139, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  56%|█████▌    | 129/231 [00:54<00:42,  2.37it/s]

tensor(0.0368, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0320, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.3756e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  57%|█████▋    | 131/231 [00:54<00:41,  2.40it/s]

tensor(0.0742, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1016, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  58%|█████▊    | 133/231 [00:55<00:40,  2.40it/s]

tensor(0.0559, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0084, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  58%|█████▊    | 134/231 [00:56<00:40,  2.40it/s]

tensor(0.0642, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  58%|█████▊    | 135/231 [00:56<00:40,  2.39it/s]

tensor(0.0668, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  59%|█████▉    | 136/231 [00:57<00:39,  2.40it/s]

tensor(0.0988, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  59%|█████▉    | 137/231 [00:57<00:39,  2.40it/s]

tensor(0.0265, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  60%|█████▉    | 138/231 [00:57<00:38,  2.41it/s]

tensor(0.0278, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0671, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  61%|██████    | 140/231 [00:58<00:37,  2.41it/s]

tensor(0.1471, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.3453e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0544, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  61%|██████    | 141/231 [00:59<00:37,  2.39it/s]

tensor(0.0304, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  62%|██████▏   | 143/231 [01:00<00:36,  2.38it/s]

tensor(0.0733, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  62%|██████▏   | 144/231 [01:00<00:36,  2.37it/s]

tensor(0.0313, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  63%|██████▎   | 145/231 [01:00<00:36,  2.39it/s]

tensor(0.1321, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  64%|██████▎   | 147/231 [01:01<00:34,  2.41it/s]

tensor(0.0185, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.6154e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  64%|██████▍   | 148/231 [01:02<00:34,  2.38it/s]

tensor(0.1119, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  65%|██████▍   | 149/231 [01:02<00:34,  2.40it/s]

tensor(0.0777, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  65%|██████▍   | 150/231 [01:02<00:33,  2.39it/s]

tensor(0.0794, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  65%|██████▌   | 151/231 [01:03<00:33,  2.40it/s]

tensor(0.0181, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  66%|██████▌   | 152/231 [01:03<00:33,  2.39it/s]

tensor(0.1189, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0045, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  66%|██████▌   | 153/231 [01:04<00:32,  2.38it/s]

tensor(0.1735, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  67%|██████▋   | 154/231 [01:04<00:32,  2.38it/s]

tensor(0.0475, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.6650e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  67%|██████▋   | 155/231 [01:05<00:32,  2.37it/s]

tensor(0.0442, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  68%|██████▊   | 156/231 [01:05<00:31,  2.37it/s]

tensor(0.1765, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  68%|██████▊   | 157/231 [01:05<00:31,  2.37it/s]

tensor(0.0615, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  68%|██████▊   | 158/231 [01:06<00:30,  2.37it/s]

tensor(0.1007, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  69%|██████▉   | 159/231 [01:06<00:30,  2.38it/s]

tensor(0.1339, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.6862e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  69%|██████▉   | 160/231 [01:07<00:29,  2.37it/s]

tensor(0.0404, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.2043, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  70%|██████▉   | 161/231 [01:07<00:29,  2.37it/s]

tensor(0.0630, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  70%|███████   | 162/231 [01:07<00:29,  2.36it/s]

tensor(0.0099, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  71%|███████   | 164/231 [01:08<00:28,  2.38it/s]

tensor(0.0755, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  71%|███████▏  | 165/231 [01:09<00:27,  2.37it/s]

tensor(0.0593, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  72%|███████▏  | 166/231 [01:09<00:27,  2.37it/s]

tensor(0.0399, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.4886e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  72%|███████▏  | 167/231 [01:10<00:27,  2.37it/s]

tensor(0.0248, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  73%|███████▎  | 168/231 [01:10<00:26,  2.36it/s]

tensor(0.0779, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  73%|███████▎  | 169/231 [01:10<00:25,  2.38it/s]

tensor(0.0395, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.7913e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  74%|███████▎  | 170/231 [01:11<00:25,  2.39it/s]

tensor(0.1258, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.6321e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1293, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  74%|███████▍  | 171/231 [01:11<00:25,  2.38it/s]

tensor(0.1322, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  74%|███████▍  | 172/231 [01:12<00:24,  2.37it/s]

tensor(0.0243, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  75%|███████▍  | 173/231 [01:12<00:24,  2.37it/s]

tensor(0.0151, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  76%|███████▌  | 175/231 [01:13<00:23,  2.40it/s]

tensor(0.1188, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  76%|███████▌  | 176/231 [01:14<00:25,  2.14it/s]

tensor(0.1188, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0503, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  77%|███████▋  | 178/231 [01:14<00:23,  2.24it/s]

tensor(0.0158, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  77%|███████▋  | 179/231 [01:15<00:22,  2.29it/s]

tensor(0.1162, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  78%|███████▊  | 180/231 [01:15<00:22,  2.30it/s]

tensor(0.1357, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  78%|███████▊  | 181/231 [01:16<00:21,  2.32it/s]

tensor(0.0802, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  79%|███████▉  | 182/231 [01:16<00:21,  2.33it/s]

tensor(0.0582, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9488e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  79%|███████▉  | 183/231 [01:16<00:20,  2.36it/s]

tensor(0.0175, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0069, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  80%|███████▉  | 184/231 [01:17<00:19,  2.38it/s]

tensor(0.0511, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0289, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  80%|████████  | 185/231 [01:17<00:19,  2.40it/s]

tensor(0.0340, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2008e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  81%|████████  | 186/231 [01:18<00:18,  2.41it/s]

tensor(0.0520, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  81%|████████  | 187/231 [01:18<00:18,  2.41it/s]

tensor(0.1756, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  81%|████████▏ | 188/231 [01:19<00:17,  2.41it/s]

tensor(0.1205, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.7388e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  82%|████████▏ | 189/231 [01:19<00:17,  2.42it/s]

tensor(0.0431, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.8617e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  82%|████████▏ | 190/231 [01:19<00:17,  2.40it/s]

tensor(0.0123, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0249, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  83%|████████▎ | 191/231 [01:20<00:16,  2.40it/s]

tensor(0.0744, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  83%|████████▎ | 192/231 [01:20<00:16,  2.40it/s]

tensor(0.2258, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  84%|████████▎ | 193/231 [01:21<00:15,  2.39it/s]

tensor(0.0586, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.6480e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  84%|████████▍ | 194/231 [01:21<00:15,  2.40it/s]

tensor(0.0901, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0922, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0027, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  84%|████████▍ | 195/231 [01:21<00:15,  2.40it/s]

tensor(0.1278, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  85%|████████▍ | 196/231 [01:22<00:14,  2.40it/s]

tensor(0.0450, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.9689e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  85%|████████▌ | 197/231 [01:22<00:14,  2.39it/s]

tensor(0.0355, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  86%|████████▌ | 199/231 [01:23<00:13,  2.40it/s]

tensor(0.0775, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.8443e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  87%|████████▋ | 200/231 [01:24<00:12,  2.39it/s]

tensor(0.1280, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  87%|████████▋ | 201/231 [01:24<00:12,  2.39it/s]

tensor(0.0458, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  87%|████████▋ | 202/231 [01:24<00:12,  2.40it/s]

tensor(0.2106, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.9193e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  88%|████████▊ | 203/231 [01:25<00:11,  2.36it/s]

tensor(0.0878, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  88%|████████▊ | 204/231 [01:25<00:11,  2.37it/s]

tensor(0.0254, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0340, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  89%|████████▉ | 206/231 [01:26<00:10,  2.37it/s]

tensor(0.0477, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  90%|████████▉ | 207/231 [01:27<00:10,  2.36it/s]

tensor(0.1054, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0162, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  90%|█████████ | 208/231 [01:27<00:09,  2.36it/s]

tensor(0.0184, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  90%|█████████ | 209/231 [01:27<00:09,  2.36it/s]

tensor(0.0220, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  91%|█████████▏| 211/231 [01:28<00:08,  2.38it/s]

tensor(0.0973, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  92%|█████████▏| 212/231 [01:29<00:07,  2.39it/s]

tensor(0.0574, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0368, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  92%|█████████▏| 213/231 [01:29<00:07,  2.38it/s]

tensor(0.1169, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  93%|█████████▎| 214/231 [01:29<00:07,  2.38it/s]

tensor(0.1802, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  94%|█████████▎| 216/231 [01:30<00:06,  2.38it/s]

tensor(0.2363, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  94%|█████████▍| 217/231 [01:31<00:05,  2.38it/s]

tensor(0.1028, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0477, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  95%|█████████▍| 219/231 [01:32<00:05,  2.39it/s]

tensor(0.0322, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  95%|█████████▌| 220/231 [01:32<00:04,  2.38it/s]

tensor(0.0291, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1710, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  96%|█████████▌| 222/231 [01:33<00:03,  2.38it/s]

tensor(0.0583, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  97%|█████████▋| 223/231 [01:33<00:03,  2.39it/s]

tensor(0.0215, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0629, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  97%|█████████▋| 225/231 [01:34<00:02,  2.40it/s]

tensor(0.0691, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0270, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  98%|█████████▊| 227/231 [01:35<00:01,  2.41it/s]

tensor(0.1934, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1233, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003:  99%|█████████▉| 229/231 [01:36<00:00,  2.35it/s]

tensor(0.0639, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0080, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003: 100%|█████████▉| 230/231 [01:36<00:00,  2.37it/s]

tensor(0.0694, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 003: 100%|██████████| 231/231 [01:37<00:00,  2.38it/s]


tensor(0.0105, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0046, device='cuda:0', grad_fn=<DivBackward1>)
Epoch 4/235, Average Training Loss: 0.0636


100%|██████████| 235/235 [00:21<00:00, 10.93it/s]


Accuracy on validation set: 0.9947
Warmup...


Epoch 004: 100%|██████████| 3/3 [00:00<00:00, 10.76it/s]
Epoch 004:   0%|          | 1/231 [00:00<01:42,  2.24it/s]

tensor(0.0774, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0140, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   1%|          | 2/231 [00:00<01:40,  2.28it/s]

tensor(0.0339, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7513e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   2%|▏         | 4/231 [00:01<01:37,  2.33it/s]

tensor(0.0294, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.1446e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   2%|▏         | 5/231 [00:02<01:36,  2.34it/s]

tensor(0.0355, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   3%|▎         | 6/231 [00:02<01:36,  2.34it/s]

tensor(0.0522, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.0944e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0291, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   3%|▎         | 8/231 [00:03<01:35,  2.34it/s]

tensor(0.0203, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9139e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0386, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   4%|▍         | 10/231 [00:04<01:34,  2.35it/s]

tensor(0.0890, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0424, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.6475e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   5%|▌         | 12/231 [00:05<01:32,  2.36it/s]

tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3653e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   6%|▌         | 13/231 [00:05<01:32,  2.36it/s]

tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1081, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   6%|▌         | 14/231 [00:06<01:35,  2.27it/s]

tensor(0.0078, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.2947e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   6%|▋         | 15/231 [00:06<01:33,  2.31it/s]

tensor(0.0155, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   7%|▋         | 16/231 [00:06<01:32,  2.32it/s]

tensor(0.0358, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.2097e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   8%|▊         | 18/231 [00:07<01:30,  2.36it/s]

tensor(0.1214, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0220, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   8%|▊         | 19/231 [00:08<01:29,  2.37it/s]

tensor(0.0407, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   9%|▊         | 20/231 [00:08<01:28,  2.38it/s]

tensor(0.0337, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:   9%|▉         | 21/231 [00:08<01:27,  2.39it/s]

tensor(0.0398, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  10%|▉         | 22/231 [00:09<01:26,  2.40it/s]

tensor(0.0136, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  10%|▉         | 23/231 [00:09<01:27,  2.39it/s]

tensor(0.0452, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  10%|█         | 24/231 [00:10<01:26,  2.40it/s]

tensor(0.0464, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  11%|█         | 25/231 [00:10<01:25,  2.40it/s]

tensor(0.0142, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  11%|█▏        | 26/231 [00:11<01:24,  2.41it/s]

tensor(0.0305, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  12%|█▏        | 27/231 [00:11<01:25,  2.40it/s]

tensor(0.0274, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.4008e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  13%|█▎        | 29/231 [00:12<01:23,  2.41it/s]

tensor(0.0172, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  13%|█▎        | 30/231 [00:12<01:23,  2.42it/s]

tensor(0.0081, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  13%|█▎        | 31/231 [00:13<01:22,  2.42it/s]

tensor(0.0355, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  14%|█▍        | 32/231 [00:13<01:21,  2.43it/s]

tensor(0.1427, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  14%|█▍        | 33/231 [00:13<01:22,  2.41it/s]

tensor(0.0965, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  15%|█▍        | 34/231 [00:14<01:22,  2.40it/s]

tensor(0.0327, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  15%|█▌        | 35/231 [00:14<01:22,  2.39it/s]

tensor(0.0579, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  16%|█▌        | 36/231 [00:15<01:22,  2.38it/s]

tensor(0.0585, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  16%|█▌        | 37/231 [00:15<01:21,  2.39it/s]

tensor(0.0115, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.8166e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  16%|█▋        | 38/231 [00:16<01:21,  2.38it/s]

tensor(0.0758, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0067, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  17%|█▋        | 39/231 [00:16<01:20,  2.39it/s]

tensor(0.0688, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9868e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  17%|█▋        | 40/231 [00:16<01:19,  2.40it/s]

tensor(0.0485, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  18%|█▊        | 41/231 [00:17<01:18,  2.41it/s]

tensor(0.0351, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3606e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  18%|█▊        | 42/231 [00:17<01:18,  2.42it/s]

tensor(0.1168, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  19%|█▊        | 43/231 [00:18<01:17,  2.42it/s]

tensor(0.0570, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  19%|█▉        | 44/231 [00:18<01:17,  2.40it/s]

tensor(0.0499, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.3856e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  19%|█▉        | 45/231 [00:18<01:17,  2.41it/s]

tensor(0.0082, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0426, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  20%|██        | 47/231 [00:19<01:16,  2.42it/s]

tensor(0.0238, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0197, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.4729e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  21%|██        | 49/231 [00:20<01:15,  2.40it/s]

tensor(0.0543, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  22%|██▏       | 50/231 [00:21<01:15,  2.39it/s]

tensor(0.0348, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0235, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  22%|██▏       | 51/231 [00:21<01:15,  2.38it/s]

tensor(0.0165, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  23%|██▎       | 52/231 [00:21<01:15,  2.37it/s]

tensor(0.0265, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  23%|██▎       | 54/231 [00:22<01:14,  2.38it/s]

tensor(0.0322, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  24%|██▍       | 55/231 [00:23<01:13,  2.40it/s]

tensor(0.0587, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  24%|██▍       | 56/231 [00:23<01:12,  2.41it/s]

tensor(0.0938, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0279, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9451e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  25%|██▌       | 58/231 [00:24<01:16,  2.25it/s]

tensor(0.0624, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  26%|██▌       | 59/231 [00:24<01:14,  2.31it/s]

tensor(0.0407, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.6070e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  26%|██▌       | 60/231 [00:25<01:13,  2.32it/s]

tensor(0.0532, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  26%|██▋       | 61/231 [00:25<01:12,  2.33it/s]

tensor(0.0143, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  27%|██▋       | 62/231 [00:26<01:12,  2.34it/s]

tensor(0.0352, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.0909e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  27%|██▋       | 63/231 [00:26<01:11,  2.34it/s]

tensor(0.0166, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  28%|██▊       | 64/231 [00:27<01:11,  2.34it/s]

tensor(0.0097, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  28%|██▊       | 65/231 [00:27<01:10,  2.35it/s]

tensor(0.0306, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  29%|██▊       | 66/231 [00:27<01:10,  2.35it/s]

tensor(0.0726, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  29%|██▉       | 67/231 [00:28<01:09,  2.37it/s]

tensor(0.0178, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  29%|██▉       | 68/231 [00:28<01:08,  2.36it/s]

tensor(0.0209, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.5912e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  30%|██▉       | 69/231 [00:29<01:08,  2.35it/s]

tensor(0.0077, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  30%|███       | 70/231 [00:29<01:08,  2.36it/s]

tensor(0.0184, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  31%|███       | 71/231 [00:30<01:08,  2.35it/s]

tensor(0.0268, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0176e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0317, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9549e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  31%|███       | 72/231 [00:30<01:07,  2.36it/s]

tensor(0.0343, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  32%|███▏      | 74/231 [00:31<01:05,  2.39it/s]

tensor(0.0419, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  32%|███▏      | 75/231 [00:31<01:05,  2.40it/s]

tensor(0.0513, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  33%|███▎      | 76/231 [00:32<01:04,  2.40it/s]

tensor(0.0334, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0284, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.7138e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  34%|███▍      | 78/231 [00:32<01:03,  2.39it/s]

tensor(0.0235, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  34%|███▍      | 79/231 [00:33<01:03,  2.41it/s]

tensor(0.0164, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.4211e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  35%|███▍      | 80/231 [00:33<01:02,  2.41it/s]

tensor(0.0976, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  35%|███▌      | 81/231 [00:34<01:01,  2.42it/s]

tensor(0.0791, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9326e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  35%|███▌      | 82/231 [00:34<01:01,  2.42it/s]

tensor(0.0234, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  36%|███▌      | 83/231 [00:34<01:01,  2.41it/s]

tensor(0.0215, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  36%|███▋      | 84/231 [00:35<01:01,  2.39it/s]

tensor(0.0117, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9898e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  37%|███▋      | 85/231 [00:35<01:01,  2.38it/s]

tensor(0.0145, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  37%|███▋      | 86/231 [00:36<01:01,  2.37it/s]

tensor(0.0445, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.5149e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  38%|███▊      | 87/231 [00:36<01:00,  2.39it/s]

tensor(0.0205, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  38%|███▊      | 88/231 [00:37<00:59,  2.40it/s]

tensor(0.0342, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  39%|███▊      | 89/231 [00:37<00:58,  2.41it/s]

tensor(0.0278, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.1925e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  39%|███▉      | 90/231 [00:37<00:58,  2.42it/s]

tensor(0.0102, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.2609e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  39%|███▉      | 91/231 [00:38<00:57,  2.43it/s]

tensor(0.0215, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.4963e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  40%|███▉      | 92/231 [00:38<00:57,  2.44it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  40%|████      | 93/231 [00:39<00:57,  2.41it/s]

tensor(0.0141, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0243, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  41%|████      | 95/231 [00:39<00:57,  2.39it/s]

tensor(0.0429, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0427, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  42%|████▏     | 96/231 [00:40<00:56,  2.37it/s]

tensor(0.0875, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  42%|████▏     | 97/231 [00:40<00:56,  2.36it/s]

tensor(0.0268, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9610e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  43%|████▎     | 99/231 [00:41<00:56,  2.35it/s]

tensor(0.0131, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  43%|████▎     | 100/231 [00:42<00:55,  2.38it/s]

tensor(0.0337, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  44%|████▎     | 101/231 [00:42<00:54,  2.39it/s]

tensor(0.0125, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6199e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  44%|████▍     | 102/231 [00:42<00:53,  2.40it/s]

tensor(0.0225, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  45%|████▍     | 103/231 [00:43<00:53,  2.39it/s]

tensor(0.0268, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4037e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0418, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.4342e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  45%|████▌     | 104/231 [00:43<00:53,  2.38it/s]

tensor(0.0322, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4356e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  46%|████▌     | 106/231 [00:44<00:53,  2.35it/s]

tensor(0.0826, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  46%|████▋     | 107/231 [00:45<00:52,  2.35it/s]

tensor(0.0247, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  47%|████▋     | 108/231 [00:45<00:52,  2.36it/s]

tensor(0.0079, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4934e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  47%|████▋     | 109/231 [00:45<00:51,  2.37it/s]

tensor(0.0372, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.2748e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  48%|████▊     | 110/231 [00:46<00:51,  2.37it/s]

tensor(0.0656, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4163e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  48%|████▊     | 111/231 [00:46<00:50,  2.37it/s]

tensor(0.1059, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6714e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  48%|████▊     | 112/231 [00:47<00:50,  2.37it/s]

tensor(0.0755, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.4928e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  49%|████▉     | 113/231 [00:47<00:49,  2.36it/s]

tensor(0.0533, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  49%|████▉     | 114/231 [00:48<00:49,  2.36it/s]

tensor(0.0294, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  50%|████▉     | 115/231 [00:48<00:49,  2.37it/s]

tensor(0.0832, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.1023e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  50%|█████     | 116/231 [00:48<00:48,  2.36it/s]

tensor(0.1590, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.3398e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  51%|█████     | 117/231 [00:49<00:48,  2.36it/s]

tensor(0.0628, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0269, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  51%|█████     | 118/231 [00:49<00:47,  2.38it/s]

tensor(0.0532, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  52%|█████▏    | 119/231 [00:50<00:47,  2.37it/s]

tensor(0.0174, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  52%|█████▏    | 120/231 [00:50<00:46,  2.37it/s]

tensor(0.0165, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.6497e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  52%|█████▏    | 121/231 [00:50<00:46,  2.37it/s]

tensor(0.0254, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  53%|█████▎    | 122/231 [00:51<00:46,  2.37it/s]

tensor(0.0570, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  53%|█████▎    | 123/231 [00:51<00:45,  2.37it/s]

tensor(0.0979, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  54%|█████▎    | 124/231 [00:52<00:45,  2.36it/s]

tensor(0.0784, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  54%|█████▍    | 125/231 [00:52<00:44,  2.36it/s]

tensor(0.0921, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.3288e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  55%|█████▍    | 126/231 [00:53<00:44,  2.36it/s]

tensor(0.0322, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.4175e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  55%|█████▍    | 127/231 [00:53<00:43,  2.37it/s]

tensor(0.1054, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  55%|█████▌    | 128/231 [00:53<00:43,  2.37it/s]

tensor(0.1315, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.9162e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  56%|█████▌    | 129/231 [00:54<00:42,  2.39it/s]

tensor(0.0609, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  56%|█████▋    | 130/231 [00:54<00:42,  2.40it/s]

tensor(0.0336, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.2259e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  57%|█████▋    | 131/231 [00:55<00:41,  2.40it/s]

tensor(0.0899, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  57%|█████▋    | 132/231 [00:55<00:40,  2.42it/s]

tensor(0.0621, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  58%|█████▊    | 133/231 [00:55<00:40,  2.42it/s]

tensor(0.0060, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  58%|█████▊    | 134/231 [00:56<00:39,  2.43it/s]

tensor(0.0857, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.2358e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  58%|█████▊    | 135/231 [00:56<00:39,  2.43it/s]

tensor(0.0045, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  59%|█████▉    | 136/231 [00:57<00:39,  2.43it/s]

tensor(0.0151, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  59%|█████▉    | 137/231 [00:57<00:38,  2.43it/s]

tensor(0.0186, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  60%|█████▉    | 138/231 [00:58<00:38,  2.44it/s]

tensor(0.0290, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  60%|██████    | 139/231 [00:58<00:37,  2.45it/s]

tensor(0.0112, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.7788e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  61%|██████    | 140/231 [00:58<00:37,  2.45it/s]

tensor(0.0271, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  61%|██████    | 141/231 [00:59<00:36,  2.45it/s]

tensor(0.0485, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  61%|██████▏   | 142/231 [00:59<00:36,  2.44it/s]

tensor(0.0431, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  62%|██████▏   | 143/231 [01:00<00:36,  2.44it/s]

tensor(0.0476, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.2717e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0156, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  62%|██████▏   | 144/231 [01:00<00:35,  2.42it/s]

tensor(0.0234, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  63%|██████▎   | 146/231 [01:01<00:35,  2.40it/s]

tensor(0.0598, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0321, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.4214e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  64%|██████▍   | 148/231 [01:02<00:34,  2.41it/s]

tensor(0.0112, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0190, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  65%|██████▍   | 149/231 [01:02<00:34,  2.41it/s]

tensor(0.0169, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  65%|██████▍   | 150/231 [01:03<00:33,  2.41it/s]

tensor(0.0120, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  66%|██████▌   | 152/231 [01:03<00:32,  2.41it/s]

tensor(0.0121, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  66%|██████▌   | 153/231 [01:04<00:32,  2.40it/s]

tensor(0.0142, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0196, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  67%|██████▋   | 155/231 [01:05<00:31,  2.41it/s]

tensor(0.0312, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  68%|██████▊   | 156/231 [01:05<00:30,  2.42it/s]

tensor(0.0156, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  68%|██████▊   | 157/231 [01:05<00:30,  2.43it/s]

tensor(0.0532, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  68%|██████▊   | 158/231 [01:06<00:30,  2.43it/s]

tensor(0.0648, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  69%|██████▉   | 159/231 [01:06<00:29,  2.43it/s]

tensor(0.0341, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  69%|██████▉   | 160/231 [01:07<00:29,  2.44it/s]

tensor(0.0193, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  70%|██████▉   | 161/231 [01:07<00:28,  2.44it/s]

tensor(0.0135, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  70%|███████   | 162/231 [01:08<00:31,  2.22it/s]

tensor(0.0294, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.8201e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  71%|███████   | 163/231 [01:08<00:29,  2.29it/s]

tensor(0.0409, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  71%|███████   | 164/231 [01:08<00:28,  2.33it/s]

tensor(0.0156, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  71%|███████▏  | 165/231 [01:09<00:27,  2.36it/s]

tensor(0.0936, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.9260e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  72%|███████▏  | 166/231 [01:09<00:27,  2.39it/s]

tensor(0.0169, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.4713e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  72%|███████▏  | 167/231 [01:10<00:26,  2.39it/s]

tensor(0.0144, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.5192e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  73%|███████▎  | 168/231 [01:10<00:26,  2.40it/s]

tensor(0.0278, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  73%|███████▎  | 169/231 [01:10<00:25,  2.41it/s]

tensor(0.0543, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  74%|███████▎  | 170/231 [01:11<00:25,  2.40it/s]

tensor(0.0818, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  74%|███████▍  | 171/231 [01:11<00:24,  2.40it/s]

tensor(0.0304, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2304e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0450, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9527e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  74%|███████▍  | 172/231 [01:12<00:24,  2.39it/s]

tensor(0.0128, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.7765e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  75%|███████▌  | 174/231 [01:13<00:23,  2.39it/s]

tensor(0.0549, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  76%|███████▌  | 175/231 [01:13<00:23,  2.37it/s]

tensor(0.0362, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.1463, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  76%|███████▌  | 176/231 [01:13<00:23,  2.33it/s]

tensor(0.0454, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.3911e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  77%|███████▋  | 177/231 [01:14<00:23,  2.33it/s]

tensor(0.0425, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.5975e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  77%|███████▋  | 179/231 [01:15<00:21,  2.38it/s]

tensor(0.0236, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  78%|███████▊  | 180/231 [01:15<00:21,  2.40it/s]

tensor(0.0102, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2740e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  78%|███████▊  | 181/231 [01:16<00:20,  2.41it/s]

tensor(0.0455, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.4008e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0432, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  79%|███████▉  | 183/231 [01:16<00:19,  2.42it/s]

tensor(0.0095, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  80%|███████▉  | 184/231 [01:17<00:19,  2.42it/s]

tensor(0.0155, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.8838e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  80%|████████  | 185/231 [01:17<00:19,  2.42it/s]

tensor(0.0323, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  81%|████████  | 186/231 [01:18<00:18,  2.42it/s]

tensor(0.0175, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  81%|████████  | 187/231 [01:18<00:18,  2.43it/s]

tensor(0.0192, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.4702e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  81%|████████▏ | 188/231 [01:18<00:17,  2.44it/s]

tensor(0.0420, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  82%|████████▏ | 189/231 [01:19<00:17,  2.43it/s]

tensor(0.0149, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9010e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  82%|████████▏ | 190/231 [01:19<00:16,  2.43it/s]

tensor(0.0368, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  83%|████████▎ | 191/231 [01:20<00:16,  2.43it/s]

tensor(0.0190, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  83%|████████▎ | 192/231 [01:20<00:16,  2.43it/s]

tensor(0.0340, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7869e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  84%|████████▎ | 193/231 [01:20<00:15,  2.43it/s]

tensor(0.0417, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.8597e-06, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0114, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  84%|████████▍ | 195/231 [01:21<00:14,  2.42it/s]

tensor(0.0286, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  85%|████████▍ | 196/231 [01:22<00:14,  2.42it/s]

tensor(0.0268, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.6988e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  85%|████████▌ | 197/231 [01:22<00:14,  2.42it/s]

tensor(0.1007, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  86%|████████▌ | 198/231 [01:23<00:13,  2.42it/s]

tensor(0.0821, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  86%|████████▌ | 199/231 [01:23<00:13,  2.43it/s]

tensor(0.0669, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  87%|████████▋ | 200/231 [01:23<00:12,  2.43it/s]

tensor(0.0890, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  87%|████████▋ | 201/231 [01:24<00:12,  2.43it/s]

tensor(0.0607, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  87%|████████▋ | 202/231 [01:24<00:11,  2.43it/s]

tensor(0.0124, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  88%|████████▊ | 203/231 [01:25<00:11,  2.41it/s]

tensor(0.0427, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  88%|████████▊ | 204/231 [01:25<00:11,  2.40it/s]

tensor(0.0210, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.3857e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  89%|████████▊ | 205/231 [01:25<00:10,  2.38it/s]

tensor(0.0504, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  89%|████████▉ | 206/231 [01:26<00:10,  2.38it/s]

tensor(0.0243, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  90%|████████▉ | 207/231 [01:26<00:10,  2.37it/s]

tensor(0.0544, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  90%|█████████ | 208/231 [01:27<00:09,  2.37it/s]

tensor(0.0394, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.5056e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  90%|█████████ | 209/231 [01:27<00:09,  2.37it/s]

tensor(0.0189, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  91%|█████████ | 210/231 [01:28<00:08,  2.37it/s]

tensor(0.0210, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  91%|█████████▏| 211/231 [01:28<00:08,  2.37it/s]

tensor(0.0772, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  92%|█████████▏| 212/231 [01:28<00:07,  2.40it/s]

tensor(0.0603, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  92%|█████████▏| 213/231 [01:29<00:07,  2.42it/s]

tensor(0.0462, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.8497e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0240, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  93%|█████████▎| 215/231 [01:30<00:06,  2.39it/s]

tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  94%|█████████▎| 216/231 [01:30<00:06,  2.40it/s]

tensor(0.0316, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0533, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  94%|█████████▍| 218/231 [01:31<00:05,  2.42it/s]

tensor(0.0498, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.4546e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0426, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  95%|█████████▌| 220/231 [01:32<00:04,  2.42it/s]

tensor(0.0501, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  96%|█████████▌| 221/231 [01:32<00:04,  2.43it/s]

tensor(0.0371, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  96%|█████████▌| 222/231 [01:33<00:03,  2.43it/s]

tensor(0.0669, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  97%|█████████▋| 223/231 [01:33<00:03,  2.43it/s]

tensor(0.0992, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.5800e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  97%|█████████▋| 224/231 [01:33<00:02,  2.43it/s]

tensor(0.1709, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  97%|█████████▋| 225/231 [01:34<00:02,  2.43it/s]

tensor(0.0114, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  98%|█████████▊| 226/231 [01:34<00:02,  2.43it/s]

tensor(0.0345, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.9526e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  98%|█████████▊| 227/231 [01:35<00:01,  2.43it/s]

tensor(0.0234, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4631e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  99%|█████████▊| 228/231 [01:35<00:01,  2.43it/s]

tensor(0.0874, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.0999e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004:  99%|█████████▉| 229/231 [01:35<00:00,  2.44it/s]

tensor(0.0339, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.1657e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004: 100%|█████████▉| 230/231 [01:36<00:00,  2.45it/s]

tensor(0.0456, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 004: 100%|██████████| 231/231 [01:36<00:00,  2.39it/s]

tensor(0.0770, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>)
Epoch 5/235, Average Training Loss: 0.0518



100%|██████████| 235/235 [00:21<00:00, 11.02it/s]


Accuracy on validation set: 0.9966
Warmup...


Epoch 005: 100%|██████████| 3/3 [00:00<00:00, 10.99it/s]
Epoch 005:   0%|          | 0/231 [00:00<?, ?it/s]

tensor(0.0104, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   0%|          | 1/231 [00:00<01:35,  2.40it/s]

tensor(0.0558, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   1%|▏         | 3/231 [00:01<01:35,  2.39it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   2%|▏         | 4/231 [00:01<01:34,  2.40it/s]

tensor(0.0288, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0850e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   2%|▏         | 5/231 [00:02<01:33,  2.41it/s]

tensor(0.0189, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   3%|▎         | 6/231 [00:02<01:33,  2.41it/s]

tensor(0.0070, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.8394e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   3%|▎         | 7/231 [00:02<01:32,  2.41it/s]

tensor(0.0332, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   3%|▎         | 8/231 [00:03<01:33,  2.40it/s]

tensor(0.0522, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.9456e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   4%|▍         | 9/231 [00:03<01:33,  2.38it/s]

tensor(0.0595, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0030, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   4%|▍         | 10/231 [00:04<01:33,  2.37it/s]

tensor(0.1045, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   5%|▍         | 11/231 [00:04<01:32,  2.37it/s]

tensor(0.0601, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   5%|▌         | 12/231 [00:05<01:32,  2.37it/s]

tensor(0.0930, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.1256e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   6%|▌         | 13/231 [00:05<01:32,  2.36it/s]

tensor(0.0164, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   6%|▌         | 14/231 [00:05<01:31,  2.36it/s]

tensor(0.0187, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2175e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   6%|▋         | 15/231 [00:06<01:31,  2.37it/s]

tensor(0.0269, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   7%|▋         | 16/231 [00:06<01:30,  2.38it/s]

tensor(0.0403, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9982e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   7%|▋         | 17/231 [00:07<01:29,  2.40it/s]

tensor(0.0164, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7877e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   8%|▊         | 18/231 [00:07<01:28,  2.41it/s]

tensor(0.0057, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   8%|▊         | 19/231 [00:07<01:27,  2.42it/s]

tensor(0.0451, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7939e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   9%|▊         | 20/231 [00:08<01:26,  2.43it/s]

tensor(0.0155, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0025, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:   9%|▉         | 21/231 [00:08<01:26,  2.44it/s]

tensor(0.0109, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  10%|▉         | 22/231 [00:09<01:25,  2.44it/s]

tensor(0.0064, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  10%|▉         | 23/231 [00:09<01:24,  2.45it/s]

tensor(0.0115, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.1673e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  10%|█         | 24/231 [00:09<01:25,  2.43it/s]

tensor(0.0050, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.2283e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  11%|█         | 25/231 [00:10<01:24,  2.44it/s]

tensor(0.0715, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5742e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  11%|█▏        | 26/231 [00:10<01:24,  2.44it/s]

tensor(0.0303, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0440e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  12%|█▏        | 27/231 [00:11<01:23,  2.44it/s]

tensor(0.0154, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  12%|█▏        | 28/231 [00:11<01:23,  2.43it/s]

tensor(0.0839, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.3572e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  13%|█▎        | 29/231 [00:12<01:22,  2.44it/s]

tensor(0.0330, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4776e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  13%|█▎        | 30/231 [00:12<01:22,  2.43it/s]

tensor(0.0285, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.7737e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  13%|█▎        | 31/231 [00:12<01:21,  2.44it/s]

tensor(0.0095, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.2403e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  14%|█▍        | 32/231 [00:13<01:21,  2.45it/s]

tensor(0.0218, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.7311e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  14%|█▍        | 33/231 [00:13<01:20,  2.45it/s]

tensor(0.0121, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  15%|█▍        | 34/231 [00:14<01:20,  2.45it/s]

tensor(0.0135, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.2915e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  15%|█▌        | 35/231 [00:14<01:20,  2.44it/s]

tensor(0.0296, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0035, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  16%|█▌        | 36/231 [00:14<01:19,  2.44it/s]

tensor(0.0318, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  16%|█▌        | 37/231 [00:15<01:19,  2.45it/s]

tensor(0.0369, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.3342e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  16%|█▋        | 38/231 [00:15<01:18,  2.45it/s]

tensor(0.0713, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  17%|█▋        | 39/231 [00:16<01:18,  2.46it/s]

tensor(0.0772, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  17%|█▋        | 40/231 [00:16<01:18,  2.45it/s]

tensor(0.0370, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.6407e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  18%|█▊        | 41/231 [00:16<01:17,  2.45it/s]

tensor(0.0131, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  18%|█▊        | 42/231 [00:17<01:17,  2.43it/s]

tensor(0.0117, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  19%|█▊        | 43/231 [00:17<01:17,  2.44it/s]

tensor(0.0113, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  19%|█▉        | 44/231 [00:18<01:16,  2.44it/s]

tensor(0.0099, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0115, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  19%|█▉        | 45/231 [00:18<01:16,  2.45it/s]

tensor(0.0287, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  20%|█▉        | 46/231 [00:18<01:15,  2.43it/s]

tensor(0.0459, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  20%|██        | 47/231 [00:19<01:15,  2.44it/s]

tensor(0.0321, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  21%|██        | 48/231 [00:19<01:15,  2.42it/s]

tensor(0.0239, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3071e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  21%|██        | 49/231 [00:20<01:15,  2.41it/s]

tensor(0.0064, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  22%|██▏       | 50/231 [00:20<01:14,  2.42it/s]

tensor(0.0191, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  22%|██▏       | 51/231 [00:21<01:13,  2.43it/s]

tensor(0.0469, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.3609e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  23%|██▎       | 52/231 [00:21<01:13,  2.43it/s]

tensor(0.0329, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  23%|██▎       | 53/231 [00:21<01:13,  2.44it/s]

tensor(0.0376, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  23%|██▎       | 54/231 [00:22<01:12,  2.45it/s]

tensor(0.0054, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  24%|██▍       | 55/231 [00:22<01:20,  2.19it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0025, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  24%|██▍       | 56/231 [00:23<01:17,  2.26it/s]

tensor(0.0365, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  25%|██▍       | 57/231 [00:23<01:19,  2.19it/s]

tensor(0.0202, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  25%|██▌       | 58/231 [00:24<01:16,  2.26it/s]

tensor(0.0213, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  26%|██▌       | 59/231 [00:24<01:14,  2.32it/s]

tensor(0.0091, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  26%|██▌       | 60/231 [00:24<01:12,  2.36it/s]

tensor(0.0131, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  26%|██▋       | 61/231 [00:25<01:11,  2.39it/s]

tensor(0.0077, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  27%|██▋       | 62/231 [00:25<01:10,  2.41it/s]

tensor(0.0521, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  27%|██▋       | 63/231 [00:26<01:09,  2.42it/s]

tensor(0.0109, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  28%|██▊       | 64/231 [00:26<01:08,  2.43it/s]

tensor(0.0371, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  28%|██▊       | 65/231 [00:27<01:08,  2.43it/s]

tensor(0.0075, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  29%|██▊       | 66/231 [00:27<01:07,  2.44it/s]

tensor(0.0730, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  29%|██▉       | 67/231 [00:27<01:07,  2.44it/s]

tensor(0.0071, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  29%|██▉       | 68/231 [00:28<01:06,  2.44it/s]

tensor(0.0066, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.5460e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  30%|██▉       | 69/231 [00:28<01:06,  2.44it/s]

tensor(0.0064, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2970e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  30%|███       | 70/231 [00:29<01:05,  2.44it/s]

tensor(0.0057, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  31%|███       | 71/231 [00:29<01:05,  2.45it/s]

tensor(0.0120, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  31%|███       | 72/231 [00:29<01:04,  2.45it/s]

tensor(0.0443, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  32%|███▏      | 73/231 [00:30<01:04,  2.45it/s]

tensor(0.0097, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  32%|███▏      | 74/231 [00:30<01:04,  2.45it/s]

tensor(0.0203, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  32%|███▏      | 75/231 [00:31<01:03,  2.46it/s]

tensor(0.0048, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  33%|███▎      | 76/231 [00:31<01:02,  2.46it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9937e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  33%|███▎      | 77/231 [00:31<01:02,  2.47it/s]

tensor(0.0349, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  34%|███▍      | 78/231 [00:32<01:01,  2.47it/s]

tensor(0.0052, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  34%|███▍      | 79/231 [00:32<01:01,  2.46it/s]

tensor(0.0075, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  35%|███▍      | 80/231 [00:33<01:01,  2.46it/s]

tensor(0.0091, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  35%|███▌      | 81/231 [00:33<01:01,  2.46it/s]

tensor(0.0293, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.8442e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  35%|███▌      | 82/231 [00:33<01:00,  2.45it/s]

tensor(0.0507, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  36%|███▌      | 83/231 [00:34<01:00,  2.45it/s]

tensor(0.0161, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  36%|███▋      | 84/231 [00:34<01:00,  2.44it/s]

tensor(0.0427, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  37%|███▋      | 85/231 [00:35<00:59,  2.44it/s]

tensor(0.0045, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  37%|███▋      | 86/231 [00:35<00:59,  2.44it/s]

tensor(0.0762, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3412e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  38%|███▊      | 87/231 [00:35<00:59,  2.40it/s]

tensor(0.0682, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  38%|███▊      | 88/231 [00:36<00:59,  2.41it/s]

tensor(0.0255, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  39%|███▊      | 89/231 [00:36<00:59,  2.40it/s]

tensor(0.0161, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  39%|███▉      | 90/231 [00:37<00:58,  2.40it/s]

tensor(0.0070, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  39%|███▉      | 91/231 [00:37<00:58,  2.41it/s]

tensor(0.0182, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  40%|███▉      | 92/231 [00:38<00:57,  2.42it/s]

tensor(0.0146, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  40%|████      | 93/231 [00:38<00:57,  2.41it/s]

tensor(0.0132, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.2637e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  41%|████      | 94/231 [00:38<00:56,  2.42it/s]

tensor(0.0136, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  41%|████      | 95/231 [00:39<00:56,  2.43it/s]

tensor(0.0588, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  42%|████▏     | 96/231 [00:39<00:55,  2.44it/s]

tensor(0.0234, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  42%|████▏     | 97/231 [00:40<00:54,  2.44it/s]

tensor(0.0052, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.3815e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  42%|████▏     | 98/231 [00:40<00:54,  2.44it/s]

tensor(0.0066, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.9938e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  43%|████▎     | 99/231 [00:40<00:54,  2.44it/s]

tensor(0.0098, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  43%|████▎     | 100/231 [00:41<00:53,  2.44it/s]

tensor(0.0250, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  44%|████▎     | 101/231 [00:41<00:53,  2.44it/s]

tensor(0.0089, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.0624e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  44%|████▍     | 102/231 [00:42<00:52,  2.44it/s]

tensor(0.0246, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9353e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  45%|████▍     | 103/231 [00:42<00:52,  2.44it/s]

tensor(0.0154, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  45%|████▌     | 104/231 [00:42<00:52,  2.43it/s]

tensor(0.0117, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0173, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  45%|████▌     | 105/231 [00:43<00:51,  2.43it/s]

tensor(0.0191, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.6628e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  46%|████▌     | 106/231 [00:43<00:51,  2.42it/s]

tensor(0.0293, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.5414e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  46%|████▋     | 107/231 [00:44<00:51,  2.42it/s]

tensor(0.0162, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  47%|████▋     | 108/231 [00:44<00:50,  2.42it/s]

tensor(0.0073, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.6430e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0457, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.1388e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  48%|████▊     | 110/231 [00:45<00:50,  2.39it/s]

tensor(0.0886, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  48%|████▊     | 111/231 [00:45<00:50,  2.40it/s]

tensor(0.0208, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  48%|████▊     | 112/231 [00:46<00:49,  2.41it/s]

tensor(0.0290, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9136e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  49%|████▉     | 113/231 [00:46<00:48,  2.42it/s]

tensor(0.0748, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4569e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  49%|████▉     | 114/231 [00:47<00:48,  2.42it/s]

tensor(0.0235, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  50%|████▉     | 115/231 [00:47<00:47,  2.42it/s]

tensor(0.0376, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  50%|█████     | 116/231 [00:47<00:47,  2.43it/s]

tensor(0.0243, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  51%|█████     | 117/231 [00:48<00:46,  2.43it/s]

tensor(0.0237, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  51%|█████     | 118/231 [00:48<00:46,  2.43it/s]

tensor(0.0155, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.9794e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  52%|█████▏    | 119/231 [00:49<00:45,  2.44it/s]

tensor(0.0096, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  52%|█████▏    | 120/231 [00:49<00:45,  2.43it/s]

tensor(0.0338, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  52%|█████▏    | 121/231 [00:50<00:45,  2.43it/s]

tensor(0.0546, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0627, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  53%|█████▎    | 122/231 [00:50<00:44,  2.43it/s]

tensor(0.0280, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  53%|█████▎    | 123/231 [00:50<00:45,  2.40it/s]

tensor(0.0301, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  54%|█████▍    | 125/231 [00:51<00:44,  2.41it/s]

tensor(0.0625, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9942e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  55%|█████▍    | 126/231 [00:52<00:43,  2.40it/s]

tensor(0.0815, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.2286e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  55%|█████▍    | 127/231 [00:52<00:43,  2.38it/s]

tensor(0.0048, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0114, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0305, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  55%|█████▌    | 128/231 [00:52<00:44,  2.33it/s]

tensor(0.0403, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  56%|█████▌    | 129/231 [00:53<00:43,  2.32it/s]

tensor(0.0618, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  57%|█████▋    | 131/231 [00:54<00:42,  2.35it/s]

tensor(0.0144, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  57%|█████▋    | 132/231 [00:54<00:41,  2.38it/s]

tensor(0.0886, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0075, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  58%|█████▊    | 133/231 [00:55<00:40,  2.41it/s]

tensor(0.0091, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0040, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  58%|█████▊    | 134/231 [00:55<00:40,  2.42it/s]

tensor(0.0566, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  58%|█████▊    | 135/231 [00:55<00:39,  2.42it/s]

tensor(0.0320, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  59%|█████▉    | 136/231 [00:56<00:39,  2.42it/s]

tensor(0.0080, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  59%|█████▉    | 137/231 [00:56<00:38,  2.43it/s]

tensor(0.0404, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0094, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  60%|█████▉    | 138/231 [00:57<00:38,  2.44it/s]

tensor(0.0164, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0025, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  60%|██████    | 139/231 [00:57<00:37,  2.44it/s]

tensor(0.0790, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  61%|██████    | 140/231 [00:57<00:37,  2.44it/s]

tensor(0.0437, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  61%|██████    | 141/231 [00:58<00:36,  2.44it/s]

tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  61%|██████▏   | 142/231 [00:58<00:36,  2.44it/s]

tensor(0.0176, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  62%|██████▏   | 143/231 [00:59<00:36,  2.42it/s]

tensor(0.0535, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.3852e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  62%|██████▏   | 144/231 [00:59<00:35,  2.43it/s]

tensor(0.1654, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0982e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  63%|██████▎   | 145/231 [00:59<00:35,  2.43it/s]

tensor(0.0679, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2128e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  63%|██████▎   | 146/231 [01:00<00:34,  2.44it/s]

tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.1474e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  64%|██████▎   | 147/231 [01:00<00:34,  2.41it/s]

tensor(0.0131, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.1555e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  64%|██████▍   | 148/231 [01:01<00:34,  2.42it/s]

tensor(0.0154, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  65%|██████▍   | 149/231 [01:01<00:33,  2.43it/s]

tensor(0.0143, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.2090e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  65%|██████▍   | 150/231 [01:02<00:33,  2.44it/s]

tensor(0.0203, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  65%|██████▌   | 151/231 [01:02<00:32,  2.43it/s]

tensor(0.0212, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.6036e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  66%|██████▌   | 152/231 [01:02<00:32,  2.44it/s]

tensor(0.0059, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.8950e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  66%|██████▌   | 153/231 [01:03<00:32,  2.43it/s]

tensor(0.0446, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.6738e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  67%|██████▋   | 154/231 [01:03<00:31,  2.44it/s]

tensor(0.0548, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.6244e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  67%|██████▋   | 155/231 [01:04<00:31,  2.44it/s]

tensor(0.0110, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  68%|██████▊   | 156/231 [01:04<00:30,  2.44it/s]

tensor(0.0504, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  68%|██████▊   | 157/231 [01:04<00:30,  2.44it/s]

tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.7021e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  68%|██████▊   | 158/231 [01:05<00:29,  2.45it/s]

tensor(0.1214, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0845e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  69%|██████▉   | 159/231 [01:05<00:29,  2.45it/s]

tensor(0.0044, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  69%|██████▉   | 160/231 [01:06<00:29,  2.44it/s]

tensor(0.0638, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  70%|██████▉   | 161/231 [01:06<00:28,  2.45it/s]

tensor(0.0840, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7261e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  70%|███████   | 162/231 [01:06<00:28,  2.44it/s]

tensor(0.0237, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5835e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  71%|███████   | 163/231 [01:07<00:27,  2.44it/s]

tensor(0.0112, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9037e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  71%|███████   | 164/231 [01:07<00:27,  2.43it/s]

tensor(0.0164, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  71%|███████▏  | 165/231 [01:08<00:27,  2.43it/s]

tensor(0.0230, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.2623e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  72%|███████▏  | 166/231 [01:08<00:26,  2.44it/s]

tensor(0.0168, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.9776e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  72%|███████▏  | 167/231 [01:09<00:26,  2.43it/s]

tensor(0.0389, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.0287e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  73%|███████▎  | 168/231 [01:09<00:25,  2.44it/s]

tensor(0.0256, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.2567e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  73%|███████▎  | 169/231 [01:09<00:25,  2.44it/s]

tensor(0.0180, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  74%|███████▎  | 170/231 [01:10<00:24,  2.44it/s]

tensor(0.0101, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9941e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  74%|███████▍  | 171/231 [01:10<00:24,  2.45it/s]

tensor(0.0060, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7590e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  74%|███████▍  | 172/231 [01:11<00:23,  2.46it/s]

tensor(0.0513, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  75%|███████▍  | 173/231 [01:11<00:23,  2.47it/s]

tensor(0.0348, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0709e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  75%|███████▌  | 174/231 [01:11<00:23,  2.47it/s]

tensor(0.0255, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8485e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  76%|███████▌  | 175/231 [01:12<00:22,  2.46it/s]

tensor(0.0829, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9713e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  76%|███████▌  | 176/231 [01:12<00:22,  2.47it/s]

tensor(0.0889, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.6574e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  77%|███████▋  | 177/231 [01:13<00:21,  2.47it/s]

tensor(0.0085, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.5395e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  77%|███████▋  | 178/231 [01:13<00:21,  2.47it/s]

tensor(0.0543, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.5175e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  77%|███████▋  | 179/231 [01:13<00:21,  2.46it/s]

tensor(0.0313, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  78%|███████▊  | 180/231 [01:14<00:20,  2.46it/s]

tensor(0.0089, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.3215e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  78%|███████▊  | 181/231 [01:14<00:20,  2.46it/s]

tensor(0.0276, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  79%|███████▉  | 182/231 [01:15<00:19,  2.46it/s]

tensor(0.0215, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  79%|███████▉  | 183/231 [01:15<00:19,  2.45it/s]

tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  80%|███████▉  | 184/231 [01:15<00:19,  2.45it/s]

tensor(0.0349, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0761e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  80%|████████  | 185/231 [01:16<00:18,  2.45it/s]

tensor(0.0078, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.4512e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  81%|████████  | 186/231 [01:16<00:18,  2.44it/s]

tensor(0.0077, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  81%|████████  | 187/231 [01:17<00:17,  2.44it/s]

tensor(0.0689, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.8335e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  81%|████████▏ | 188/231 [01:17<00:17,  2.44it/s]

tensor(0.0310, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9426e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  82%|████████▏ | 189/231 [01:17<00:17,  2.44it/s]

tensor(0.0748, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  82%|████████▏ | 190/231 [01:18<00:16,  2.44it/s]

tensor(0.0286, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  83%|████████▎ | 191/231 [01:18<00:16,  2.44it/s]

tensor(0.0214, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2751e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  83%|████████▎ | 192/231 [01:19<00:15,  2.44it/s]

tensor(0.0386, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.0413e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  84%|████████▎ | 193/231 [01:19<00:15,  2.44it/s]

tensor(0.0755, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.7853e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  84%|████████▍ | 194/231 [01:20<00:15,  2.45it/s]

tensor(0.0358, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.4153e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  84%|████████▍ | 195/231 [01:20<00:14,  2.45it/s]

tensor(0.0504, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  85%|████████▍ | 196/231 [01:20<00:14,  2.45it/s]

tensor(0.0906, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0030, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  85%|████████▌ | 197/231 [01:21<00:13,  2.46it/s]

tensor(0.1114, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  86%|████████▌ | 198/231 [01:21<00:13,  2.45it/s]

tensor(0.0195, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  86%|████████▌ | 199/231 [01:22<00:13,  2.46it/s]

tensor(0.0078, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  87%|████████▋ | 200/231 [01:22<00:12,  2.47it/s]

tensor(0.0357, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0069, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  87%|████████▋ | 201/231 [01:22<00:12,  2.45it/s]

tensor(0.0432, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.3284e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  87%|████████▋ | 202/231 [01:23<00:11,  2.44it/s]

tensor(0.0107, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  88%|████████▊ | 203/231 [01:23<00:12,  2.21it/s]

tensor(0.1751, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5862e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  88%|████████▊ | 204/231 [01:24<00:11,  2.26it/s]

tensor(0.0128, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  89%|████████▊ | 205/231 [01:24<00:11,  2.32it/s]

tensor(0.0247, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.6391e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  89%|████████▉ | 206/231 [01:25<00:10,  2.35it/s]

tensor(0.0069, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.7386e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  90%|████████▉ | 207/231 [01:25<00:10,  2.37it/s]

tensor(0.0149, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.4064e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  90%|█████████ | 208/231 [01:25<00:09,  2.39it/s]

tensor(0.0110, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.6763e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  90%|█████████ | 209/231 [01:26<00:09,  2.40it/s]

tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  91%|█████████ | 210/231 [01:26<00:08,  2.42it/s]

tensor(0.0110, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4578e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  91%|█████████▏| 211/231 [01:27<00:08,  2.42it/s]

tensor(0.1218, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3296e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  92%|█████████▏| 212/231 [01:27<00:07,  2.43it/s]

tensor(0.0190, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.8809e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  92%|█████████▏| 213/231 [01:27<00:07,  2.43it/s]

tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2853e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  93%|█████████▎| 214/231 [01:28<00:07,  2.41it/s]

tensor(0.0337, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.8767e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  93%|█████████▎| 215/231 [01:28<00:06,  2.39it/s]

tensor(0.0211, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2828e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  94%|█████████▎| 216/231 [01:29<00:06,  2.39it/s]

tensor(0.0059, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4222e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  94%|█████████▍| 217/231 [01:29<00:05,  2.38it/s]

tensor(0.0180, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  94%|█████████▍| 218/231 [01:30<00:05,  2.37it/s]

tensor(0.0302, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.8232e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  95%|█████████▍| 219/231 [01:30<00:05,  2.37it/s]

tensor(0.0198, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.9581e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  95%|█████████▌| 220/231 [01:30<00:04,  2.37it/s]

tensor(0.0201, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  96%|█████████▌| 221/231 [01:31<00:04,  2.37it/s]

tensor(0.0527, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0537e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  96%|█████████▌| 222/231 [01:31<00:03,  2.36it/s]

tensor(0.0095, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3793e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  97%|█████████▋| 223/231 [01:32<00:03,  2.36it/s]

tensor(0.0089, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  97%|█████████▋| 224/231 [01:32<00:02,  2.36it/s]

tensor(0.0040, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  97%|█████████▋| 225/231 [01:33<00:02,  2.36it/s]

tensor(0.0099, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  98%|█████████▊| 226/231 [01:33<00:02,  2.36it/s]

tensor(0.0256, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.1001e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  98%|█████████▊| 227/231 [01:33<00:01,  2.36it/s]

tensor(0.0055, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.8165e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  99%|█████████▊| 228/231 [01:34<00:01,  2.36it/s]

tensor(0.0472, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005:  99%|█████████▉| 229/231 [01:34<00:00,  2.36it/s]

tensor(0.0161, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9211e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005: 100%|█████████▉| 230/231 [01:35<00:00,  2.36it/s]

tensor(0.0107, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 005: 100%|██████████| 231/231 [01:35<00:00,  2.42it/s]

tensor(0.0346, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)
Epoch 6/235, Average Training Loss: 0.0198



100%|██████████| 235/235 [00:21<00:00, 10.89it/s]


Accuracy on validation set: 0.9975
Warmup...


Epoch 006: 100%|██████████| 3/3 [00:00<00:00, 11.05it/s]
Epoch 006:   0%|          | 1/231 [00:00<01:35,  2.42it/s]

tensor(0.0058, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   1%|          | 2/231 [00:00<01:34,  2.43it/s]

tensor(0.0145, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.8220e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   1%|▏         | 3/231 [00:01<01:33,  2.44it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9199e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   2%|▏         | 4/231 [00:01<01:33,  2.42it/s]

tensor(0.0190, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   2%|▏         | 5/231 [00:02<01:33,  2.43it/s]

tensor(0.0202, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0241, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   3%|▎         | 6/231 [00:02<01:32,  2.42it/s]

tensor(0.0077, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3710e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   3%|▎         | 7/231 [00:02<01:32,  2.43it/s]

tensor(0.0129, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   3%|▎         | 8/231 [00:03<01:31,  2.43it/s]

tensor(0.0154, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.7674e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   4%|▍         | 9/231 [00:03<01:31,  2.43it/s]

tensor(0.0239, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.6702e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   4%|▍         | 10/231 [00:04<01:30,  2.43it/s]

tensor(0.0062, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   5%|▍         | 11/231 [00:04<01:30,  2.43it/s]

tensor(0.0232, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7901e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   5%|▌         | 12/231 [00:04<01:30,  2.43it/s]

tensor(0.0232, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.4633e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   6%|▌         | 13/231 [00:05<01:29,  2.44it/s]

tensor(0.0325, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   6%|▌         | 14/231 [00:05<01:28,  2.45it/s]

tensor(0.0064, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.7111e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   6%|▋         | 15/231 [00:06<01:28,  2.45it/s]

tensor(0.0061, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7841e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   7%|▋         | 16/231 [00:06<01:27,  2.45it/s]

tensor(0.0223, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0960e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   7%|▋         | 17/231 [00:06<01:27,  2.45it/s]

tensor(0.0073, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.3106e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   8%|▊         | 18/231 [00:07<01:27,  2.45it/s]

tensor(0.0239, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.1794e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   8%|▊         | 19/231 [00:07<01:26,  2.45it/s]

tensor(0.0515, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   9%|▊         | 20/231 [00:08<01:26,  2.45it/s]

tensor(0.0038, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0229e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:   9%|▉         | 21/231 [00:08<01:25,  2.45it/s]

tensor(0.0347, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  10%|▉         | 22/231 [00:09<01:25,  2.45it/s]

tensor(0.0402, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  10%|▉         | 23/231 [00:09<01:24,  2.45it/s]

tensor(0.0446, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.2642e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  10%|█         | 24/231 [00:09<01:24,  2.45it/s]

tensor(0.0122, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.5447e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  11%|█         | 25/231 [00:10<01:24,  2.45it/s]

tensor(0.0206, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.5552e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  11%|█▏        | 26/231 [00:10<01:23,  2.44it/s]

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  12%|█▏        | 27/231 [00:11<01:23,  2.43it/s]

tensor(0.0240, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9040e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  12%|█▏        | 28/231 [00:11<01:23,  2.44it/s]

tensor(0.0057, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.5298e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  13%|█▎        | 29/231 [00:11<01:22,  2.44it/s]

tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2474e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  13%|█▎        | 30/231 [00:12<01:22,  2.44it/s]

tensor(0.0038, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.7536e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  13%|█▎        | 31/231 [00:12<01:22,  2.44it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  14%|█▍        | 32/231 [00:13<01:21,  2.44it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9432e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  14%|█▍        | 33/231 [00:13<01:21,  2.44it/s]

tensor(0.0099, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  15%|█▍        | 34/231 [00:13<01:20,  2.44it/s]

tensor(0.0529, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.4231e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  15%|█▌        | 35/231 [00:14<01:20,  2.45it/s]

tensor(0.0928, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.8289e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  16%|█▌        | 36/231 [00:14<01:19,  2.45it/s]

tensor(0.0482, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3137e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  16%|█▌        | 37/231 [00:15<01:19,  2.45it/s]

tensor(0.0563, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  16%|█▋        | 38/231 [00:15<01:18,  2.45it/s]

tensor(0.0262, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  17%|█▋        | 39/231 [00:15<01:18,  2.44it/s]

tensor(0.0228, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3096e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  17%|█▋        | 40/231 [00:16<01:18,  2.45it/s]

tensor(0.0143, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.1995e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  18%|█▊        | 41/231 [00:16<01:17,  2.45it/s]

tensor(0.0038, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  18%|█▊        | 42/231 [00:17<01:17,  2.45it/s]

tensor(0.0386, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  19%|█▊        | 43/231 [00:17<01:17,  2.44it/s]

tensor(0.0391, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4380e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  19%|█▉        | 44/231 [00:18<01:16,  2.43it/s]

tensor(0.0503, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  19%|█▉        | 45/231 [00:18<01:16,  2.44it/s]

tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.0973e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  20%|█▉        | 46/231 [00:18<01:16,  2.42it/s]

tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.2427e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  20%|██        | 47/231 [00:19<01:15,  2.43it/s]

tensor(0.0314, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  21%|██        | 48/231 [00:19<01:15,  2.43it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  21%|██        | 49/231 [00:20<01:14,  2.43it/s]

tensor(0.0095, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.1784e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  22%|██▏       | 50/231 [00:20<01:14,  2.44it/s]

tensor(0.0961, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2586e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  22%|██▏       | 51/231 [00:20<01:13,  2.44it/s]

tensor(0.0069, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.3160e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  23%|██▎       | 52/231 [00:21<01:13,  2.44it/s]

tensor(0.0938, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.4042e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  23%|██▎       | 53/231 [00:21<01:12,  2.45it/s]

tensor(0.0208, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.8844e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  23%|██▎       | 54/231 [00:22<01:12,  2.45it/s]

tensor(0.0162, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3436e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  24%|██▍       | 55/231 [00:22<01:11,  2.45it/s]

tensor(0.0237, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0133e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  24%|██▍       | 56/231 [00:22<01:11,  2.44it/s]

tensor(0.0099, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  25%|██▍       | 57/231 [00:23<01:11,  2.43it/s]

tensor(0.0160, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.0687e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  25%|██▌       | 58/231 [00:23<01:11,  2.43it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  26%|██▌       | 59/231 [00:24<01:10,  2.43it/s]

tensor(0.0044, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  26%|██▌       | 60/231 [00:24<01:10,  2.44it/s]

tensor(0.0098, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.9776e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  26%|██▋       | 61/231 [00:25<01:09,  2.44it/s]

tensor(0.0531, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.2867e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  27%|██▋       | 62/231 [00:25<01:09,  2.44it/s]

tensor(0.0071, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.6566e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  27%|██▋       | 63/231 [00:25<01:08,  2.44it/s]

tensor(0.0222, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.4619e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  28%|██▊       | 64/231 [00:26<01:08,  2.44it/s]

tensor(0.0146, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  28%|██▊       | 65/231 [00:26<01:08,  2.44it/s]

tensor(0.1006, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  29%|██▊       | 66/231 [00:27<01:07,  2.44it/s]

tensor(0.0113, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  29%|██▉       | 67/231 [00:27<01:07,  2.43it/s]

tensor(0.0100, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.3426e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  29%|██▉       | 68/231 [00:27<01:06,  2.44it/s]

tensor(0.0740, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.1605e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  30%|██▉       | 69/231 [00:28<01:06,  2.44it/s]

tensor(0.0238, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3860e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  30%|███       | 70/231 [00:28<01:05,  2.44it/s]

tensor(0.0306, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  31%|███       | 71/231 [00:29<01:05,  2.45it/s]

tensor(0.0348, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  31%|███       | 72/231 [00:29<01:04,  2.45it/s]

tensor(0.0170, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.7809e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  32%|███▏      | 73/231 [00:29<01:04,  2.45it/s]

tensor(0.0025, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.0701e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  32%|███▏      | 74/231 [00:30<01:04,  2.45it/s]

tensor(0.0043, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.2121e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  32%|███▏      | 75/231 [00:30<01:03,  2.44it/s]

tensor(0.0161, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.8186e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  33%|███▎      | 76/231 [00:31<01:03,  2.46it/s]

tensor(0.0529, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3491e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  33%|███▎      | 77/231 [00:31<01:02,  2.46it/s]

tensor(0.0509, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  34%|███▍      | 78/231 [00:31<01:02,  2.45it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.4549e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  34%|███▍      | 79/231 [00:32<01:01,  2.46it/s]

tensor(0.0125, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.6304e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  35%|███▍      | 80/231 [00:32<01:01,  2.46it/s]

tensor(0.0358, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9767e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  35%|███▌      | 81/231 [00:33<01:01,  2.45it/s]

tensor(0.0094, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9620e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  35%|███▌      | 82/231 [00:33<01:00,  2.45it/s]

tensor(0.0075, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9092e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  36%|███▌      | 83/231 [00:33<01:00,  2.45it/s]

tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0578e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  36%|███▋      | 84/231 [00:34<01:00,  2.44it/s]

tensor(0.0450, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2353e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  37%|███▋      | 85/231 [00:34<00:59,  2.44it/s]

tensor(0.0555, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.5840e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  37%|███▋      | 86/231 [00:35<00:59,  2.44it/s]

tensor(0.0060, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2873e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  38%|███▊      | 87/231 [00:35<00:58,  2.44it/s]

tensor(0.0105, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.5072e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  38%|███▊      | 88/231 [00:36<00:58,  2.45it/s]

tensor(0.0677, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  39%|███▊      | 89/231 [00:36<00:58,  2.45it/s]

tensor(0.0151, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2588e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  39%|███▉      | 90/231 [00:36<00:57,  2.45it/s]

tensor(0.0230, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.9667e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  39%|███▉      | 91/231 [00:37<00:57,  2.45it/s]

tensor(0.0175, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6031e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  40%|███▉      | 92/231 [00:37<00:56,  2.45it/s]

tensor(0.0070, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.9568e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  40%|████      | 93/231 [00:38<00:56,  2.45it/s]

tensor(0.0478, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  41%|████      | 94/231 [00:38<00:55,  2.45it/s]

tensor(0.0490, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.3440e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  41%|████      | 95/231 [00:38<00:55,  2.45it/s]

tensor(0.0233, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  42%|████▏     | 96/231 [00:39<00:55,  2.45it/s]

tensor(0.0363, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  42%|████▏     | 97/231 [00:39<00:54,  2.44it/s]

tensor(0.0028, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  42%|████▏     | 98/231 [00:40<00:54,  2.45it/s]

tensor(0.0217, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  43%|████▎     | 99/231 [00:40<00:53,  2.45it/s]

tensor(0.0061, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  43%|████▎     | 100/231 [00:40<00:53,  2.46it/s]

tensor(0.0082, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.0688e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  44%|████▎     | 101/231 [00:41<00:52,  2.46it/s]

tensor(0.0192, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.6896e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  44%|████▍     | 102/231 [00:41<00:52,  2.45it/s]

tensor(0.0112, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  45%|████▍     | 103/231 [00:42<00:52,  2.43it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4050e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  45%|████▌     | 104/231 [00:42<00:52,  2.43it/s]

tensor(0.0223, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.5470e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  45%|████▌     | 105/231 [00:42<00:51,  2.43it/s]

tensor(0.0729, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  46%|████▌     | 106/231 [00:43<00:51,  2.44it/s]

tensor(0.0064, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  46%|████▋     | 107/231 [00:43<00:50,  2.43it/s]

tensor(0.0187, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  47%|████▋     | 108/231 [00:44<00:50,  2.44it/s]

tensor(0.0071, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  47%|████▋     | 109/231 [00:44<00:50,  2.44it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.4732e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  48%|████▊     | 110/231 [00:45<00:50,  2.42it/s]

tensor(0.0025, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.0133e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  48%|████▊     | 111/231 [00:45<00:49,  2.43it/s]

tensor(0.0127, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  48%|████▊     | 112/231 [00:46<00:53,  2.21it/s]

tensor(0.0055, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  49%|████▉     | 113/231 [00:46<00:51,  2.27it/s]

tensor(0.0085, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  49%|████▉     | 114/231 [00:46<00:50,  2.32it/s]

tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.9717e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  50%|████▉     | 115/231 [00:47<00:49,  2.36it/s]

tensor(0.0283, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  50%|█████     | 116/231 [00:47<00:48,  2.39it/s]

tensor(0.0280, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  51%|█████     | 117/231 [00:48<00:47,  2.40it/s]

tensor(0.0231, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  51%|█████     | 118/231 [00:48<00:46,  2.41it/s]

tensor(0.0069, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.2800e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  52%|█████▏    | 119/231 [00:48<00:46,  2.42it/s]

tensor(0.0082, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.0844e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  52%|█████▏    | 120/231 [00:49<00:45,  2.43it/s]

tensor(0.0158, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2393e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  52%|█████▏    | 121/231 [00:49<00:45,  2.44it/s]

tensor(0.0035, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  53%|█████▎    | 122/231 [00:50<00:44,  2.44it/s]

tensor(0.0066, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.1976e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  53%|█████▎    | 123/231 [00:50<00:44,  2.45it/s]

tensor(0.0050, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.4019e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  54%|█████▎    | 124/231 [00:50<00:43,  2.45it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  54%|█████▍    | 125/231 [00:51<00:43,  2.45it/s]

tensor(0.0189, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  55%|█████▍    | 126/231 [00:51<00:42,  2.46it/s]

tensor(0.0042, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  55%|█████▍    | 127/231 [00:52<00:42,  2.47it/s]

tensor(0.0173, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.6603e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  55%|█████▌    | 128/231 [00:52<00:41,  2.47it/s]

tensor(0.0185, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  56%|█████▌    | 129/231 [00:52<00:41,  2.48it/s]

tensor(0.0134, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  56%|█████▋    | 130/231 [00:53<00:40,  2.47it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  57%|█████▋    | 131/231 [00:53<00:40,  2.47it/s]

tensor(0.0157, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  57%|█████▋    | 132/231 [00:54<00:40,  2.47it/s]

tensor(0.0079, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0053, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.5416e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  58%|█████▊    | 134/231 [00:54<00:39,  2.46it/s]

tensor(0.0533, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  58%|█████▊    | 135/231 [00:55<00:39,  2.44it/s]

tensor(0.0147, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  59%|█████▉    | 136/231 [00:55<00:39,  2.42it/s]

tensor(0.0501, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  59%|█████▉    | 137/231 [00:56<00:38,  2.43it/s]

tensor(0.0085, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  60%|█████▉    | 138/231 [00:56<00:38,  2.44it/s]

tensor(0.0299, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  60%|██████    | 139/231 [00:57<00:37,  2.44it/s]

tensor(0.0159, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  61%|██████    | 140/231 [00:57<00:37,  2.44it/s]

tensor(0.0102, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  61%|██████    | 141/231 [00:57<00:37,  2.42it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  61%|██████▏   | 142/231 [00:58<00:36,  2.43it/s]

tensor(0.0241, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  62%|██████▏   | 143/231 [00:58<00:36,  2.44it/s]

tensor(0.0054, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  62%|██████▏   | 144/231 [00:59<00:35,  2.44it/s]

tensor(0.0101, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0053, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  63%|██████▎   | 145/231 [00:59<00:35,  2.45it/s]

tensor(0.0061, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9859e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  63%|██████▎   | 146/231 [00:59<00:34,  2.45it/s]

tensor(0.0050, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  64%|██████▎   | 147/231 [01:00<00:34,  2.45it/s]

tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.6233e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  64%|██████▍   | 148/231 [01:00<00:33,  2.45it/s]

tensor(0.0075, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  65%|██████▍   | 149/231 [01:01<00:33,  2.46it/s]

tensor(0.1097, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7665e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  65%|██████▍   | 150/231 [01:01<00:32,  2.46it/s]

tensor(0.0059, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  65%|██████▌   | 151/231 [01:01<00:32,  2.46it/s]

tensor(0.0226, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  66%|██████▌   | 152/231 [01:02<00:32,  2.46it/s]

tensor(0.0076, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.3268e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  66%|██████▌   | 153/231 [01:02<00:31,  2.46it/s]

tensor(0.0141, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5852e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  67%|██████▋   | 154/231 [01:03<00:31,  2.46it/s]

tensor(0.0207, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  67%|██████▋   | 155/231 [01:03<00:31,  2.45it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  68%|██████▊   | 156/231 [01:03<00:30,  2.45it/s]

tensor(0.0086, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  68%|██████▊   | 157/231 [01:04<00:30,  2.45it/s]

tensor(0.0475, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.6460e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  68%|██████▊   | 158/231 [01:04<00:29,  2.45it/s]

tensor(0.0214, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.1143e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  69%|██████▉   | 159/231 [01:05<00:29,  2.45it/s]

tensor(0.0579, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.0668e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  69%|██████▉   | 160/231 [01:05<00:29,  2.45it/s]

tensor(0.0246, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.6134e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  70%|██████▉   | 161/231 [01:06<00:28,  2.44it/s]

tensor(0.0151, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  70%|███████   | 162/231 [01:06<00:28,  2.44it/s]

tensor(0.0465, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  71%|███████   | 163/231 [01:06<00:27,  2.43it/s]

tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3540e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  71%|███████   | 164/231 [01:07<00:27,  2.43it/s]

tensor(0.0043, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3134e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  71%|███████▏  | 165/231 [01:07<00:27,  2.44it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.8994e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  72%|███████▏  | 166/231 [01:08<00:26,  2.44it/s]

tensor(0.0160, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.7134e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  72%|███████▏  | 167/231 [01:08<00:26,  2.44it/s]

tensor(0.0035, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  73%|███████▎  | 168/231 [01:08<00:25,  2.44it/s]

tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  73%|███████▎  | 169/231 [01:09<00:25,  2.43it/s]

tensor(0.0163, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0799e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  74%|███████▎  | 170/231 [01:09<00:25,  2.43it/s]

tensor(0.0299, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.9364e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  74%|███████▍  | 171/231 [01:10<00:24,  2.44it/s]

tensor(0.0203, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  74%|███████▍  | 172/231 [01:10<00:24,  2.46it/s]

tensor(0.0075, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.9151e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  75%|███████▍  | 173/231 [01:10<00:23,  2.45it/s]

tensor(0.0043, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6233e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  75%|███████▌  | 174/231 [01:11<00:23,  2.46it/s]

tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  76%|███████▌  | 175/231 [01:11<00:22,  2.45it/s]

tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  76%|███████▌  | 176/231 [01:12<00:22,  2.44it/s]

tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.2633e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  77%|███████▋  | 177/231 [01:12<00:22,  2.44it/s]

tensor(0.0176, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.0055e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  77%|███████▋  | 178/231 [01:12<00:21,  2.45it/s]

tensor(0.0459, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  77%|███████▋  | 179/231 [01:13<00:21,  2.45it/s]

tensor(0.0073, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  78%|███████▊  | 180/231 [01:13<00:20,  2.43it/s]

tensor(0.0046, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  78%|███████▊  | 181/231 [01:14<00:20,  2.43it/s]

tensor(0.0605, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.9540e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  79%|███████▉  | 182/231 [01:14<00:20,  2.43it/s]

tensor(0.0045, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  79%|███████▉  | 183/231 [01:15<00:19,  2.44it/s]

tensor(0.0158, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  80%|███████▉  | 184/231 [01:15<00:19,  2.44it/s]

tensor(0.0192, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.7622e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  80%|████████  | 185/231 [01:15<00:18,  2.44it/s]

tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  81%|████████  | 186/231 [01:16<00:18,  2.44it/s]

tensor(0.0276, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.4634e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0203, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  81%|████████▏ | 188/231 [01:17<00:17,  2.43it/s]

tensor(0.0577, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  82%|████████▏ | 189/231 [01:17<00:17,  2.42it/s]

tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.6314e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  82%|████████▏ | 190/231 [01:17<00:16,  2.44it/s]

tensor(0.0080, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3675e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  83%|████████▎ | 191/231 [01:18<00:16,  2.44it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  83%|████████▎ | 192/231 [01:18<00:15,  2.44it/s]

tensor(0.0203, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  84%|████████▎ | 193/231 [01:19<00:15,  2.44it/s]

tensor(0.0071, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  84%|████████▍ | 194/231 [01:19<00:15,  2.44it/s]

tensor(0.0096, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  84%|████████▍ | 195/231 [01:19<00:14,  2.44it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.8980e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  85%|████████▍ | 196/231 [01:20<00:14,  2.44it/s]

tensor(0.0174, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2143e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  85%|████████▌ | 197/231 [01:20<00:13,  2.45it/s]

tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  86%|████████▌ | 198/231 [01:21<00:13,  2.45it/s]

tensor(0.0027, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8538e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  86%|████████▌ | 199/231 [01:21<00:13,  2.45it/s]

tensor(0.0134, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.0366e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  87%|████████▋ | 200/231 [01:22<00:12,  2.42it/s]

tensor(0.0048, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  87%|████████▋ | 201/231 [01:22<00:12,  2.41it/s]

tensor(0.0086, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.0205e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  87%|████████▋ | 202/231 [01:22<00:12,  2.41it/s]

tensor(0.0207, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  88%|████████▊ | 203/231 [01:23<00:11,  2.42it/s]

tensor(0.0085, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  88%|████████▊ | 204/231 [01:23<00:11,  2.43it/s]

tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3135e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  89%|████████▊ | 205/231 [01:24<00:10,  2.43it/s]

tensor(0.0209, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.4786e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  89%|████████▉ | 206/231 [01:24<00:10,  2.42it/s]

tensor(0.0255, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.0723e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.5396e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  90%|█████████ | 208/231 [01:25<00:09,  2.38it/s]

tensor(0.0076, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  90%|█████████ | 209/231 [01:25<00:09,  2.40it/s]

tensor(0.0075, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  91%|█████████ | 210/231 [01:26<00:08,  2.41it/s]

tensor(0.0098, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.8827e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  91%|█████████▏| 211/231 [01:26<00:08,  2.42it/s]

tensor(0.0062, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  92%|█████████▏| 212/231 [01:26<00:07,  2.43it/s]

tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  92%|█████████▏| 213/231 [01:27<00:07,  2.42it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  93%|█████████▎| 214/231 [01:27<00:07,  2.41it/s]

tensor(0.0077, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9009e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  93%|█████████▎| 215/231 [01:28<00:06,  2.42it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  94%|█████████▎| 216/231 [01:28<00:06,  2.41it/s]

tensor(0.0135, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.8366e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  94%|█████████▍| 217/231 [01:29<00:05,  2.41it/s]

tensor(0.0307, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  94%|█████████▍| 218/231 [01:29<00:05,  2.42it/s]

tensor(0.0337, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.3356e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  95%|█████████▍| 219/231 [01:29<00:04,  2.42it/s]

tensor(0.0279, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.7234e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  95%|█████████▌| 220/231 [01:30<00:04,  2.43it/s]

tensor(0.0309, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.2480e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  96%|█████████▌| 221/231 [01:30<00:04,  2.43it/s]

tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0160, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  96%|█████████▌| 222/231 [01:31<00:03,  2.44it/s]

tensor(0.0086, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  97%|█████████▋| 223/231 [01:31<00:03,  2.44it/s]

tensor(0.0091, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  97%|█████████▋| 224/231 [01:31<00:02,  2.41it/s]

tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  97%|█████████▋| 225/231 [01:32<00:02,  2.43it/s]

tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.8696e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  98%|█████████▊| 226/231 [01:32<00:02,  2.43it/s]

tensor(0.0236, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.9397e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  98%|█████████▊| 227/231 [01:33<00:01,  2.44it/s]

tensor(0.0119, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.1112e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  99%|█████████▊| 228/231 [01:33<00:01,  2.44it/s]

tensor(0.1258, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006:  99%|█████████▉| 229/231 [01:33<00:00,  2.44it/s]

tensor(0.0200, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.9001e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006: 100%|█████████▉| 230/231 [01:34<00:00,  2.44it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.0593e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 006: 100%|██████████| 231/231 [01:34<00:00,  2.44it/s]

tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
Epoch 7/235, Average Training Loss: 0.0195



100%|██████████| 235/235 [00:21<00:00, 11.03it/s]


Accuracy on validation set: 0.9994
Warmup...


Epoch 007: 100%|██████████| 3/3 [00:00<00:00, 10.84it/s]
Epoch 007:   0%|          | 1/231 [00:00<01:49,  2.10it/s]

tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0227, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   1%|▏         | 3/231 [00:01<01:39,  2.28it/s]

tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.1075e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   2%|▏         | 4/231 [00:01<01:38,  2.31it/s]

tensor(0.0184, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   2%|▏         | 5/231 [00:02<01:48,  2.08it/s]

tensor(0.0146, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   3%|▎         | 6/231 [00:02<01:43,  2.17it/s]

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.6787e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   3%|▎         | 7/231 [00:03<01:40,  2.22it/s]

tensor(0.0104, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   3%|▎         | 8/231 [00:03<01:38,  2.27it/s]

tensor(0.0078, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9475e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   4%|▍         | 9/231 [00:04<01:35,  2.31it/s]

tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4528e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   4%|▍         | 10/231 [00:04<01:33,  2.36it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.1375e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   5%|▍         | 11/231 [00:04<01:32,  2.39it/s]

tensor(0.0711, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   5%|▌         | 12/231 [00:05<01:31,  2.40it/s]

tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4360e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   6%|▌         | 13/231 [00:05<01:30,  2.41it/s]

tensor(0.0249, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   6%|▌         | 14/231 [00:06<01:29,  2.42it/s]

tensor(0.0048, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   6%|▋         | 15/231 [00:06<01:28,  2.43it/s]

tensor(0.0099, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9591e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   7%|▋         | 16/231 [00:06<01:28,  2.43it/s]

tensor(0.0674, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   7%|▋         | 17/231 [00:07<01:28,  2.42it/s]

tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   8%|▊         | 18/231 [00:07<01:28,  2.42it/s]

tensor(0.0037, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.8460e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   8%|▊         | 19/231 [00:08<01:27,  2.41it/s]

tensor(0.0108, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   9%|▊         | 20/231 [00:08<01:27,  2.42it/s]

tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6969e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:   9%|▉         | 21/231 [00:08<01:27,  2.41it/s]

tensor(0.0393, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.1067e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  10%|▉         | 22/231 [00:09<01:27,  2.40it/s]

tensor(0.0155, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9989e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  10%|▉         | 23/231 [00:09<01:26,  2.41it/s]

tensor(0.0119, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  10%|█         | 24/231 [00:10<01:25,  2.42it/s]

tensor(0.0096, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.1467e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  11%|█         | 25/231 [00:10<01:24,  2.42it/s]

tensor(0.0030, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7025e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  11%|█▏        | 26/231 [00:11<01:24,  2.42it/s]

tensor(0.0073, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  12%|█▏        | 27/231 [00:11<01:24,  2.43it/s]

tensor(0.0171, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  12%|█▏        | 28/231 [00:11<01:23,  2.43it/s]

tensor(0.0078, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0793e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  13%|█▎        | 29/231 [00:12<01:23,  2.43it/s]

tensor(0.0054, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7833e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  13%|█▎        | 30/231 [00:12<01:22,  2.43it/s]

tensor(0.0101, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4035e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  13%|█▎        | 31/231 [00:13<01:22,  2.44it/s]

tensor(0.0069, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9823e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  14%|█▍        | 32/231 [00:13<01:21,  2.43it/s]

tensor(0.0240, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7504e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  14%|█▍        | 33/231 [00:13<01:21,  2.43it/s]

tensor(0.0097, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7197e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0065, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.7942e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  15%|█▍        | 34/231 [00:14<01:21,  2.43it/s]

tensor(0.0098, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  15%|█▌        | 35/231 [00:14<01:21,  2.40it/s]

tensor(0.0567, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.7806e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  16%|█▌        | 36/231 [00:15<01:21,  2.40it/s]

tensor(0.0037, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.4327e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  16%|█▌        | 37/231 [00:15<01:21,  2.39it/s]

tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.2939e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  17%|█▋        | 39/231 [00:16<01:20,  2.40it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  17%|█▋        | 40/231 [00:16<01:19,  2.41it/s]

tensor(0.0377, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.5162e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  18%|█▊        | 41/231 [00:17<01:18,  2.41it/s]

tensor(0.0061, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.6519e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  19%|█▊        | 43/231 [00:18<01:18,  2.39it/s]

tensor(0.0328, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.1192e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  19%|█▉        | 44/231 [00:18<01:18,  2.39it/s]

tensor(0.0092, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8126e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  19%|█▉        | 45/231 [00:18<01:17,  2.41it/s]

tensor(0.0070, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.8103e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  20%|█▉        | 46/231 [00:19<01:16,  2.42it/s]

tensor(0.0853, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8584e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  20%|██        | 47/231 [00:19<01:16,  2.42it/s]

tensor(0.0222, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7867e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  21%|██        | 48/231 [00:20<01:15,  2.43it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.7944e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  21%|██        | 49/231 [00:20<01:14,  2.43it/s]

tensor(0.0027, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  22%|██▏       | 50/231 [00:20<01:14,  2.43it/s]

tensor(0.0212, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.0076e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  22%|██▏       | 51/231 [00:21<01:14,  2.43it/s]

tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  23%|██▎       | 52/231 [00:21<01:13,  2.43it/s]

tensor(0.0064, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  23%|██▎       | 53/231 [00:22<01:13,  2.43it/s]

tensor(0.0343, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  23%|██▎       | 54/231 [00:22<01:12,  2.43it/s]

tensor(0.0073, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.1743e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  24%|██▍       | 55/231 [00:23<01:12,  2.43it/s]

tensor(0.0090, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.9810e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  24%|██▍       | 56/231 [00:23<01:11,  2.45it/s]

tensor(0.0059, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  25%|██▍       | 57/231 [00:23<01:11,  2.42it/s]

tensor(0.0819, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.6479e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  25%|██▌       | 58/231 [00:24<01:12,  2.40it/s]

tensor(0.0055, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  26%|██▌       | 59/231 [00:24<01:11,  2.41it/s]

tensor(0.0084, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  26%|██▌       | 60/231 [00:25<01:11,  2.40it/s]

tensor(0.0518, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.4168e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  26%|██▋       | 61/231 [00:25<01:11,  2.39it/s]

tensor(0.0108, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0051, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  27%|██▋       | 62/231 [00:25<01:10,  2.40it/s]

tensor(0.0077, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  27%|██▋       | 63/231 [00:26<01:09,  2.40it/s]

tensor(0.0101, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  28%|██▊       | 64/231 [00:26<01:09,  2.42it/s]

tensor(0.0153, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  28%|██▊       | 65/231 [00:27<01:08,  2.42it/s]

tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  29%|██▊       | 66/231 [00:27<01:07,  2.43it/s]

tensor(0.0066, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  29%|██▉       | 67/231 [00:27<01:07,  2.44it/s]

tensor(0.0406, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  29%|██▉       | 68/231 [00:28<01:06,  2.44it/s]

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.1394e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  30%|██▉       | 69/231 [00:28<01:06,  2.44it/s]

tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  30%|███       | 70/231 [00:29<01:05,  2.44it/s]

tensor(0.0145, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3716e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  31%|███       | 71/231 [00:29<01:05,  2.45it/s]

tensor(0.0883, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.2531e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  31%|███       | 72/231 [00:30<01:04,  2.45it/s]

tensor(0.0080, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9089e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  32%|███▏      | 73/231 [00:30<01:04,  2.45it/s]

tensor(0.0111, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  32%|███▏      | 74/231 [00:30<01:04,  2.45it/s]

tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  32%|███▏      | 75/231 [00:31<01:04,  2.42it/s]

tensor(0.0127, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  33%|███▎      | 76/231 [00:31<01:04,  2.40it/s]

tensor(0.0103, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  33%|███▎      | 77/231 [00:32<01:03,  2.41it/s]

tensor(0.0097, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4320e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  34%|███▍      | 78/231 [00:32<01:03,  2.42it/s]

tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.6126e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  34%|███▍      | 79/231 [00:32<01:02,  2.43it/s]

tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.5076e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  35%|███▍      | 80/231 [00:33<01:02,  2.43it/s]

tensor(0.0186, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  35%|███▌      | 81/231 [00:33<01:01,  2.44it/s]

tensor(0.0118, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0677e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  35%|███▌      | 82/231 [00:34<01:01,  2.44it/s]

tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.8843e-06, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0038, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.8336e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  36%|███▋      | 84/231 [00:34<01:01,  2.41it/s]

tensor(0.0119, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.8495e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  37%|███▋      | 85/231 [00:35<01:00,  2.42it/s]

tensor(0.0525, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  37%|███▋      | 86/231 [00:35<00:59,  2.43it/s]

tensor(0.0139, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  38%|███▊      | 87/231 [00:36<00:59,  2.43it/s]

tensor(0.0116, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.5777e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  38%|███▊      | 88/231 [00:36<00:58,  2.43it/s]

tensor(0.0058, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4271e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  39%|███▊      | 89/231 [00:37<00:58,  2.44it/s]

tensor(0.0555, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  39%|███▉      | 90/231 [00:37<00:57,  2.45it/s]

tensor(0.0167, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  39%|███▉      | 91/231 [00:37<00:57,  2.45it/s]

tensor(0.0141, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  40%|███▉      | 92/231 [00:38<00:56,  2.44it/s]

tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  40%|████      | 93/231 [00:38<00:56,  2.44it/s]

tensor(0.0108, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  41%|████      | 94/231 [00:39<00:56,  2.43it/s]

tensor(0.0073, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  41%|████      | 95/231 [00:39<00:55,  2.43it/s]

tensor(0.0831, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  42%|████▏     | 96/231 [00:39<00:55,  2.43it/s]

tensor(0.0099, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  42%|████▏     | 97/231 [00:40<00:55,  2.43it/s]

tensor(0.1149, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  42%|████▏     | 98/231 [00:40<00:54,  2.44it/s]

tensor(0.0058, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  43%|████▎     | 99/231 [00:41<00:54,  2.43it/s]

tensor(0.0042, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9649e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  43%|████▎     | 100/231 [00:41<00:53,  2.43it/s]

tensor(0.0044, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.8857e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  44%|████▎     | 101/231 [00:41<00:53,  2.43it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7784e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  44%|████▍     | 102/231 [00:42<00:53,  2.42it/s]

tensor(0.0044, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0679e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  45%|████▍     | 103/231 [00:42<00:52,  2.43it/s]

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  45%|████▌     | 104/231 [00:43<00:52,  2.43it/s]

tensor(0.0179, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.6131e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  45%|████▌     | 105/231 [00:43<00:51,  2.43it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.6509e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  46%|████▌     | 106/231 [00:44<00:51,  2.42it/s]

tensor(0.0109, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3188e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  46%|████▋     | 107/231 [00:44<00:51,  2.42it/s]

tensor(0.0273, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.2842e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  47%|████▋     | 108/231 [00:44<00:50,  2.43it/s]

tensor(0.0070, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6938e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  47%|████▋     | 109/231 [00:45<00:50,  2.42it/s]

tensor(0.0175, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.6177e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  48%|████▊     | 110/231 [00:45<00:49,  2.42it/s]

tensor(0.0045, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.3468e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  48%|████▊     | 111/231 [00:46<00:49,  2.42it/s]

tensor(0.0038, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.6304e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  48%|████▊     | 112/231 [00:46<00:48,  2.43it/s]

tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.1475e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  49%|████▉     | 113/231 [00:46<00:48,  2.43it/s]

tensor(0.0089, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7564e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  49%|████▉     | 114/231 [00:47<00:48,  2.43it/s]

tensor(0.0046, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.9455e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0431, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  50%|█████     | 116/231 [00:48<00:47,  2.42it/s]

tensor(0.0285, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.3161e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  51%|█████     | 117/231 [00:48<00:47,  2.42it/s]

tensor(0.0748, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  51%|█████     | 118/231 [00:48<00:46,  2.43it/s]

tensor(0.0037, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  52%|█████▏    | 119/231 [00:49<00:46,  2.43it/s]

tensor(0.0192, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  52%|█████▏    | 120/231 [00:49<00:45,  2.44it/s]

tensor(0.0205, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.6632e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  52%|█████▏    | 121/231 [00:50<00:44,  2.45it/s]

tensor(0.0040, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.4412e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  53%|█████▎    | 122/231 [00:50<00:44,  2.45it/s]

tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  53%|█████▎    | 123/231 [00:51<00:44,  2.45it/s]

tensor(0.0380, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  54%|█████▎    | 124/231 [00:51<00:43,  2.45it/s]

tensor(0.0044, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  54%|█████▍    | 125/231 [00:51<00:47,  2.22it/s]

tensor(0.0057, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.1794e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  55%|█████▍    | 126/231 [00:52<00:45,  2.29it/s]

tensor(0.0556, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0095, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  55%|█████▍    | 127/231 [00:52<00:44,  2.34it/s]

tensor(0.0144, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  55%|█████▌    | 128/231 [00:53<00:43,  2.38it/s]

tensor(0.0150, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3687e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  56%|█████▌    | 129/231 [00:53<00:42,  2.40it/s]

tensor(0.0030, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.1545e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  56%|█████▋    | 130/231 [00:53<00:41,  2.43it/s]

tensor(0.0242, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.7970e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  57%|█████▋    | 131/231 [00:54<00:41,  2.44it/s]

tensor(0.0092, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  57%|█████▋    | 132/231 [00:54<00:40,  2.44it/s]

tensor(0.0146, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  58%|█████▊    | 133/231 [00:55<00:40,  2.45it/s]

tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  58%|█████▊    | 134/231 [00:55<00:39,  2.45it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  58%|█████▊    | 135/231 [00:56<00:39,  2.45it/s]

tensor(0.0264, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.2473e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  59%|█████▉    | 136/231 [00:56<00:38,  2.45it/s]

tensor(0.0058, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  59%|█████▉    | 137/231 [00:56<00:38,  2.45it/s]

tensor(0.0051, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  60%|█████▉    | 138/231 [00:57<00:38,  2.44it/s]

tensor(0.0338, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.5077e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  60%|██████    | 139/231 [00:57<00:37,  2.44it/s]

tensor(0.0113, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  61%|██████    | 140/231 [00:58<00:37,  2.44it/s]

tensor(0.0186, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9019e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  61%|██████    | 141/231 [00:58<00:36,  2.44it/s]

tensor(0.0071, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.6189e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  61%|██████▏   | 142/231 [00:58<00:36,  2.44it/s]

tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  62%|██████▏   | 143/231 [00:59<00:36,  2.44it/s]

tensor(0.0292, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  62%|██████▏   | 144/231 [00:59<00:35,  2.44it/s]

tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.6808e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  63%|██████▎   | 145/231 [01:00<00:35,  2.44it/s]

tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2528e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  63%|██████▎   | 146/231 [01:00<00:34,  2.44it/s]

tensor(0.0062, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.0813e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  64%|██████▎   | 147/231 [01:00<00:34,  2.45it/s]

tensor(0.0128, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.0754e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  64%|██████▍   | 148/231 [01:01<00:33,  2.45it/s]

tensor(0.0042, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.4859e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  65%|██████▍   | 149/231 [01:01<00:33,  2.44it/s]

tensor(0.0252, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  65%|██████▍   | 150/231 [01:02<00:33,  2.44it/s]

tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  65%|██████▌   | 151/231 [01:02<00:32,  2.43it/s]

tensor(0.0301, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4626e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  66%|██████▌   | 152/231 [01:03<00:32,  2.42it/s]

tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  66%|██████▌   | 153/231 [01:03<00:32,  2.41it/s]

tensor(0.0035, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  67%|██████▋   | 154/231 [01:03<00:32,  2.41it/s]

tensor(0.0147, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  67%|██████▋   | 155/231 [01:04<00:31,  2.39it/s]

tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.9881e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  68%|██████▊   | 156/231 [01:04<00:31,  2.39it/s]

tensor(0.0319, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.9711e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  68%|██████▊   | 157/231 [01:05<00:30,  2.39it/s]

tensor(0.0122, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2803e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  68%|██████▊   | 158/231 [01:05<00:30,  2.40it/s]

tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.3170e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  69%|██████▉   | 159/231 [01:05<00:29,  2.42it/s]

tensor(0.0193, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  69%|██████▉   | 160/231 [01:06<00:29,  2.43it/s]

tensor(0.0096, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  70%|██████▉   | 161/231 [01:06<00:28,  2.43it/s]

tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3743e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  70%|███████   | 162/231 [01:07<00:28,  2.44it/s]

tensor(0.0268, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  71%|███████   | 163/231 [01:07<00:27,  2.44it/s]

tensor(0.0181, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.1800e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  71%|███████   | 164/231 [01:07<00:27,  2.44it/s]

tensor(0.0037, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3166e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  71%|███████▏  | 165/231 [01:08<00:27,  2.44it/s]

tensor(0.0097, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9215e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  72%|███████▏  | 166/231 [01:08<00:26,  2.43it/s]

tensor(0.0931, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  72%|███████▏  | 167/231 [01:09<00:26,  2.43it/s]

tensor(0.0852, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  73%|███████▎  | 168/231 [01:09<00:25,  2.44it/s]

tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  73%|███████▎  | 169/231 [01:10<00:25,  2.45it/s]

tensor(0.0050, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7200e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  74%|███████▎  | 170/231 [01:10<00:24,  2.45it/s]

tensor(0.0114, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6197e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  74%|███████▍  | 171/231 [01:10<00:24,  2.45it/s]

tensor(0.0060, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.1591e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  74%|███████▍  | 172/231 [01:11<00:24,  2.45it/s]

tensor(0.0207, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3594e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  75%|███████▍  | 173/231 [01:11<00:23,  2.44it/s]

tensor(0.0506, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.6475e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  75%|███████▌  | 174/231 [01:12<00:23,  2.44it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3281e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  76%|███████▌  | 175/231 [01:12<00:23,  2.43it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  76%|███████▌  | 176/231 [01:12<00:22,  2.44it/s]

tensor(0.0062, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  77%|███████▋  | 177/231 [01:13<00:22,  2.44it/s]

tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  77%|███████▋  | 178/231 [01:13<00:21,  2.44it/s]

tensor(0.0053, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.6703e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  77%|███████▋  | 179/231 [01:14<00:21,  2.44it/s]

tensor(0.0028, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4038e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  78%|███████▊  | 180/231 [01:14<00:20,  2.45it/s]

tensor(0.0043, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  78%|███████▊  | 181/231 [01:14<00:20,  2.44it/s]

tensor(0.0382, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.2068e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  79%|███████▉  | 182/231 [01:15<00:20,  2.44it/s]

tensor(0.0644, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.1012e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  79%|███████▉  | 183/231 [01:15<00:19,  2.44it/s]

tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  80%|███████▉  | 184/231 [01:16<00:19,  2.43it/s]

tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.1939e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  80%|████████  | 185/231 [01:16<00:18,  2.43it/s]

tensor(0.0370, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  81%|████████  | 186/231 [01:16<00:18,  2.44it/s]

tensor(0.0125, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2033e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  81%|████████  | 187/231 [01:17<00:18,  2.44it/s]

tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.1344e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  81%|████████▏ | 188/231 [01:17<00:17,  2.45it/s]

tensor(0.0259, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.6144e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0324, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  82%|████████▏ | 190/231 [01:18<00:17,  2.39it/s]

tensor(0.0067, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.1540e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0240, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  83%|████████▎ | 192/231 [01:19<00:16,  2.32it/s]

tensor(0.0090, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0057, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.4829e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  84%|████████▎ | 193/231 [01:19<00:16,  2.32it/s]

tensor(0.0134, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  84%|████████▍ | 195/231 [01:20<00:15,  2.33it/s]

tensor(0.0107, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4766e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0399, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  85%|████████▍ | 196/231 [01:21<00:14,  2.33it/s]

tensor(0.0352, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  85%|████████▌ | 197/231 [01:21<00:14,  2.33it/s]

tensor(0.0079, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2415e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  86%|████████▌ | 198/231 [01:22<00:14,  2.34it/s]

tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  86%|████████▌ | 199/231 [01:22<00:13,  2.34it/s]

tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.0506e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  87%|████████▋ | 201/231 [01:23<00:12,  2.34it/s]

tensor(0.0030, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.1906e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  87%|████████▋ | 202/231 [01:23<00:12,  2.34it/s]

tensor(0.0050, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.1972e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  88%|████████▊ | 203/231 [01:24<00:11,  2.35it/s]

tensor(0.0138, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4010e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  89%|████████▊ | 205/231 [01:25<00:10,  2.37it/s]

tensor(0.1200, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  89%|████████▉ | 206/231 [01:25<00:10,  2.38it/s]

tensor(0.0035, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.2269e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  90%|████████▉ | 207/231 [01:25<00:10,  2.40it/s]

tensor(0.0201, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.6619e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0361, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  90%|█████████ | 208/231 [01:26<00:09,  2.38it/s]

tensor(0.0224, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  91%|█████████ | 210/231 [01:27<00:08,  2.38it/s]

tensor(0.0060, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.0782e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0151, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.2426e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  91%|█████████▏| 211/231 [01:27<00:08,  2.37it/s]

tensor(0.0084, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.0540e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  92%|█████████▏| 213/231 [01:28<00:07,  2.37it/s]

tensor(0.0169, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.5865e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0382, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0564e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  93%|█████████▎| 214/231 [01:28<00:07,  2.38it/s]

tensor(0.0099, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  93%|█████████▎| 215/231 [01:29<00:06,  2.37it/s]

tensor(0.0300, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.6225e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  94%|█████████▍| 217/231 [01:30<00:05,  2.38it/s]

tensor(0.0335, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3557e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  94%|█████████▍| 218/231 [01:30<00:05,  2.38it/s]

tensor(0.0147, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0413, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.4525e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  95%|█████████▍| 219/231 [01:30<00:05,  2.37it/s]

tensor(0.0093, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  96%|█████████▌| 221/231 [01:31<00:04,  2.36it/s]

tensor(0.0053, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  96%|█████████▌| 222/231 [01:32<00:03,  2.37it/s]

tensor(0.0157, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  97%|█████████▋| 223/231 [01:32<00:03,  2.38it/s]

tensor(0.0438, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2569e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  97%|█████████▋| 224/231 [01:33<00:02,  2.40it/s]

tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  97%|█████████▋| 225/231 [01:33<00:02,  2.40it/s]

tensor(0.0051, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.3118e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  98%|█████████▊| 226/231 [01:33<00:02,  2.41it/s]

tensor(0.0044, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.1055e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  98%|█████████▊| 227/231 [01:34<00:01,  2.42it/s]

tensor(0.0067, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  99%|█████████▊| 228/231 [01:34<00:01,  2.43it/s]

tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.3186e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007:  99%|█████████▉| 229/231 [01:35<00:00,  2.41it/s]

tensor(0.0213, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.1914e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007: 100%|█████████▉| 230/231 [01:35<00:00,  2.18it/s]

tensor(0.0237, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 007: 100%|██████████| 231/231 [01:36<00:00,  2.40it/s]

tensor(0.0123, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>)
Epoch 8/235, Average Training Loss: 0.0131



100%|██████████| 235/235 [00:21<00:00, 10.74it/s]


Accuracy on validation set: 0.9992
Warmup...


Epoch 008: 100%|██████████| 3/3 [00:00<00:00, 10.69it/s]
Epoch 008:   0%|          | 1/231 [00:00<01:33,  2.45it/s]

tensor(0.0193, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   1%|          | 2/231 [00:00<01:34,  2.41it/s]

tensor(0.0855, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   1%|▏         | 3/231 [00:01<01:34,  2.42it/s]

tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   2%|▏         | 4/231 [00:01<01:34,  2.41it/s]

tensor(0.0135, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0038, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   2%|▏         | 5/231 [00:02<01:33,  2.42it/s]

tensor(0.0035, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.7504e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   3%|▎         | 6/231 [00:02<01:32,  2.43it/s]

tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   3%|▎         | 7/231 [00:02<01:32,  2.43it/s]

tensor(0.0030, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   3%|▎         | 8/231 [00:03<01:31,  2.43it/s]

tensor(0.0077, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   4%|▍         | 9/231 [00:03<01:31,  2.43it/s]

tensor(0.0053, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   4%|▍         | 10/231 [00:04<01:31,  2.43it/s]

tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.1143e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   5%|▍         | 11/231 [00:04<01:30,  2.43it/s]

tensor(0.0053, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   5%|▌         | 12/231 [00:04<01:30,  2.43it/s]

tensor(0.0161, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.5479e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   6%|▌         | 13/231 [00:05<01:29,  2.43it/s]

tensor(0.0040, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0873e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   6%|▌         | 14/231 [00:05<01:29,  2.43it/s]

tensor(0.0131, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   6%|▋         | 15/231 [00:06<01:28,  2.43it/s]

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   7%|▋         | 16/231 [00:06<01:28,  2.43it/s]

tensor(0.0093, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4020e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   7%|▋         | 17/231 [00:07<01:27,  2.43it/s]

tensor(0.0052, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   8%|▊         | 18/231 [00:07<01:27,  2.44it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.0629e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   8%|▊         | 19/231 [00:07<01:27,  2.44it/s]

tensor(0.0079, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   9%|▊         | 20/231 [00:08<01:26,  2.43it/s]

tensor(0.0191, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:   9%|▉         | 21/231 [00:08<01:26,  2.43it/s]

tensor(0.0141, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.5412e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  10%|▉         | 22/231 [00:09<01:26,  2.42it/s]

tensor(0.0582, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.2824e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  10%|▉         | 23/231 [00:09<01:26,  2.42it/s]

tensor(0.0076, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3320e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  10%|█         | 24/231 [00:09<01:25,  2.42it/s]

tensor(0.0213, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9598e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  11%|█         | 25/231 [00:10<01:24,  2.43it/s]

tensor(0.0065, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2611e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  11%|█▏        | 26/231 [00:10<01:24,  2.43it/s]

tensor(0.0060, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  12%|█▏        | 27/231 [00:11<01:24,  2.43it/s]

tensor(0.0052, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.2882e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  12%|█▏        | 28/231 [00:11<01:23,  2.43it/s]

tensor(0.0106, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0043, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  13%|█▎        | 30/231 [00:12<01:23,  2.42it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  13%|█▎        | 31/231 [00:12<01:22,  2.42it/s]

tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  14%|█▍        | 32/231 [00:13<01:21,  2.43it/s]

tensor(0.0045, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  14%|█▍        | 33/231 [00:13<01:21,  2.43it/s]

tensor(0.0130, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.9732e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  15%|█▍        | 34/231 [00:14<01:20,  2.44it/s]

tensor(0.0048, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  15%|█▌        | 35/231 [00:14<01:20,  2.43it/s]

tensor(0.0307, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  16%|█▌        | 36/231 [00:14<01:19,  2.44it/s]

tensor(0.0046, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  16%|█▌        | 37/231 [00:15<01:20,  2.42it/s]

tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  16%|█▋        | 38/231 [00:15<01:20,  2.40it/s]

tensor(0.0143, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7967e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  17%|█▋        | 39/231 [00:16<01:20,  2.39it/s]

tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  17%|█▋        | 40/231 [00:16<01:19,  2.40it/s]

tensor(0.0057, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  18%|█▊        | 41/231 [00:16<01:19,  2.40it/s]

tensor(0.0144, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  18%|█▊        | 42/231 [00:17<01:18,  2.39it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  19%|█▊        | 43/231 [00:17<01:18,  2.40it/s]

tensor(0.0073, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  19%|█▉        | 44/231 [00:18<01:18,  2.38it/s]

tensor(0.0136, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  19%|█▉        | 45/231 [00:18<01:17,  2.40it/s]

tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  20%|█▉        | 46/231 [00:19<01:17,  2.40it/s]

tensor(0.0173, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.6692e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  20%|██        | 47/231 [00:19<01:16,  2.41it/s]

tensor(0.0174, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  21%|██        | 48/231 [00:19<01:15,  2.42it/s]

tensor(0.0948, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  21%|██        | 49/231 [00:20<01:14,  2.43it/s]

tensor(0.0186, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  22%|██▏       | 50/231 [00:20<01:15,  2.39it/s]

tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  22%|██▏       | 51/231 [00:21<01:14,  2.41it/s]

tensor(0.0209, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.4583e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  23%|██▎       | 52/231 [00:21<01:14,  2.41it/s]

tensor(0.0053, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  23%|██▎       | 53/231 [00:21<01:13,  2.42it/s]

tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0107, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  23%|██▎       | 54/231 [00:22<01:12,  2.43it/s]

tensor(0.0282, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0804e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  24%|██▍       | 55/231 [00:22<01:12,  2.43it/s]

tensor(0.0449, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  24%|██▍       | 56/231 [00:23<01:11,  2.43it/s]

tensor(0.0122, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  25%|██▍       | 57/231 [00:23<01:11,  2.44it/s]

tensor(0.0541, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  25%|██▌       | 58/231 [00:23<01:11,  2.43it/s]

tensor(0.0187, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.8467e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  26%|██▌       | 59/231 [00:24<01:10,  2.43it/s]

tensor(0.0069, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7627e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  26%|██▌       | 60/231 [00:24<01:10,  2.43it/s]

tensor(0.0030, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.6099e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  26%|██▋       | 61/231 [00:25<01:09,  2.44it/s]

tensor(0.0068, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  27%|██▋       | 62/231 [00:25<01:09,  2.44it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  27%|██▋       | 63/231 [00:26<01:08,  2.44it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  28%|██▊       | 64/231 [00:26<01:08,  2.44it/s]

tensor(0.0380, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0124e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  28%|██▊       | 65/231 [00:26<01:07,  2.46it/s]

tensor(0.0070, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.0153e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  29%|██▊       | 66/231 [00:27<01:07,  2.44it/s]

tensor(0.0184, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  29%|██▉       | 67/231 [00:27<01:07,  2.44it/s]

tensor(0.0123, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  29%|██▉       | 68/231 [00:28<01:06,  2.44it/s]

tensor(0.0027, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  30%|██▉       | 69/231 [00:28<01:06,  2.43it/s]

tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.4891e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  30%|███       | 70/231 [00:28<01:06,  2.44it/s]

tensor(0.0198, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.6631e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  31%|███       | 71/231 [00:29<01:05,  2.44it/s]

tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  31%|███       | 72/231 [00:29<01:04,  2.45it/s]

tensor(0.0271, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  32%|███▏      | 73/231 [00:30<01:04,  2.44it/s]

tensor(0.0446, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  32%|███▏      | 74/231 [00:30<01:04,  2.44it/s]

tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9463e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  32%|███▏      | 75/231 [00:30<01:03,  2.44it/s]

tensor(0.0234, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  33%|███▎      | 76/231 [00:31<01:03,  2.43it/s]

tensor(0.0166, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  33%|███▎      | 77/231 [00:31<01:03,  2.44it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  34%|███▍      | 78/231 [00:32<01:02,  2.43it/s]

tensor(0.0199, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.4739e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  34%|███▍      | 79/231 [00:32<01:02,  2.44it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  35%|███▍      | 80/231 [00:32<01:02,  2.43it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0164, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  35%|███▌      | 81/231 [00:33<01:01,  2.44it/s]

tensor(0.0103, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  35%|███▌      | 82/231 [00:33<01:01,  2.44it/s]

tensor(0.0138, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  36%|███▌      | 83/231 [00:34<01:00,  2.44it/s]

tensor(0.0094, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  36%|███▋      | 84/231 [00:34<01:00,  2.44it/s]

tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  37%|███▋      | 85/231 [00:35<00:59,  2.45it/s]

tensor(0.0497, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  37%|███▋      | 86/231 [00:35<00:59,  2.44it/s]

tensor(0.0025, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  38%|███▊      | 87/231 [00:35<00:58,  2.45it/s]

tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  38%|███▊      | 88/231 [00:36<00:58,  2.45it/s]

tensor(0.0082, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  39%|███▊      | 89/231 [00:36<00:57,  2.45it/s]

tensor(0.0046, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  39%|███▉      | 90/231 [00:37<00:57,  2.45it/s]

tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  39%|███▉      | 91/231 [00:37<00:57,  2.45it/s]

tensor(0.0105, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  40%|███▉      | 92/231 [00:37<00:56,  2.45it/s]

tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  40%|████      | 93/231 [00:38<00:56,  2.44it/s]

tensor(0.0320, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  41%|████      | 94/231 [00:38<00:56,  2.43it/s]

tensor(0.0077, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  41%|████      | 95/231 [00:39<00:56,  2.43it/s]

tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  42%|████▏     | 96/231 [00:39<00:55,  2.43it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.1326e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  42%|████▏     | 97/231 [00:39<00:54,  2.44it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  42%|████▏     | 98/231 [00:40<00:54,  2.44it/s]

tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  43%|████▎     | 99/231 [00:40<00:54,  2.44it/s]

tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2283e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  43%|████▎     | 100/231 [00:41<00:53,  2.44it/s]

tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3448e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  44%|████▎     | 101/231 [00:41<00:53,  2.45it/s]

tensor(0.0268, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  44%|████▍     | 102/231 [00:41<00:52,  2.45it/s]

tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  45%|████▍     | 103/231 [00:42<00:57,  2.22it/s]

tensor(0.0215, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  45%|████▌     | 104/231 [00:42<00:55,  2.28it/s]

tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7140e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  45%|████▌     | 105/231 [00:43<00:54,  2.33it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  46%|████▌     | 106/231 [00:43<00:52,  2.36it/s]

tensor(0.0027, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7526e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  46%|████▋     | 107/231 [00:44<00:52,  2.38it/s]

tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  47%|████▋     | 108/231 [00:44<00:51,  2.40it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  47%|████▋     | 109/231 [00:44<00:50,  2.41it/s]

tensor(0.0190, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  48%|████▊     | 110/231 [00:45<00:49,  2.42it/s]

tensor(0.0223, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  48%|████▊     | 111/231 [00:45<00:49,  2.43it/s]

tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  48%|████▊     | 112/231 [00:46<00:49,  2.41it/s]

tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  49%|████▉     | 113/231 [00:46<00:48,  2.41it/s]

tensor(0.0160, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  49%|████▉     | 114/231 [00:47<00:48,  2.42it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  50%|████▉     | 115/231 [00:47<00:47,  2.42it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.1509e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  50%|█████     | 116/231 [00:47<00:47,  2.42it/s]

tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.8401e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  51%|█████     | 117/231 [00:48<00:46,  2.43it/s]

tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.5833e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  51%|█████     | 118/231 [00:48<00:46,  2.43it/s]

tensor(0.0110, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  52%|█████▏    | 119/231 [00:49<00:46,  2.43it/s]

tensor(0.0275, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  52%|█████▏    | 120/231 [00:49<00:45,  2.44it/s]

tensor(0.0025, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.5043e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  52%|█████▏    | 121/231 [00:49<00:45,  2.44it/s]

tensor(0.0201, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.5717e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  53%|█████▎    | 122/231 [00:50<00:44,  2.44it/s]

tensor(0.0134, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  53%|█████▎    | 123/231 [00:50<00:44,  2.44it/s]

tensor(0.0145, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.7459e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  54%|█████▎    | 124/231 [00:51<00:43,  2.44it/s]

tensor(0.0103, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  54%|█████▍    | 125/231 [00:51<00:43,  2.44it/s]

tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.2158e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  55%|█████▍    | 126/231 [00:51<00:42,  2.45it/s]

tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0708e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  55%|█████▍    | 127/231 [00:52<00:42,  2.44it/s]

tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.7576e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  55%|█████▌    | 128/231 [00:52<00:42,  2.44it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  56%|█████▌    | 129/231 [00:53<00:41,  2.43it/s]

tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  56%|█████▋    | 130/231 [00:53<00:41,  2.45it/s]

tensor(0.0050, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  57%|█████▋    | 131/231 [00:54<00:40,  2.45it/s]

tensor(0.0044, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  57%|█████▋    | 132/231 [00:54<00:40,  2.45it/s]

tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  58%|█████▊    | 133/231 [00:54<00:40,  2.45it/s]

tensor(0.0059, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  58%|█████▊    | 134/231 [00:55<00:39,  2.45it/s]

tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  58%|█████▊    | 135/231 [00:55<00:39,  2.45it/s]

tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.6015e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  59%|█████▉    | 136/231 [00:56<00:38,  2.45it/s]

tensor(0.0064, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  59%|█████▉    | 137/231 [00:56<00:38,  2.45it/s]

tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.8877e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  60%|█████▉    | 138/231 [00:56<00:37,  2.45it/s]

tensor(0.0055, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3484e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  60%|██████    | 139/231 [00:57<00:37,  2.44it/s]

tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  61%|██████    | 140/231 [00:57<00:37,  2.44it/s]

tensor(0.0135, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  61%|██████    | 141/231 [00:58<00:36,  2.44it/s]

tensor(0.0081, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  61%|██████▏   | 142/231 [00:58<00:36,  2.43it/s]

tensor(0.0050, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.6446e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  62%|██████▏   | 143/231 [00:58<00:36,  2.43it/s]

tensor(0.0089, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  62%|██████▏   | 144/231 [00:59<00:35,  2.43it/s]

tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  63%|██████▎   | 145/231 [00:59<00:35,  2.43it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  63%|██████▎   | 146/231 [01:00<00:34,  2.44it/s]

tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  64%|██████▎   | 147/231 [01:00<00:34,  2.43it/s]

tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  64%|██████▍   | 148/231 [01:00<00:34,  2.43it/s]

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  65%|██████▍   | 149/231 [01:01<00:33,  2.44it/s]

tensor(0.0325, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9624e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  65%|██████▍   | 150/231 [01:01<00:33,  2.45it/s]

tensor(0.0104, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  65%|██████▌   | 151/231 [01:02<00:32,  2.46it/s]

tensor(0.0115, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  66%|██████▌   | 152/231 [01:02<00:32,  2.46it/s]

tensor(0.0193, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.6310e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  66%|██████▌   | 153/231 [01:03<00:31,  2.47it/s]

tensor(0.0209, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  67%|██████▋   | 154/231 [01:03<00:31,  2.46it/s]

tensor(0.0057, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  67%|██████▋   | 155/231 [01:03<00:31,  2.45it/s]

tensor(0.0075, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9959e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  68%|██████▊   | 156/231 [01:04<00:30,  2.45it/s]

tensor(0.0086, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  68%|██████▊   | 157/231 [01:04<00:30,  2.44it/s]

tensor(0.0409, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  68%|██████▊   | 158/231 [01:05<00:29,  2.44it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  69%|██████▉   | 159/231 [01:05<00:29,  2.44it/s]

tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0180, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0046, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  69%|██████▉   | 160/231 [01:05<00:29,  2.42it/s]

tensor(0.0125, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  70%|███████   | 162/231 [01:06<00:28,  2.42it/s]

tensor(0.0268, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.8750e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  71%|███████   | 163/231 [01:07<00:28,  2.42it/s]

tensor(0.0028, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  71%|███████   | 164/231 [01:07<00:27,  2.43it/s]

tensor(0.0054, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  71%|███████▏  | 165/231 [01:07<00:27,  2.43it/s]

tensor(0.0234, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  72%|███████▏  | 166/231 [01:08<00:26,  2.44it/s]

tensor(0.0191, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5097e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  72%|███████▏  | 167/231 [01:08<00:26,  2.44it/s]

tensor(0.0157, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.8680e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  73%|███████▎  | 168/231 [01:09<00:25,  2.44it/s]

tensor(0.0075, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  73%|███████▎  | 169/231 [01:09<00:25,  2.44it/s]

tensor(0.0044, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.8657e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  74%|███████▎  | 170/231 [01:10<00:25,  2.44it/s]

tensor(0.0027, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.1515e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  74%|███████▍  | 171/231 [01:10<00:24,  2.45it/s]

tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  74%|███████▍  | 172/231 [01:10<00:24,  2.46it/s]

tensor(0.0043, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.5979e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  75%|███████▍  | 173/231 [01:11<00:23,  2.42it/s]

tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  75%|███████▌  | 174/231 [01:11<00:23,  2.42it/s]

tensor(0.0192, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  76%|███████▌  | 175/231 [01:12<00:23,  2.43it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  76%|███████▌  | 176/231 [01:12<00:22,  2.43it/s]

tensor(0.0241, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.8695e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  77%|███████▋  | 177/231 [01:12<00:22,  2.42it/s]

tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3488e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0384, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  77%|███████▋  | 179/231 [01:13<00:21,  2.42it/s]

tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5398e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  78%|███████▊  | 180/231 [01:14<00:21,  2.43it/s]

tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3133e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0365, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.6710e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  78%|███████▊  | 181/231 [01:14<00:20,  2.42it/s]

tensor(0.0582, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  79%|███████▉  | 183/231 [01:15<00:19,  2.42it/s]

tensor(0.0143, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9064e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  80%|███████▉  | 184/231 [01:15<00:19,  2.42it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.4321e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0296, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  81%|████████  | 186/231 [01:16<00:18,  2.42it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3231e-06, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.1853e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  81%|████████▏ | 188/231 [01:17<00:18,  2.39it/s]

tensor(0.0092, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.1533e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  82%|████████▏ | 189/231 [01:17<00:17,  2.40it/s]

tensor(0.0101, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0042, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5407e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  83%|████████▎ | 191/231 [01:18<00:16,  2.41it/s]

tensor(0.0105, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7086e-06, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0621, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  83%|████████▎ | 192/231 [01:19<00:16,  2.40it/s]

tensor(0.0063, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.8934e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  84%|████████▎ | 193/231 [01:19<00:15,  2.40it/s]

tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2348e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  84%|████████▍ | 194/231 [01:19<00:15,  2.41it/s]

tensor(0.0161, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  84%|████████▍ | 195/231 [01:20<00:14,  2.40it/s]

tensor(0.0471, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.4467e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  85%|████████▌ | 197/231 [01:21<00:14,  2.42it/s]

tensor(0.0142, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.8354e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  86%|████████▌ | 198/231 [01:21<00:13,  2.42it/s]

tensor(0.0035, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  86%|████████▌ | 199/231 [01:22<00:13,  2.43it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.7193e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  87%|████████▋ | 200/231 [01:22<00:12,  2.43it/s]

tensor(0.0155, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  87%|████████▋ | 202/231 [01:23<00:12,  2.41it/s]

tensor(0.0145, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0653, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2887e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  88%|████████▊ | 203/231 [01:23<00:11,  2.41it/s]

tensor(0.0218, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9963e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  88%|████████▊ | 204/231 [01:24<00:11,  2.40it/s]

tensor(0.0258, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.5494e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  89%|████████▊ | 205/231 [01:24<00:10,  2.37it/s]

tensor(0.0046, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  90%|████████▉ | 207/231 [01:25<00:10,  2.36it/s]

tensor(0.0428, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.5543e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  90%|█████████ | 209/231 [01:26<00:09,  2.37it/s]

tensor(0.0207, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  91%|█████████ | 210/231 [01:26<00:08,  2.38it/s]

tensor(0.0207, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  91%|█████████▏| 211/231 [01:27<00:08,  2.39it/s]

tensor(0.0733, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.8988e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  92%|█████████▏| 212/231 [01:27<00:07,  2.39it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  92%|█████████▏| 213/231 [01:27<00:07,  2.38it/s]

tensor(0.0058, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0250, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  93%|█████████▎| 214/231 [01:28<00:07,  2.37it/s]

tensor(0.0520, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  93%|█████████▎| 215/231 [01:28<00:06,  2.39it/s]

tensor(0.0254, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  94%|█████████▎| 216/231 [01:29<00:06,  2.39it/s]

tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.5312e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  94%|█████████▍| 217/231 [01:29<00:05,  2.38it/s]

tensor(0.0082, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  95%|█████████▍| 219/231 [01:30<00:05,  2.40it/s]

tensor(0.0203, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0057, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.8586e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  96%|█████████▌| 221/231 [01:31<00:04,  2.40it/s]

tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0208, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  97%|█████████▋| 223/231 [01:32<00:03,  2.41it/s]

tensor(0.0198, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0047, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  97%|█████████▋| 224/231 [01:32<00:02,  2.39it/s]

tensor(0.0066, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  97%|█████████▋| 225/231 [01:32<00:02,  2.40it/s]

tensor(0.0043, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  98%|█████████▊| 226/231 [01:33<00:02,  2.39it/s]

tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  98%|█████████▊| 227/231 [01:33<00:01,  2.37it/s]

tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  99%|█████████▊| 228/231 [01:34<00:01,  2.35it/s]

tensor(0.0521, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008:  99%|█████████▉| 229/231 [01:34<00:00,  2.37it/s]

tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008: 100%|█████████▉| 230/231 [01:35<00:00,  2.33it/s]

tensor(0.0080, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 008: 100%|██████████| 231/231 [01:35<00:00,  2.42it/s]


Epoch 9/235, Average Training Loss: 0.0113


100%|██████████| 235/235 [00:21<00:00, 11.04it/s]


Accuracy on validation set: 0.9995
Warmup...


Epoch 009: 100%|██████████| 3/3 [00:00<00:00, 11.03it/s]
Epoch 009:   0%|          | 0/231 [00:00<?, ?it/s]

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   0%|          | 1/231 [00:00<01:35,  2.42it/s]

tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.0023e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   1%|          | 2/231 [00:00<01:36,  2.37it/s]

tensor(0.0163, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   1%|▏         | 3/231 [00:01<01:36,  2.36it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   2%|▏         | 4/231 [00:01<01:35,  2.38it/s]

tensor(0.0279, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.7620e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   2%|▏         | 5/231 [00:02<01:35,  2.37it/s]

tensor(0.0051, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   3%|▎         | 6/231 [00:02<01:34,  2.37it/s]

tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3458e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   3%|▎         | 7/231 [00:02<01:35,  2.34it/s]

tensor(0.0048, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.4240e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   3%|▎         | 8/231 [00:03<01:35,  2.33it/s]

tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2483e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   4%|▍         | 9/231 [00:03<01:35,  2.33it/s]

tensor(0.0095, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   5%|▍         | 11/231 [00:04<01:34,  2.33it/s]

tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   5%|▌         | 12/231 [00:05<01:42,  2.13it/s]

tensor(0.0478, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   6%|▌         | 13/231 [00:05<01:38,  2.21it/s]

tensor(0.0045, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   6%|▌         | 14/231 [00:06<01:35,  2.27it/s]

tensor(0.0025, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.7625e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   6%|▋         | 15/231 [00:06<01:33,  2.32it/s]

tensor(0.0069, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.2205e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   7%|▋         | 16/231 [00:06<01:31,  2.35it/s]

tensor(0.0052, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   7%|▋         | 17/231 [00:07<01:30,  2.37it/s]

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.1237e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   8%|▊         | 18/231 [00:07<01:29,  2.39it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   8%|▊         | 19/231 [00:08<01:28,  2.40it/s]

tensor(0.0025, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:   9%|▊         | 20/231 [00:08<01:27,  2.40it/s]

tensor(0.0222, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0195, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  10%|▉         | 22/231 [00:09<01:27,  2.38it/s]

tensor(0.0171, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0087, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  10%|█         | 24/231 [00:10<01:26,  2.40it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4236e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  11%|█▏        | 26/231 [00:11<01:25,  2.41it/s]

tensor(0.0187, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.9744e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  12%|█▏        | 27/231 [00:11<01:24,  2.42it/s]

tensor(0.0053, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.9224e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  12%|█▏        | 28/231 [00:11<01:23,  2.42it/s]

tensor(0.0061, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  13%|█▎        | 29/231 [00:12<01:22,  2.43it/s]

tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.6811e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  13%|█▎        | 30/231 [00:12<01:22,  2.43it/s]

tensor(0.0068, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.6000e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  13%|█▎        | 31/231 [00:13<01:23,  2.40it/s]

tensor(0.0099, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  14%|█▍        | 32/231 [00:13<01:22,  2.41it/s]

tensor(0.0055, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.5746e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  14%|█▍        | 33/231 [00:13<01:22,  2.41it/s]

tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.7132e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0338, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.6960e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  15%|█▌        | 35/231 [00:14<01:21,  2.42it/s]

tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.1387e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  16%|█▌        | 36/231 [00:15<01:20,  2.42it/s]

tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6087e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  16%|█▌        | 37/231 [00:15<01:20,  2.42it/s]

tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.2224e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  16%|█▋        | 38/231 [00:16<01:19,  2.42it/s]

tensor(0.0040, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.7685e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  17%|█▋        | 39/231 [00:16<01:19,  2.42it/s]

tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.7695e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  17%|█▋        | 40/231 [00:16<01:18,  2.43it/s]

tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.7910e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  18%|█▊        | 41/231 [00:17<01:18,  2.43it/s]

tensor(0.0100, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8531e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  18%|█▊        | 42/231 [00:17<01:17,  2.43it/s]

tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  19%|█▊        | 43/231 [00:18<01:17,  2.44it/s]

tensor(0.0246, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  19%|█▉        | 44/231 [00:18<01:16,  2.44it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.9484e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  19%|█▉        | 45/231 [00:18<01:16,  2.44it/s]

tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  20%|█▉        | 46/231 [00:19<01:15,  2.43it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  20%|██        | 47/231 [00:19<01:15,  2.43it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  21%|██        | 48/231 [00:20<01:15,  2.43it/s]

tensor(0.0030, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5813e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  21%|██        | 49/231 [00:20<01:14,  2.43it/s]

tensor(0.0205, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3886e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  22%|██▏       | 50/231 [00:20<01:14,  2.43it/s]

tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4841e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  23%|██▎       | 52/231 [00:21<01:13,  2.43it/s]

tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.5474e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  23%|██▎       | 53/231 [00:22<01:13,  2.43it/s]

tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.1318e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  23%|██▎       | 54/231 [00:22<01:12,  2.43it/s]

tensor(0.0112, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  24%|██▍       | 55/231 [00:23<01:12,  2.43it/s]

tensor(0.0205, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.1254e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  24%|██▍       | 56/231 [00:23<01:12,  2.42it/s]

tensor(0.0096, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  25%|██▍       | 57/231 [00:23<01:12,  2.40it/s]

tensor(0.0048, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  25%|██▌       | 58/231 [00:24<01:12,  2.39it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0906e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  26%|██▌       | 59/231 [00:24<01:12,  2.38it/s]

tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.6107e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  26%|██▌       | 60/231 [00:25<01:11,  2.38it/s]

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  26%|██▋       | 61/231 [00:25<01:11,  2.37it/s]

tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0035, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  27%|██▋       | 62/231 [00:25<01:11,  2.37it/s]

tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.4542e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  27%|██▋       | 63/231 [00:26<01:11,  2.36it/s]

tensor(0.0080, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  28%|██▊       | 64/231 [00:26<01:10,  2.36it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  29%|██▊       | 66/231 [00:27<01:09,  2.36it/s]

tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.2487e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  29%|██▉       | 67/231 [00:28<01:09,  2.36it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  29%|██▉       | 68/231 [00:28<01:08,  2.37it/s]

tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  30%|██▉       | 69/231 [00:28<01:08,  2.37it/s]

tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0961e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.6936e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  30%|███       | 70/231 [00:29<01:08,  2.37it/s]

tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0516e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  31%|███       | 71/231 [00:29<01:07,  2.36it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  31%|███       | 72/231 [00:30<01:07,  2.36it/s]

tensor(0.0061, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.0415e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  32%|███▏      | 74/231 [00:31<01:06,  2.36it/s]

tensor(0.0141, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  32%|███▏      | 75/231 [00:31<01:06,  2.35it/s]

tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.7178e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  33%|███▎      | 76/231 [00:31<01:05,  2.37it/s]

tensor(0.0066, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.5927e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  33%|███▎      | 77/231 [00:32<01:05,  2.37it/s]

tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7721e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  34%|███▍      | 78/231 [00:32<01:04,  2.36it/s]

tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.8384e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  35%|███▍      | 80/231 [00:33<01:04,  2.36it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.5176e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  35%|███▌      | 81/231 [00:34<01:03,  2.36it/s]

tensor(0.0144, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7413e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  35%|███▌      | 82/231 [00:34<01:03,  2.36it/s]

tensor(0.0006, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  36%|███▌      | 83/231 [00:34<01:02,  2.36it/s]

tensor(0.0050, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  36%|███▋      | 84/231 [00:35<01:01,  2.38it/s]

tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0450e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  37%|███▋      | 85/231 [00:35<01:00,  2.40it/s]

tensor(0.0087, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  37%|███▋      | 86/231 [00:36<00:59,  2.43it/s]

tensor(0.0122, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.3304e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  38%|███▊      | 87/231 [00:36<00:59,  2.43it/s]

tensor(0.0013, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  38%|███▊      | 88/231 [00:36<00:58,  2.44it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.8770e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  39%|███▊      | 89/231 [00:37<00:58,  2.44it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.9502e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  39%|███▉      | 90/231 [00:37<00:57,  2.44it/s]

tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  39%|███▉      | 91/231 [00:38<00:57,  2.42it/s]

tensor(0.0084, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  40%|███▉      | 92/231 [00:38<00:57,  2.42it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  40%|████      | 93/231 [00:38<00:57,  2.42it/s]

tensor(0.0194, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.7765e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  41%|████      | 94/231 [00:39<00:56,  2.43it/s]

tensor(0.0076, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  41%|████      | 95/231 [00:39<00:55,  2.43it/s]

tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.8255e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  42%|████▏     | 96/231 [00:40<00:55,  2.43it/s]

tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.0127e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  42%|████▏     | 97/231 [00:40<00:55,  2.41it/s]

tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3683e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  42%|████▏     | 98/231 [00:41<00:55,  2.39it/s]

tensor(0.0061, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.1038e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0864, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0096, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  43%|████▎     | 100/231 [00:41<00:54,  2.40it/s]

tensor(0.0132, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.8441e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  44%|████▎     | 101/231 [00:42<00:54,  2.38it/s]

tensor(0.0019, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0731e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  44%|████▍     | 102/231 [00:42<00:54,  2.38it/s]

tensor(0.0431, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  45%|████▍     | 103/231 [00:43<00:53,  2.39it/s]

tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0005, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  45%|████▌     | 105/231 [00:43<00:52,  2.40it/s]

tensor(0.0027, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3875e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0023, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  46%|████▌     | 106/231 [00:44<00:52,  2.38it/s]

tensor(0.0159, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7887e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  47%|████▋     | 108/231 [00:45<00:51,  2.38it/s]

tensor(0.0146, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.8138e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  47%|████▋     | 109/231 [00:45<00:50,  2.40it/s]

tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2412e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  48%|████▊     | 110/231 [00:46<00:50,  2.41it/s]

tensor(0.0245, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.2675e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  48%|████▊     | 111/231 [00:46<00:49,  2.42it/s]

tensor(0.0056, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  48%|████▊     | 112/231 [00:46<00:48,  2.43it/s]

tensor(0.0078, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  49%|████▉     | 113/231 [00:47<00:48,  2.45it/s]

tensor(0.0333, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.0357e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  49%|████▉     | 114/231 [00:47<00:48,  2.42it/s]

tensor(0.0061, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.0438e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  50%|████▉     | 115/231 [00:48<00:48,  2.41it/s]

tensor(0.0049, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0478e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  50%|█████     | 116/231 [00:48<00:48,  2.39it/s]

tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  51%|█████     | 117/231 [00:48<00:47,  2.39it/s]

tensor(0.0062, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.5437e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  51%|█████     | 118/231 [00:49<00:47,  2.38it/s]

tensor(0.0148, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.3699e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  52%|█████▏    | 119/231 [00:49<00:47,  2.37it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.5881e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  52%|█████▏    | 120/231 [00:50<00:46,  2.37it/s]

tensor(0.0113, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.4765e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  52%|█████▏    | 121/231 [00:50<00:46,  2.37it/s]

tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  53%|█████▎    | 122/231 [00:51<00:45,  2.37it/s]

tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.5001e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  53%|█████▎    | 123/231 [00:51<00:45,  2.38it/s]

tensor(0.0168, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2435e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  54%|█████▎    | 124/231 [00:51<00:45,  2.37it/s]

tensor(0.0059, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.1371e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  54%|█████▍    | 125/231 [00:52<00:44,  2.37it/s]

tensor(0.0548, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.7360e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  55%|█████▍    | 126/231 [00:52<00:44,  2.37it/s]

tensor(0.0042, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.4044e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  55%|█████▍    | 127/231 [00:53<00:43,  2.37it/s]

tensor(0.0089, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  55%|█████▌    | 128/231 [00:53<00:43,  2.36it/s]

tensor(0.0017, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  56%|█████▌    | 129/231 [00:54<00:43,  2.36it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  56%|█████▋    | 130/231 [00:54<00:42,  2.36it/s]

tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.0760e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  57%|█████▋    | 131/231 [00:54<00:42,  2.36it/s]

tensor(0.0040, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.4848e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  57%|█████▋    | 132/231 [00:55<00:41,  2.36it/s]

tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7139e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  58%|█████▊    | 133/231 [00:55<00:41,  2.36it/s]

tensor(0.0009, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.2690e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.6682e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  58%|█████▊    | 134/231 [00:56<00:41,  2.36it/s]

tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7249e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  59%|█████▉    | 136/231 [00:57<00:40,  2.36it/s]

tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.8935e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  59%|█████▉    | 137/231 [00:57<00:39,  2.38it/s]

tensor(0.0033, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.2342e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  60%|█████▉    | 138/231 [00:57<00:39,  2.37it/s]

tensor(0.0052, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7307e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  60%|██████    | 139/231 [00:58<00:38,  2.37it/s]

tensor(0.0240, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  61%|██████    | 140/231 [00:58<00:38,  2.37it/s]

tensor(0.0128, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.1936e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  61%|██████    | 141/231 [00:59<00:37,  2.37it/s]

tensor(0.0285, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8467e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  61%|██████▏   | 142/231 [00:59<00:37,  2.39it/s]

tensor(0.0012, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.4735e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  62%|██████▏   | 143/231 [00:59<00:36,  2.39it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.6920e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  62%|██████▏   | 144/231 [01:00<00:36,  2.40it/s]

tensor(0.0279, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  63%|██████▎   | 145/231 [01:00<00:35,  2.42it/s]

tensor(0.0609, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.1925e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  63%|██████▎   | 146/231 [01:01<00:35,  2.42it/s]

tensor(0.0371, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3035e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  64%|██████▎   | 147/231 [01:01<00:34,  2.41it/s]

tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.4125e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  64%|██████▍   | 148/231 [01:02<00:34,  2.39it/s]

tensor(0.0086, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.1879e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  65%|██████▍   | 149/231 [01:02<00:34,  2.41it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6787e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  65%|██████▍   | 150/231 [01:02<00:33,  2.39it/s]

tensor(0.0385, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  65%|██████▌   | 151/231 [01:03<00:33,  2.39it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.4159e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  66%|██████▌   | 152/231 [01:03<00:32,  2.41it/s]

tensor(0.0038, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3102e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  66%|██████▌   | 153/231 [01:04<00:32,  2.40it/s]

tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  67%|██████▋   | 154/231 [01:04<00:32,  2.38it/s]

tensor(0.0116, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  67%|██████▋   | 155/231 [01:04<00:31,  2.38it/s]

tensor(0.0134, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6928e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  68%|██████▊   | 156/231 [01:05<00:31,  2.37it/s]

tensor(0.0035, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2658e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  68%|██████▊   | 157/231 [01:05<00:31,  2.36it/s]

tensor(0.0140, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  68%|██████▊   | 158/231 [01:06<00:30,  2.36it/s]

tensor(0.0121, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.6838e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  69%|██████▉   | 159/231 [01:06<00:30,  2.36it/s]

tensor(0.0032, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.2829e-06, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0239, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2827e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  70%|██████▉   | 161/231 [01:07<00:31,  2.22it/s]

tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  70%|███████   | 162/231 [01:08<00:30,  2.28it/s]

tensor(0.0142, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.4572e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  71%|███████   | 163/231 [01:08<00:29,  2.32it/s]

tensor(0.0186, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.1216e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  71%|███████   | 164/231 [01:08<00:28,  2.35it/s]

tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.3581e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  71%|███████▏  | 165/231 [01:09<00:27,  2.37it/s]

tensor(0.0048, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.1914e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  72%|███████▏  | 166/231 [01:09<00:27,  2.40it/s]

tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.7985e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  72%|███████▏  | 167/231 [01:10<00:26,  2.42it/s]

tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.3794e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  73%|███████▎  | 168/231 [01:10<00:25,  2.43it/s]

tensor(0.0154, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.2869e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  73%|███████▎  | 169/231 [01:10<00:25,  2.43it/s]

tensor(0.0335, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  74%|███████▎  | 170/231 [01:11<00:25,  2.43it/s]

tensor(0.0038, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  74%|███████▍  | 171/231 [01:11<00:24,  2.41it/s]

tensor(0.0058, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.9682e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  74%|███████▍  | 172/231 [01:12<00:24,  2.41it/s]

tensor(0.0088, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.9066e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  75%|███████▍  | 173/231 [01:12<00:23,  2.42it/s]

tensor(0.0364, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  75%|███████▌  | 174/231 [01:12<00:23,  2.41it/s]

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  76%|███████▌  | 175/231 [01:13<00:23,  2.42it/s]

tensor(0.0040, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  76%|███████▌  | 176/231 [01:13<00:22,  2.42it/s]

tensor(0.0118, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0042, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.6108e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  77%|███████▋  | 178/231 [01:14<00:21,  2.41it/s]

tensor(0.0154, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  77%|███████▋  | 179/231 [01:15<00:21,  2.41it/s]

tensor(0.0263, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3026e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  78%|███████▊  | 180/231 [01:15<00:21,  2.39it/s]

tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0003, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  78%|███████▊  | 181/231 [01:15<00:20,  2.38it/s]

tensor(0.0039, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.5527e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  79%|███████▉  | 182/231 [01:16<00:20,  2.37it/s]

tensor(0.0035, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0147, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  80%|███████▉  | 184/231 [01:17<00:19,  2.39it/s]

tensor(0.0173, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0279, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9674e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  81%|████████  | 186/231 [01:17<00:18,  2.41it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  81%|████████  | 187/231 [01:18<00:18,  2.42it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.2014e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  81%|████████▏ | 188/231 [01:18<00:17,  2.43it/s]

tensor(0.0076, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.8761e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0007, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  82%|████████▏ | 190/231 [01:19<00:16,  2.41it/s]

tensor(0.0034, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.7362e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0093, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.0192e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  83%|████████▎ | 192/231 [01:20<00:16,  2.42it/s]

tensor(0.0482, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  84%|████████▎ | 193/231 [01:20<00:15,  2.42it/s]

tensor(0.0010, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  84%|████████▍ | 194/231 [01:21<00:15,  2.42it/s]

tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>) tensor(4.3514e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0025, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.9261e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  85%|████████▍ | 196/231 [01:22<00:14,  2.42it/s]

tensor(0.0074, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  85%|████████▌ | 197/231 [01:22<00:14,  2.43it/s]

tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.8930e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0347, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  86%|████████▌ | 199/231 [01:23<00:13,  2.41it/s]

tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0050, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  87%|████████▋ | 200/231 [01:23<00:12,  2.41it/s]

tensor(0.0213, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  87%|████████▋ | 202/231 [01:24<00:12,  2.41it/s]

tensor(0.0030, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  88%|████████▊ | 203/231 [01:25<00:11,  2.42it/s]

tensor(0.0044, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  88%|████████▊ | 204/231 [01:25<00:11,  2.42it/s]

tensor(0.0015, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  89%|████████▊ | 205/231 [01:25<00:10,  2.40it/s]

tensor(0.0213, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  89%|████████▉ | 206/231 [01:26<00:10,  2.41it/s]

tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.9727e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  90%|████████▉ | 207/231 [01:26<00:09,  2.42it/s]

tensor(0.0157, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  90%|█████████ | 208/231 [01:27<00:09,  2.42it/s]

tensor(0.0310, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  90%|█████████ | 209/231 [01:27<00:09,  2.40it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.3691e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0011, device='cuda:0', grad_fn=<DivBackward1>) tensor(9.3724e-06, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  91%|█████████ | 210/231 [01:27<00:08,  2.41it/s]

tensor(0.0014, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  92%|█████████▏| 212/231 [01:28<00:07,  2.41it/s]

tensor(0.0024, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.2343e-05, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0040, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.7342e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  92%|█████████▏| 213/231 [01:29<00:07,  2.40it/s]

tensor(0.0021, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  93%|█████████▎| 214/231 [01:29<00:07,  2.40it/s]

tensor(0.0031, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.9776e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  94%|█████████▎| 216/231 [01:30<00:06,  2.40it/s]

tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  94%|█████████▍| 217/231 [01:30<00:05,  2.40it/s]

tensor(0.0050, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  94%|█████████▍| 218/231 [01:31<00:05,  2.41it/s]

tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0016, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  95%|█████████▍| 219/231 [01:31<00:04,  2.42it/s]

tensor(0.0208, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.4985e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  95%|█████████▌| 220/231 [01:32<00:04,  2.41it/s]

tensor(0.0029, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.4692e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  96%|█████████▌| 221/231 [01:32<00:04,  2.41it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0004, device='cuda:0', grad_fn=<DivBackward1>)
tensor(0.0256, device='cuda:0', grad_fn=<DivBackward1>) tensor(1.6124e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  96%|█████████▌| 222/231 [01:32<00:03,  2.41it/s]

tensor(0.0156, device='cuda:0', grad_fn=<DivBackward1>) tensor(8.0633e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  97%|█████████▋| 223/231 [01:33<00:03,  2.41it/s]

tensor(0.0022, device='cuda:0', grad_fn=<DivBackward1>) tensor(5.7792e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  97%|█████████▋| 224/231 [01:33<00:02,  2.38it/s]

tensor(0.0066, device='cuda:0', grad_fn=<DivBackward1>) tensor(6.4803e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  97%|█████████▋| 225/231 [01:34<00:02,  2.39it/s]

tensor(0.0041, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0001, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  98%|█████████▊| 226/231 [01:34<00:02,  2.39it/s]

tensor(0.0020, device='cuda:0', grad_fn=<DivBackward1>) tensor(3.4070e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  98%|█████████▊| 227/231 [01:34<00:01,  2.39it/s]

tensor(0.0083, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0002, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009:  99%|█████████▊| 228/231 [01:35<00:01,  2.40it/s]

tensor(0.0036, device='cuda:0', grad_fn=<DivBackward1>) tensor(2.7706e-05, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009: 100%|█████████▉| 230/231 [01:36<00:00,  2.41it/s]

tensor(0.0018, device='cuda:0', grad_fn=<DivBackward1>) tensor(0.0008, device='cuda:0', grad_fn=<DivBackward1>)


Epoch 009: 100%|██████████| 231/231 [01:36<00:00,  2.39it/s]

tensor(0.0038, device='cuda:0', grad_fn=<DivBackward1>) tensor(7.4894e-06, device='cuda:0', grad_fn=<DivBackward1>)
Epoch 10/235, Average Training Loss: 0.0046



100%|██████████| 235/235 [00:21<00:00, 11.08it/s]


Accuracy on validation set: 0.9991


In [12]:
# 加载保存的状态字典
state_dict = torch.load('best_model_grod2_clinc_GPT_0.001.ckpt')
model_grod = GRODNet(model, 1, 10)
# 将加载的状态字典加载到模型中
model_grod.load_state_dict(state_dict)
print(model_grod)

GRODNet(
  (backbone): GPT2ForSequenceClassification(
    (transformer): GPT2Model(
      (wte): Embedding(50257, 768)
      (wpe): Embedding(1024, 768)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-11): 12 x GPT2Block(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): GPT2Attention(
            (c_attn): Conv1D()
            (c_proj): Conv1D()
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): GPT2MLP(
            (c_fc): Conv1D()
            (c_proj): Conv1D()
            (act): NewGELUActivation()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    )
    (score): Linear(in_features=768, out_features=10, bias=True)
  )
  (lda): LDA()
  (pca): PCA()

In [13]:
from typing import Any

import numpy as np
import torch
import torch.nn as nn
from numpy.linalg import norm, pinv
from scipy.special import logsumexp
from sklearn.covariance import EmpiricalCovariance
from tqdm import tqdm

# run
from typing import Any
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader


class BasePostprocessor:
    def __init__(self, config):
        self.config = config

    def setup(self, net: nn.Module, id_loader_dict, ood_loader_dict):
        pass

    @torch.no_grad()
    def postprocess(self, net: nn.Module, data: Any, attention):
        output = net(input_ids=data, attention_mask=attention)[0]
        score = torch.softmax(net.head(output), dim=1)
        conf, pred = torch.max(score, dim=1)
        return pred, conf

    def inference(self,
                  net: nn.Module,
                  data_loader: DataLoader, 
                  alpha, w, b, u, NS,
                  progress: bool = True):
        pred_list, conf_list, label_list = [], [], []
        for batch in tqdm(data_loader):
            input_ids = batch['input_ids'].cuda()
            attention_mask = batch["attention_mask"].cuda()
            labels = batch['label'].cuda()
            pred, conf = self.postprocess(net.cuda(), input_ids, attention_mask, alpha, w, b, u, NS)

            pred_list.append(pred.cpu())
            conf_list.append(conf.cpu())
            label_list.append(labels.cpu())

        # convert values into numpy array
        pred_list = torch.cat(pred_list).numpy().astype(int)
        conf_list = torch.cat(conf_list).numpy()
        label_list = torch.cat(label_list).numpy().astype(int)

        return pred_list, conf_list, label_list


class GRODPostprocessor(BasePostprocessor):
    def __init__(self, config):
        super().__init__(config)
        self.args = self.config['postprocessor']['postprocessor_args']
        self.args_dict = self.config['postprocessor']['postprocessor_sweep']
        self.dim = self.args['dim']
        self.setup_flag = False

    def setup(self, net: nn.Module, id_loader_dict):
        if not self.setup_flag:
            net.eval()
            net.cuda()
            with torch.no_grad():
                self.w, self.b = net.head.weight[:-1,:].cpu().numpy(), net.head.bias[:-1].cpu().numpy()
                # print(self.w.size())
                print('Extracting id training feature')
                feature_id_train = []
                logit_id_train = []
                for batch in tqdm(id_loader_dict,
                                  desc='Setup: ',
                                  position=0,
                                  leave=True):
                    data = batch['input_ids'].cuda()
                    attention_mask = batch["attention_mask"].cuda()
                    labels = batch['label'].cuda()
                    # data = data.float()
                    
                    hidden_states = net.backbone.transformer(input_ids=data, attention_mask=attention_mask)[0]
        
                    feature = hidden_states[torch.arange(data.size(0), device=hidden_states.device), -1].squeeze()
                    logit = net.head(feature)
                    score = torch.softmax(logit, dim=1)
                    score0 = torch.softmax(logit[:,:-1], dim=1)
                    conf, pred = torch.max(score, dim=1)
                    conf0, pred0 = torch.max(score0, dim=1)
                    for i in range(pred.size(0)):
                        if pred[i] == logit.size(1) - 1:
                            conf[i] = 0.01
                            pred[i] = 1
                            score0[i, :] = 0.01 * torch.ones(score0.size(1)).cuda()
                        else:
                            conf[i] = conf0[i]     
                        
                    feature_id_train.append(feature.cpu().numpy())
                    logit_id_train.append(score0.cpu().numpy())
                feature_id_train = np.concatenate(feature_id_train, axis=0)
                logit_id_train = np.concatenate(logit_id_train, axis=0)

                # logit_id_train = feature_id_train @ self.w.T + self.b

            self.u = -np.matmul(pinv(self.w), self.b)
            ec = EmpiricalCovariance(assume_centered=True)
            ec.fit(feature_id_train - self.u)
            eig_vals, eigen_vectors = np.linalg.eig(ec.covariance_)
            self.NS = np.ascontiguousarray(
                (eigen_vectors.T[np.argsort(eig_vals * -1)[self.dim:]]).T)

            vlogit_id_train = norm(np.matmul(feature_id_train - self.u,
                                             self.NS),
                                   axis=-1)
            
            print(feature_id_train - self.u, self.NS)
            
            self.alpha = logit_id_train.max(
                axis=-1).mean() / vlogit_id_train.mean()
            print(f'{self.alpha=:.4f}')

            self.setup_flag = True
        else:
            pass
        return self.alpha, self.w, self.b, self.u, self.NS
    
    @torch.no_grad()
    def postprocess(self, net: nn.Module, data: Any, attention, alpha, w, b, u, NS):
        hidden_states = net.backbone.transformer(input_ids=data, attention_mask=attention)[0]
        
        feature_ood = hidden_states[torch.arange(data.size(0), device=hidden_states.device), -1].squeeze()
        logit = net.head(feature_ood)
        score = torch.softmax(logit, dim=1)
        score0 = torch.softmax(logit[:,:-1], dim=1)
        conf, pred = torch.max(score, dim=1)
        conf0, pred0 = torch.max(score0, dim=1)
        for i in range(pred.size(0)):
          if pred[i] == logit.size(1) - 1:
            conf[i] = 0.1
            pred[i] = 1
            score0[i, :] = 0.1 * torch.ones(score0.size(1)).cuda()
          else:
            conf[i] = conf0[i]
        logit_ood = score0.cpu()    
        
        feature_ood = feature_ood.cpu()
        
        # logit_ood = feature_ood @ w.T + b
        _, pred = torch.max(logit_ood, dim=1)
        energy_ood = logsumexp(logit_ood.numpy(), axis=-1)
        vlogit_ood = norm(np.matmul(feature_ood.numpy() - u, NS),
                          axis=-1) * alpha
        score_ood = -vlogit_ood + energy_ood
        return pred, torch.from_numpy(score_ood)

    def set_hyperparam(self, hyperparam: list):
        self.dim = hyperparam[0]

    def get_hyperparam(self):
        return self.dim


In [14]:
yaml_file_path = 'grod copy.yml'
loaded_parameters = load_parameters_from_yaml(yaml_file_path)
config_grod = loaded_parameters
alpha, w, b, u, NS = GRODPostprocessor(config_grod).setup(model_grod, train_dataloader)

pred_list, conf_list, label_list = GRODPostprocessor(config_grod).inference(model_grod, test_dataloader, alpha, w, b, u, NS)
# conf_list = 1e7 * np.ones(conf_list.shape[0])
# [fpr, auroc, aupr_in, aupr_out, accuracy]
compute_all_metrics(conf_list, label_list, pred_list)

# [0.22866666666666666,
#  0.9519519999999999,
#  0.9868800598520897,
#  0.8540156668092043,
#  0.9753333333333334]

Extracting id training feature


Setup: 100%|██████████| 235/235 [00:16<00:00, 13.89it/s]


[[-0.3885019  -0.50341797  1.2052389  ...  0.11835438  0.41107127
   0.7871417 ]
 [-0.23824078  0.14025849 -0.38675025 ...  0.65785193 -0.86279285
   0.38748842]
 [-1.294266    0.97808367 -0.51460195 ... -0.01821798 -1.1607496
   1.920844  ]
 ...
 [ 2.2194543  -0.2681387  -1.3348029  ... -0.03340535 -0.07211639
   0.61603427]
 [-0.92062813 -2.3408258  -1.720541   ... -1.3645623   0.00621642
  -0.16125806]
 [-1.561258    0.43807253 -3.043076   ...  0.04060426 -1.8698362
   0.17093289]] [[ 6.9086589e-02  1.1879435e-02 -3.5077233e-02 ... -4.0615350e-04
   2.7860857e-05  4.6710175e-04]
 [ 5.4524638e-02  1.0728780e-02 -3.2899824e-03 ...  8.3292380e-06
  -3.3345157e-05  5.6123955e-04]
 [ 3.8575683e-02 -4.7345150e-02 -2.3312101e-02 ... -3.6718330e-04
  -1.9018617e-04  5.2444660e-04]
 ...
 [ 7.4236043e-04  2.3809254e-02 -4.8332617e-02 ... -2.5227404e-04
  -2.8735242e-04  7.6716812e-04]
 [ 3.9038409e-02 -1.7264565e-02 -3.0213155e-02 ... -4.5027354e-04
  -3.4165758e-04  1.0440374e-03]
 [ 5.08624

100%|██████████| 86/86 [00:06<00:00, 13.18it/s]


[0.22866666666666666,
 0.9519519999999999,
 0.9868800598520897,
 0.8540156668092043,
 0.9753333333333334]

: 

In [None]:
yaml_file_path = 'grod copy.yml'
loaded_parameters = load_parameters_from_yaml(yaml_file_path)
config_grod = loaded_parameters
        
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_grod = GRODNet(model, 1, 10)

# if torch.cuda.device_count() > 1:
#     print("Using", torch.cuda.device_count(), "GPUs!")
#     model_grod = model_grod
# model_grod = model_grod.to(device)

trainer = GRODTrainer(model_grod, train_dataloader, config_grod)
trainer.train(config_grod['optimizer']['num_epochs']) 



# Save the best model state
trainer.save_best_model('best_model_grod2_clinc_GPT_0.001.ckpt') 

In [None]:
# 加载保存的状态字典
state_dict = torch.load('best_model_grod2_clinc_GPT_0.001.ckpt')
model_grod = GRODNet(model, 1, 2)
# 将加载的状态字典加载到模型中
model_grod.load_state_dict(state_dict)
print(model_grod)

yaml_file_path = 'grod copy.yml'
loaded_parameters = load_parameters_from_yaml(yaml_file_path)
config_grod = loaded_parameters
alpha, w, b, u, NS = GRODPostprocessor(config_grod).setup(model_grod, train_dataloader)

pred_list, conf_list, label_list = GRODPostprocessor(config_grod).inference(model_grod, test_dataloader, alpha, w, b, u, NS)
# conf_list = 1e7 * np.ones(conf_list.shape[0])
# [fpr, auroc, aupr_in, aupr_out, accuracy]
compute_all_metrics(conf_list, label_list, pred_list)