In [20]:
# target parameter attack on linear regression with close form solution on the cross derivative

import os
import time
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from numpy import linalg as LA
import numpy as np
import math
from tqdm import tqdm
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib
from sklearn.datasets import make_classification


torch.manual_seed(0)
device = 'cuda'

In [149]:
# creating the gaussian dataset

# define training set
separable = False
while not separable:
    samples = make_classification(n_samples=1000, n_features=3, n_redundant=0, n_informative=1, n_clusters_per_class=1, flip_y=-1)
    red = samples[0][samples[1] == 0]
    blue = samples[0][samples[1] == 1]
    separable = any([red[:, k].max() < blue[:, k].min() or red[:, k].min() > blue[:, k].max() for k in range(2)])
red_labels = np.zeros(len(red))
blue_labels = np.ones(len(blue))

labels = np.append(red_labels,blue_labels)
inputs = np.concatenate((red,blue),axis=0)

X_train, X_test, y_train,  y_test = train_test_split(
    inputs, labels, test_size=0.33, random_state=42)

X_train, X_test = torch.Tensor(X_train),torch.Tensor(X_test)
y_train, y_test = torch.Tensor(y_train),torch.Tensor(y_test)



In [150]:
batch_size_train= len(X_train)
batch_size_test = len(X_test)
class LinearDataset(Dataset):
    def __init__(self, X, y):
        assert X.size()[0] == y.size()[0]
        self.X = X
        self.y = y

    def __len__(self):
        return self.X.size()[0]

    def __getitem__(self, idx):
        
        
        return [self.X[idx], self.y[idx]]
train_loader = DataLoader(LinearDataset(X_train, y_train), batch_size=batch_size_train, shuffle=False)
test_loader = DataLoader(LinearDataset(X_test, y_test), batch_size=batch_size_test, shuffle=False)

In [151]:
class LogisticRegression(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)
        
    def forward(self, x):
        outputs = torch.sigmoid(self.linear(x))
        return outputs

input_dim = 3 
output_dim = 1 # Two possible outputs
learning_rate = 0.01

model = LogisticRegression(input_dim,output_dim).to(device)
model.load_state_dict(torch.load("gaussian_gd_0.5.pt"))

<All keys matched successfully>

In [152]:
from scipy.special import lambertw
import math
lambert_w = lambertw(1/math.e)
print(lambert_w)

(0.2784645427610738+0j)


In [153]:
for param in model.parameters():
    w_p = param
    break
print(w_p/10)

tensor([[-0.0346, -0.2471,  0.0194]], device='cuda:0', grad_fn=<DivBackward0>)


In [154]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)

linear.weight tensor([[-0.3458, -2.4712,  0.1935]], device='cuda:0')
linear.bias tensor([1.2288], device='cuda:0')


In [155]:
# try scaling the weights 
state_dict = model.state_dict()
state_dict['linear.weight'] = torch.tensor([[-0.03458,-0.24712,0.01935]])
model.load_state_dict(state_dict)

<All keys matched successfully>

In [158]:
# the script for training target attack
epsilon = 1
lr = 0.005
epochs = 3000

def adjust_learning_rate(lr, epoch):
    """Decay the learning rate based on schedule"""
    lr *= 0.5 * (1. + math.cos(math.pi * epoch / epochs))
    return(lr)


def autograd(outputs, inputs, create_graph=False):
    """Compute gradient of outputs w.r.t. inputs, assuming outputs is a scalar."""
    #inputs = tuple(inputs)
    grads = torch.autograd.grad(outputs, inputs, create_graph=create_graph, allow_unused=True)
    return [xx if xx is not None else yy.new_zeros(yy.size()) for xx, yy in zip(grads, inputs)]

def train(epoch, X,y):
    data, target = X.to(device), y.to(device)
    data.requires_grad=True
    if epoch==0:
        # initialize poisoned data
        data_p = Variable(data[:(int(epsilon*len(data)))])
        target_p = Variable(target[:(int(epsilon*len(target)))])
        torch.save(target_p,'target_p_gaussian_{}.pt'.format(epsilon))
    else:
        data_p = torch.load('data_p_gaussian_{}.pt'.format(epsilon))
        target_p = torch.load('target_p_gaussian_{}.pt'.format(epsilon))
    data_p.requires_grad=True

    # initialize f function
    criterion = torch.nn.BCELoss(reduction='sum')

    # calculate gradient of w on clean sample
    output_c = torch.squeeze(model(data))
    loss_c =  0.5 * criterion(output_c,target)

    # calculate dL/dg_1
    grad_c= autograd(loss_c,tuple(model.parameters()),create_graph=True)
    g1 = grad_c[0]/len(y)

    # calculate the size of epsilon_d w.r.t g1 and the Lambert's W function
    g_mu_dot_w = np.dot(g1.to('cpu').detach().numpy().squeeze(),w_p.cpu().detach().numpy().squeeze())
    if epoch == 0:
        print(g_mu_dot_w)
        print('the necessary size of epsilon_d:{}'.format(g_mu_dot_w/lambert_w))
    

    # calculate gradient of w on poisoned sample
    output_p = torch.squeeze(model(data_p))
    loss_p = 0.5 * criterion(output_p,target_p)
    grad_p= autograd(loss_p,tuple(model.parameters()),create_graph=True)

    g2 = torch.matmul((output_p - target_p).t(),data_p)
    # calculate the true loss: |g_c + g_p|_{2}
    grad_sum = g1+g2
    loss = torch.norm(grad_sum,2)

    update = autograd(loss,data_p,create_graph=True)
    data_t = data_p - lr * update[0]


    data_t = data_t.to('cuda')

    torch.save(data_t, 'data_p_gaussian_{}.pt'.format(epsilon))
    

    print("epoch:{},lr:{},loss:{}".format(epoch,lr,loss))
    
    return loss
        

In [159]:
for epoch in range(epochs):
    loss = train(epoch,X_train,y_train)
    if loss<0.0001:
        break

0.0679838
the necessary size of epsilon_d:(0.24413807951492283+0j)
[tensor([[-0.0151,  0.3248, -0.0140],
        [ 0.0599, -0.8156,  0.0309],
        [-0.0153,  0.3253, -0.0139],
        ...,
        [-0.0143,  0.3290, -0.0143],
        [ 0.0616, -0.8409,  0.0318],
        [ 0.0626, -0.8498,  0.0321]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:0,lr:0.005,loss:373.5389404296875
[tensor([[-0.0151,  0.3247, -0.0139],
        [ 0.0599, -0.8152,  0.0308],
        [-0.0154,  0.3251, -0.0139],
        ...,
        [-0.0143,  0.3288, -0.0143],
        [ 0.0617, -0.8406,  0.0318],
        [ 0.0626, -0.8495,  0.0321]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1,lr:0.005,loss:372.1507568359375
[tensor([[-0.0151,  0.3245, -0.0139],
        [ 0.0599, -0.8149,  0.0308],
        [-0.0154,  0.3249, -0.0139],
        ...,
        [-0.0144,  0.3287, -0.0143],
        [ 0.0617, -0.8403,  0.0318],
        [ 0.0627, -0.8492,  0.0321]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2,lr:0.005

epoch:34,lr:0.005,loss:326.9038391113281
[tensor([[-0.0157,  0.3190, -0.0135],
        [ 0.0613, -0.8044,  0.0300],
        [-0.0159,  0.3194, -0.0135],
        ...,
        [-0.0149,  0.3231, -0.0139],
        [ 0.0631, -0.8304,  0.0310],
        [ 0.0640, -0.8395,  0.0313]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:35,lr:0.005,loss:325.5498352050781
[tensor([[-0.0157,  0.3188, -0.0135],
        [ 0.0613, -0.8041,  0.0300],
        [-0.0160,  0.3192, -0.0135],
        ...,
        [-0.0149,  0.3229, -0.0139],
        [ 0.0631, -0.8301,  0.0310],
        [ 0.0641, -0.8392,  0.0313]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:36,lr:0.005,loss:324.1968078613281
[tensor([[-0.0157,  0.3186, -0.0135],
        [ 0.0613, -0.8037,  0.0300],
        [-0.0160,  0.3190, -0.0135],
        ...,
        [-0.0150,  0.3228, -0.0139],
        [ 0.0631, -0.8298,  0.0310],
        [ 0.0641, -0.8389,  0.0313]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:37,lr:0.005,loss:322.8447875976562

[tensor([[-0.0164,  0.3123, -0.0131],
        [ 0.0629, -0.7911,  0.0290],
        [-0.0166,  0.3127, -0.0130],
        ...,
        [-0.0156,  0.3163, -0.0134],
        [ 0.0648, -0.8177,  0.0301],
        [ 0.0657, -0.8272,  0.0304]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:76,lr:0.005,loss:270.9007873535156
[tensor([[-0.0164,  0.3121, -0.0131],
        [ 0.0630, -0.7907,  0.0290],
        [-0.0167,  0.3125, -0.0130],
        ...,
        [-0.0156,  0.3162, -0.0134],
        [ 0.0648, -0.8174,  0.0300],
        [ 0.0658, -0.8269,  0.0303]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:77,lr:0.005,loss:269.5889892578125
[tensor([[-0.0164,  0.3120, -0.0130],
        [ 0.0630, -0.7904,  0.0290],
        [-0.0167,  0.3123, -0.0130],
        ...,
        [-0.0157,  0.3160, -0.0134],
        [ 0.0648, -0.8171,  0.0300],
        [ 0.0658, -0.8266,  0.0303]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:78,lr:0.005,loss:268.2781677246094
[tensor([[-0.0164,  0.3118, -0.0130],
  

[tensor([[-0.0171,  0.3052, -0.0126],
        [ 0.0648, -0.7762,  0.0279],
        [-0.0174,  0.3055, -0.0125],
        ...,
        [-0.0164,  0.3092, -0.0129],
        [ 0.0666, -0.8035,  0.0290],
        [ 0.0676, -0.8133,  0.0293]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:121,lr:0.005,loss:212.86534118652344
[tensor([[-0.0171,  0.3051, -0.0125],
        [ 0.0648, -0.7758,  0.0279],
        [-0.0174,  0.3054, -0.0125],
        ...,
        [-0.0164,  0.3090, -0.0129],
        [ 0.0667, -0.8032,  0.0290],
        [ 0.0677, -0.8130,  0.0293]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:122,lr:0.005,loss:211.59878540039062
[tensor([[-0.0171,  0.3049, -0.0125],
        [ 0.0649, -0.7755,  0.0279],
        [-0.0174,  0.3052, -0.0125],
        ...,
        [-0.0164,  0.3089, -0.0129],
        [ 0.0667, -0.8029,  0.0289],
        [ 0.0677, -0.8127,  0.0293]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:123,lr:0.005,loss:210.3332061767578
[tensor([[-0.0172,  0.3047, -0.0125

epoch:162,lr:0.005,loss:161.75669860839844
[tensor([[-0.0178,  0.2988, -0.0121],
        [ 0.0665, -0.7621,  0.0269],
        [-0.0181,  0.2991, -0.0121],
        ...,
        [-0.0170,  0.3027, -0.0125],
        [ 0.0684, -0.7900,  0.0280],
        [ 0.0694, -0.8000,  0.0283]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:163,lr:0.005,loss:160.53114318847656
[tensor([[-0.0178,  0.2986, -0.0121],
        [ 0.0665, -0.7617,  0.0268],
        [-0.0181,  0.2989, -0.0121],
        ...,
        [-0.0171,  0.3026, -0.0124],
        [ 0.0684, -0.7897,  0.0279],
        [ 0.0695, -0.7997,  0.0283]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:164,lr:0.005,loss:159.3065948486328
[tensor([[-0.0178,  0.2985, -0.0121],
        [ 0.0666, -0.7614,  0.0268],
        [-0.0181,  0.2988, -0.0120],
        ...,
        [-0.0171,  0.3024, -0.0124],
        [ 0.0685, -0.7893,  0.0279],
        [ 0.0695, -0.7994,  0.0282]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:165,lr:0.005,loss:158.0830383

epoch:206,lr:0.005,loss:108.77476501464844
[tensor([[-0.0185,  0.2923, -0.0116],
        [ 0.0683, -0.7471,  0.0258],
        [-0.0188,  0.2925, -0.0116],
        ...,
        [-0.0177,  0.2961, -0.0120],
        [ 0.0702, -0.7755,  0.0269],
        [ 0.0712, -0.7858,  0.0272]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:207,lr:0.005,loss:107.5929946899414
[tensor([[-0.0185,  0.2921, -0.0116],
        [ 0.0683, -0.7468,  0.0258],
        [-0.0188,  0.2924, -0.0116],
        ...,
        [-0.0177,  0.2960, -0.0120],
        [ 0.0702, -0.7752,  0.0269],
        [ 0.0713, -0.7855,  0.0272]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:208,lr:0.005,loss:106.4122085571289
[tensor([[-0.0185,  0.2920, -0.0116],
        [ 0.0683, -0.7464,  0.0257],
        [-0.0188,  0.2922, -0.0116],
        ...,
        [-0.0177,  0.2959, -0.0120],
        [ 0.0703, -0.7749,  0.0268],
        [ 0.0713, -0.7852,  0.0272]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:209,lr:0.005,loss:105.23241424

[tensor([[-0.0191,  0.2861, -0.0112],
        [ 0.0698, -0.7324,  0.0247],
        [-0.0194,  0.2863, -0.0112],
        ...,
        [-0.0183,  0.2899, -0.0116],
        [ 0.0718, -0.7612,  0.0259],
        [ 0.0729, -0.7717,  0.0262]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:250,lr:0.005,loss:57.711326599121094
[tensor([[-0.0191,  0.2860, -0.0112],
        [ 0.0699, -0.7320,  0.0247],
        [-0.0194,  0.2862, -0.0112],
        ...,
        [-0.0183,  0.2898, -0.0116],
        [ 0.0719, -0.7608,  0.0258],
        [ 0.0729, -0.7714,  0.0262]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:251,lr:0.005,loss:56.57294845581055
[tensor([[-0.0191,  0.2859, -0.0112],
        [ 0.0699, -0.7317,  0.0247],
        [-0.0194,  0.2861, -0.0112],
        ...,
        [-0.0183,  0.2897, -0.0115],
        [ 0.0719, -0.7605,  0.0258],
        [ 0.0730, -0.7710,  0.0262]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:252,lr:0.005,loss:55.43555450439453
[tensor([[-0.0191,  0.2857, -0.0112]

[tensor([[-0.0193,  0.2802, -0.0109],
        [ 0.0706, -0.7175,  0.0239],
        [-0.0196,  0.2804, -0.0108],
        ...,
        [-0.0186,  0.2839, -0.0112],
        [ 0.0726, -0.7468,  0.0250],
        [ 0.0736, -0.7575,  0.0254]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:293,lr:0.005,loss:9.64425277709961
[tensor([[-0.0193,  0.2801, -0.0109],
        [ 0.0705, -0.7171,  0.0239],
        [-0.0196,  0.2803, -0.0108],
        ...,
        [-0.0186,  0.2838, -0.0112],
        [ 0.0725, -0.7464,  0.0250],
        [ 0.0736, -0.7572,  0.0254]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:294,lr:0.005,loss:8.547842979431152
[tensor([[-0.0193,  0.2800, -0.0109],
        [ 0.0705, -0.7168,  0.0239],
        [-0.0196,  0.2801, -0.0108],
        ...,
        [-0.0186,  0.2837, -0.0112],
        [ 0.0725, -0.7461,  0.0250],
        [ 0.0735, -0.7569,  0.0254]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:295,lr:0.005,loss:7.4524030685424805
[tensor([[-0.0193,  0.2798, -0.0109],

[tensor([[ 0.0557, -0.2725,  0.0011],
        [-0.1752,  0.7062,  0.0038],
        [ 0.0565, -0.2707,  0.0009],
        ...,
        [ 0.0544, -0.2788,  0.0017],
        [-0.1832,  0.7189,  0.0049],
        [-0.1869,  0.7224,  0.0055]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:334,lr:0.005,loss:0.19626069068908691
[tensor([[-5.6196e-02,  2.7251e-01, -9.6346e-04],
        [ 1.7665e-01, -7.0629e-01, -4.1988e-03],
        [-5.6979e-02,  2.7062e-01, -7.0369e-04],
        ...,
        [-5.4935e-02,  2.7887e-01, -1.5419e-03],
        [ 1.8476e-01, -7.1880e-01, -5.3483e-03],
        [ 1.8845e-01, -7.2210e-01, -5.9877e-03]], device='cuda:0',
       grad_fn=<AddBackward0>)]
epoch:335,lr:0.005,loss:0.8923653364181519
[tensor([[ 5.8379e-02, -2.7179e-01,  2.9048e-04],
        [-1.8293e-01,  7.0492e-01,  6.1088e-03],
        [ 5.9193e-02, -2.6977e-01,  1.6927e-05],
        ...,
        [ 5.7091e-02, -2.7828e-01,  8.8156e-04],
        [-1.9140e-01,  7.1639e-01,  7.4047e-03],
        [-1.9526e

[tensor([[-0.0917,  0.2599,  0.0109],
        [ 0.2788, -0.6812, -0.0379],
        [-0.0930,  0.2557,  0.0114],
        ...,
        [-0.0901,  0.2681,  0.0101],
        [ 0.2929, -0.6762, -0.0416],
        [ 0.2991, -0.6719, -0.0433]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:359,lr:0.005,loss:0.8874475955963135
[tensor([[ 0.0943, -0.2585, -0.0119],
        [-0.2862,  0.6783,  0.0406],
        [ 0.0956, -0.2542, -0.0124],
        ...,
        [ 0.0926, -0.2668, -0.0111],
        [-0.3007,  0.6720,  0.0445],
        [-0.3071,  0.6672,  0.0463]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:360,lr:0.005,loss:0.20229603350162506
[tensor([[-0.0949,  0.2584,  0.0121],
        [ 0.2880, -0.6780, -0.0413],
        [-0.0963,  0.2540,  0.0126],
        ...,
        [-0.0933,  0.2667,  0.0113],
        [ 0.3026, -0.6713, -0.0452],
        [ 0.3091, -0.6664, -0.0470]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:361,lr:0.005,loss:0.8870675563812256
[tensor([[ 0.0975, -0.2570, -0.01

epoch:408,lr:0.005,loss:0.21181513369083405
[tensor([[-0.1689,  0.2018,  0.0461],
        [ 0.4995, -0.5447, -0.1382],
        [-0.1714,  0.1915,  0.0473],
        ...,
        [-0.1668,  0.2111,  0.0452],
        [ 0.5269, -0.4962, -0.1485],
        [ 0.5387, -0.4741, -0.1529]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:409,lr:0.005,loss:0.8796809315681458
[tensor([[ 0.1708, -0.1994, -0.0473],
        [-0.5051,  0.5387,  0.1416],
        [ 0.1734, -0.1889, -0.0485],
        ...,
        [ 0.1688, -0.2086, -0.0463],
        [-0.5328,  0.4889,  0.1520],
        [-0.5447,  0.4662,  0.1566]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:410,lr:0.005,loss:0.21214011311531067
[tensor([[-0.1714,  0.1989,  0.0476],
        [ 0.5066, -0.5374, -0.1424],
        [-0.1739,  0.1883,  0.0488],
        ...,
        [-0.1693,  0.2081,  0.0466],
        [ 0.5344, -0.4873, -0.1529],
        [ 0.5463, -0.4645, -0.1575]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:411,lr:0.005,loss:0.879447

[tensor([[-0.2062,  0.1401,  0.0753],
        [ 0.6053, -0.3875, -0.2216],
        [-0.2095,  0.1253,  0.0769],
        ...,
        [-0.2045,  0.1460,  0.0745],
        [ 0.6395, -0.3115, -0.2366],
        [ 0.6540, -0.2790, -0.2429]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:453,lr:0.005,loss:0.8755757808685303
[tensor([[ 0.2069, -0.1382, -0.0762],
        [-0.6072,  0.3824,  0.2242],
        [ 0.2102, -0.1233, -0.0778],
        ...,
        [ 0.2052, -0.1439, -0.0754],
        [-0.6416,  0.3058,  0.2393],
        [-0.6562,  0.2730,  0.2457]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:454,lr:0.005,loss:0.21757330000400543
[tensor([[-0.2071,  0.1377,  0.0764],
        [ 0.6078, -0.3812, -0.2249],
        [-0.2105,  0.1228,  0.0781],
        ...,
        [-0.2054,  0.1434,  0.0756],
        [ 0.6422, -0.3044, -0.2400],
        [ 0.6568, -0.2716, -0.2464]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:455,lr:0.005,loss:0.8754377961158752
[tensor([[ 0.2078, -0.1358, -0.07

[tensor([[-0.2164,  0.1009,  0.0955],
        [ 0.6338, -0.2815, -0.2800],
        [-0.2201,  0.0832,  0.0975],
        ...,
        [-0.2153,  0.1018,  0.0950],
        [ 0.6703, -0.1931, -0.2977],
        [ 0.6855, -0.1567, -0.3052]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:495,lr:0.005,loss:0.8733429312705994
[tensor([[ 0.2165, -0.0998, -0.0962],
        [-0.6341,  0.2784,  0.2819],
        [ 0.2202, -0.0821, -0.0981],
        ...,
        [ 0.2155, -0.1006, -0.0957],
        [-0.6706,  0.1897,  0.2997],
        [-0.6858,  0.1533,  0.3071]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:496,lr:0.005,loss:0.2202870100736618
[tensor([[-0.2165,  0.0995,  0.0963],
        [ 0.6342, -0.2777, -0.2824],
        [-0.2203,  0.0818,  0.0983],
        ...,
        [-0.2155,  0.1003,  0.0959],
        [ 0.6707, -0.1889, -0.3002],
        [ 0.6859, -0.1525, -0.3077]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:497,lr:0.005,loss:0.8732661604881287
[tensor([[ 0.2166, -0.0985, -0.097

epoch:537,lr:0.005,loss:0.872143030166626
[tensor([[ 0.2158, -0.0799, -0.1113],
        [-0.6322,  0.2199,  0.3261],
        [ 0.2198, -0.0602, -0.1135],
        ...,
        [ 0.2155, -0.0755, -0.1112],
        [-0.6690,  0.1262,  0.3457],
        [-0.6842,  0.0890,  0.3538]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:538,lr:0.005,loss:0.2216430902481079
[tensor([[-0.2158,  0.0797,  0.1114],
        [ 0.6321, -0.2195, -0.3265],
        [-0.2198,  0.0601,  0.1136],
        ...,
        [-0.2155,  0.0753,  0.1113],
        [ 0.6689, -0.1258, -0.3461],
        [ 0.6841, -0.0886, -0.3542]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:539,lr:0.005,loss:0.8721078038215637
[tensor([[ 0.2157, -0.0792, -0.1119],
        [-0.6317,  0.2179,  0.3280],
        [ 0.2197, -0.0595, -0.1141],
        ...,
        [ 0.2154, -0.0746, -0.1118],
        [-0.6686,  0.1241,  0.3477],
        [-0.6838,  0.0869,  0.3558]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:540,lr:0.005,loss:0.221687003

[tensor([[ 0.2104, -0.0698, -0.1258],
        [-0.6173,  0.1847,  0.3689],
        [ 0.2147, -0.0484, -0.1282],
        ...,
        [ 0.2110, -0.0596, -0.1261],
        [-0.6537,  0.0893,  0.3901],
        [-0.6685,  0.0528,  0.3987]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:588,lr:0.005,loss:0.22239810228347778
[tensor([[-0.2104,  0.0698,  0.1259],
        [ 0.6172, -0.1846, -0.3692],
        [-0.2146,  0.0483,  0.1283],
        ...,
        [-0.2110,  0.0596,  0.1262],
        [ 0.6535, -0.0891, -0.3904],
        [ 0.6683, -0.0526, -0.3990]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:589,lr:0.005,loss:0.8714434504508972
[tensor([[ 0.2102, -0.0696, -0.1263],
        [-0.6166,  0.1838,  0.3704],
        [ 0.2144, -0.0481, -0.1287],
        ...,
        [ 0.2108, -0.0592, -0.1266],
        [-0.6529,  0.0883,  0.3917],
        [-0.6677,  0.0519,  0.4003]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:590,lr:0.005,loss:0.22241760790348053
[tensor([[-0.2101,  0.0696,  0.1

[tensor([[-0.2044,  0.0669,  0.1365],
        [ 0.6006, -0.1693, -0.4007],
        [-0.2088,  0.0441,  0.1391],
        ...,
        [-0.2056,  0.0520,  0.1371],
        [ 0.6363, -0.0738, -0.4230],
        [ 0.6506, -0.0385, -0.4320]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:633,lr:0.005,loss:0.8711838722229004
[tensor([[ 0.2042, -0.0668, -0.1369],
        [-0.6000,  0.1688,  0.4018],
        [ 0.2085, -0.0440, -0.1394],
        ...,
        [ 0.2054, -0.0518, -0.1375],
        [-0.6356,  0.0735,  0.4241],
        [-0.6500,  0.0381,  0.4331]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:634,lr:0.005,loss:0.22271588444709778
[tensor([[-0.2041,  0.0668,  0.1370],
        [ 0.5998, -0.1688, -0.4020],
        [-0.2085,  0.0440,  0.1395],
        ...,
        [-0.2054,  0.0517,  0.1376],
        [ 0.6355, -0.0734, -0.4244],
        [ 0.6498, -0.0380, -0.4334]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:635,lr:0.005,loss:0.8711746335029602
[tensor([[ 0.2039, -0.0668, -0.13

epoch:679,lr:0.005,loss:0.8710570335388184
[tensor([[ 0.1976, -0.0664, -0.1463],
        [-0.5819,  0.1602,  0.4300],
        [ 0.2021, -0.0425, -0.1490],
        ...,
        [ 0.1995, -0.0470, -0.1473],
        [-0.6168,  0.0656,  0.4533],
        [-0.6307,  0.0316,  0.4626]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:680,lr:0.005,loss:0.222872793674469
[tensor([[-0.1976,  0.0664,  0.1464],
        [ 0.5818, -0.1602, -0.4303],
        [-0.2021,  0.0425,  0.1491],
        ...,
        [-0.1995,  0.0470,  0.1473],
        [ 0.6166, -0.0655, -0.4535],
        [ 0.6305, -0.0316, -0.4628]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:681,lr:0.005,loss:0.8710534572601318
[tensor([[ 0.1973, -0.0664, -0.1467],
        [-0.5811,  0.1599,  0.4312],
        [ 0.2018, -0.0425, -0.1494],
        ...,
        [ 0.1993, -0.0468, -0.1477],
        [-0.6160,  0.0654,  0.4544],
        [-0.6298,  0.0315,  0.4638]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:682,lr:0.005,loss:0.222879126

[tensor([[-0.1915,  0.0670,  0.1542],
        [ 0.5649, -0.1550, -0.4535],
        [-0.1961,  0.0421,  0.1570],
        ...,
        [-0.1939,  0.0438,  0.1554],
        [ 0.5990, -0.0614, -0.4775],
        [ 0.6124, -0.0288, -0.4871]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:725,lr:0.005,loss:0.8710010051727295
[tensor([[ 0.1913, -0.0670, -0.1544],
        [-0.5644,  0.1549,  0.4543],
        [ 0.1959, -0.0421, -0.1573],
        ...,
        [ 0.1938, -0.0437, -0.1556],
        [-0.5984,  0.0613,  0.4783],
        [-0.6118,  0.0287,  0.4879]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:726,lr:0.005,loss:0.22295448184013367
[tensor([[-0.1912,  0.0670,  0.1545],
        [ 0.5642, -0.1548, -0.4545],
        [-0.1958,  0.0421,  0.1573],
        ...,
        [-0.1937,  0.0437,  0.1557],
        [ 0.5982, -0.0613, -0.4785],
        [ 0.6116, -0.0287, -0.4881]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:727,lr:0.005,loss:0.8709968328475952
[tensor([[ 0.1910, -0.0670, -0.15

[tensor([[-0.1853,  0.0679,  0.1614],
        [ 0.5477, -0.1512, -0.4751],
        [-0.1900,  0.0422,  0.1643],
        ...,
        [-0.1883,  0.0412,  0.1629],
        [ 0.5810, -0.0589, -0.4998],
        [ 0.5940, -0.0276, -0.5096]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:773,lr:0.005,loss:0.8709751963615417
[tensor([[ 0.1851, -0.0680, -0.1616],
        [-0.5472,  0.1511,  0.4758],
        [ 0.1898, -0.0423, -0.1646],
        ...,
        [ 0.1881, -0.0412, -0.1631],
        [-0.5804,  0.0588,  0.5005],
        [-0.5934,  0.0276,  0.5103]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:774,lr:0.005,loss:0.22300046682357788
[tensor([[-0.1850,  0.0680,  0.1617],
        [ 0.5471, -0.1511, -0.4759],
        [-0.1897,  0.0423,  0.1646],
        ...,
        [-0.1880,  0.0411,  0.1631],
        [ 0.5803, -0.0588, -0.5006],
        [ 0.5932, -0.0276, -0.5104]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:775,lr:0.005,loss:0.8709758520126343
[tensor([[ 0.1848, -0.0680, -0.16

[tensor([[-0.1805,  0.0688,  0.1666],
        [ 0.5344, -0.1487, -0.4907],
        [-0.1852,  0.0425,  0.1696],
        ...,
        [-0.1838,  0.0395,  0.1682],
        [ 0.5670, -0.0574, -0.5158],
        [ 0.5796, -0.0272, -0.5258]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:813,lr:0.005,loss:0.8709695935249329
[tensor([[ 0.1803, -0.0688, -0.1668],
        [-0.5339,  0.1486,  0.4913],
        [ 0.1850, -0.0425, -0.1698],
        ...,
        [ 0.1837, -0.0394, -0.1684],
        [-0.5665,  0.0574,  0.5164],
        [-0.5791,  0.0272,  0.5264]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:814,lr:0.005,loss:0.2230217456817627
[tensor([[-0.1803,  0.0688,  0.1669],
        [ 0.5338, -0.1486, -0.4914],
        [-0.1850,  0.0425,  0.1699],
        ...,
        [-0.1836,  0.0394,  0.1685],
        [ 0.5664, -0.0574, -0.5166],
        [ 0.5790, -0.0272, -0.5265]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:815,lr:0.005,loss:0.8709691762924194
[tensor([[ 0.1801, -0.0689, -0.167

[tensor([[ 0.1754, -0.0697, -0.1717],
        [-0.5204,  0.1464,  0.5061],
        [ 0.1802, -0.0428, -0.1748],
        ...,
        [ 0.1792, -0.0378, -0.1736],
        [-0.5524,  0.0563,  0.5317],
        [-0.5646,  0.0271,  0.5418]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:858,lr:0.005,loss:0.22303976118564606
[tensor([[-0.1754,  0.0697,  0.1718],
        [ 0.5203, -0.1464, -0.5062],
        [-0.1802,  0.0428,  0.1749],
        ...,
        [-0.1792,  0.0378,  0.1736],
        [ 0.5523, -0.0562, -0.5318],
        [ 0.5645, -0.0271, -0.5419]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:859,lr:0.005,loss:0.8709608912467957
[tensor([[ 0.1752, -0.0698, -0.1720],
        [-0.5198,  0.1463,  0.5067],
        [ 0.1800, -0.0429, -0.1750],
        ...,
        [ 0.1790, -0.0377, -0.1738],
        [-0.5518,  0.0562,  0.5323],
        [-0.5640,  0.0271,  0.5424]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:860,lr:0.005,loss:0.22303949296474457
[tensor([[-0.1752,  0.0698,  0.1

epoch:883,lr:0.005,loss:0.870962381362915
[tensor([[ 0.1728, -0.0702, -0.1743],
        [-0.5130,  0.1452,  0.5139],
        [ 0.1776, -0.0430, -0.1775],
        ...,
        [ 0.1767, -0.0369, -0.1762],
        [-0.5446,  0.0557,  0.5397],
        [-0.5567,  0.0271,  0.5498]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:884,lr:0.005,loss:0.22304831445217133
[tensor([[-0.1728,  0.0702,  0.1744],
        [ 0.5129, -0.1452, -0.5140],
        [-0.1776,  0.0430,  0.1775],
        ...,
        [-0.1767,  0.0369,  0.1763],
        [ 0.5445, -0.0556, -0.5398],
        [ 0.5566, -0.0271, -0.5499]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:885,lr:0.005,loss:0.8709635138511658
[tensor([[ 0.1726, -0.0702, -0.1745],
        [-0.5125,  0.1451,  0.5144],
        [ 0.1774, -0.0430, -0.1777],
        ...,
        [ 0.1766, -0.0368, -0.1764],
        [-0.5441,  0.0556,  0.5402],
        [-0.5561,  0.0271,  0.5504]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:886,lr:0.005,loss:0.22304709

[tensor([[ 0.1684, -0.0710, -0.1785],
        [-0.5006,  0.1432,  0.5264],
        [ 0.1732, -0.0433, -0.1817],
        ...,
        [ 0.1726, -0.0355, -0.1806],
        [-0.5317,  0.0547,  0.5526],
        [-0.5434,  0.0271,  0.5628]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:932,lr:0.005,loss:0.22305478155612946
[tensor([[-0.1683,  0.0710,  0.1786],
        [ 0.5005, -0.1432, -0.5265],
        [-0.1732,  0.0433,  0.1817],
        ...,
        [-0.1726,  0.0355,  0.1806],
        [ 0.5315, -0.0547, -0.5527],
        [ 0.5433, -0.0271, -0.5629]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:933,lr:0.005,loss:0.8709614276885986
[tensor([[ 0.1682, -0.0710, -0.1787],
        [-0.5002,  0.1431,  0.5269],
        [ 0.1730, -0.0433, -0.1819],
        ...,
        [ 0.1725, -0.0354, -0.1807],
        [-0.5312,  0.0547,  0.5531],
        [-0.5429,  0.0271,  0.5633]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:934,lr:0.005,loss:0.22305521368980408
[tensor([[-0.1682,  0.0710,  0.1

epoch:975,lr:0.005,loss:0.8709622621536255
[tensor([[ 0.1647, -0.0716, -0.1818],
        [-0.4905,  0.1416,  0.5362],
        [ 0.1696, -0.0436, -0.1850],
        ...,
        [ 0.1692, -0.0343, -0.1840],
        [-0.5210,  0.0540,  0.5627],
        [-0.5325,  0.0272,  0.5730]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:976,lr:0.005,loss:0.22305767238140106
[tensor([[-0.1647,  0.0716,  0.1818],
        [ 0.4904, -0.1416, -0.5363],
        [-0.1696,  0.0436,  0.1851],
        ...,
        [-0.1692,  0.0343,  0.1840],
        [ 0.5209, -0.0540, -0.5628],
        [ 0.5324, -0.0271, -0.5731]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:977,lr:0.005,loss:0.8709635734558105
[tensor([[ 0.1646, -0.0716, -0.1819],
        [-0.4901,  0.1415,  0.5366],
        [ 0.1694, -0.0436, -0.1852],
        ...,
        [ 0.1691, -0.0343, -0.1841],
        [-0.5206,  0.0540,  0.5631],
        [-0.5320,  0.0272,  0.5735]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:978,lr:0.005,loss:0.2230605

[tensor([[ 0.1616, -0.0721, -0.1845],
        [-0.4818,  0.1402,  0.5444],
        [ 0.1665, -0.0438, -0.1878],
        ...,
        [ 0.1663, -0.0334, -0.1868],
        [-0.5119,  0.0534,  0.5711],
        [-0.5231,  0.0272,  0.5815]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1018,lr:0.005,loss:0.22306573390960693
[tensor([[-0.1616,  0.0721,  0.1846],
        [ 0.4817, -0.1402, -0.5445],
        [-0.1665,  0.0437,  0.1878],
        ...,
        [-0.1663,  0.0334,  0.1868],
        [ 0.5118, -0.0534, -0.5712],
        [ 0.5230, -0.0272, -0.5816]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1019,lr:0.005,loss:0.8709659576416016
[tensor([[ 0.1615, -0.0721, -0.1846],
        [-0.4814,  0.1401,  0.5448],
        [ 0.1664, -0.0438, -0.1879],
        ...,
        [ 0.1662, -0.0333, -0.1869],
        [-0.5114,  0.0534,  0.5715],
        [-0.5227,  0.0272,  0.5819]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1020,lr:0.005,loss:0.2230643481016159
[tensor([[-0.1614,  0.0721,  0

[tensor([[-0.1588,  0.0725,  0.1869],
        [ 0.4739, -0.1390, -0.5516],
        [-0.1637,  0.0439,  0.1902],
        ...,
        [-0.1637,  0.0325,  0.1893],
        [ 0.5036, -0.0529, -0.5784],
        [ 0.5146, -0.0273, -0.5889]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1061,lr:0.005,loss:0.8709567785263062
[tensor([[ 0.1587, -0.0725, -0.1870],
        [-0.4736,  0.1389,  0.5518],
        [ 0.1636, -0.0439, -0.1903],
        ...,
        [ 0.1636, -0.0325, -0.1894],
        [-0.5033,  0.0529,  0.5787],
        [-0.5143,  0.0273,  0.5892]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1062,lr:0.005,loss:0.2230713665485382
[tensor([[-0.1586,  0.0725,  0.1870],
        [ 0.4735, -0.1389, -0.5519],
        [-0.1636,  0.0439,  0.1903],
        ...,
        [-0.1636,  0.0325,  0.1894],
        [ 0.5032, -0.0529, -0.5788],
        [ 0.5142, -0.0273, -0.5892]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1063,lr:0.005,loss:0.8709600567817688
[tensor([[ 0.1585, -0.0725, -0.

epoch:1104,lr:0.005,loss:0.2230740338563919
[tensor([[-0.1562,  0.0729,  0.1890],
        [ 0.4666, -0.1378, -0.5580],
        [-0.1611,  0.0441,  0.1924],
        ...,
        [-0.1613,  0.0317,  0.1915],
        [ 0.4960, -0.0524, -0.5850],
        [ 0.5068, -0.0274, -0.5955]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1105,lr:0.005,loss:0.8709617853164673
[tensor([[ 0.1561, -0.0729, -0.1891],
        [-0.4664,  0.1378,  0.5582],
        [ 0.1610, -0.0441, -0.1924],
        ...,
        [ 0.1612, -0.0317, -0.1916],
        [-0.4957,  0.0524,  0.5852],
        [-0.5066,  0.0274,  0.5958]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1106,lr:0.005,loss:0.22307226061820984
[tensor([[-0.1561,  0.0729,  0.1891],
        [ 0.4663, -0.1378, -0.5582],
        [-0.1610,  0.0441,  0.1925],
        ...,
        [-0.1612,  0.0317,  0.1916],
        [ 0.4956, -0.0524, -0.5853],
        [ 0.5065, -0.0274, -0.5958]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1107,lr:0.005,loss:0.870

[tensor([[ 0.1539, -0.0732, -0.1908],
        [-0.4603,  0.1368,  0.5634],
        [ 0.1589, -0.0442, -0.1942],
        ...,
        [ 0.1592, -0.0310, -0.1934],
        [-0.4893,  0.0520,  0.5906],
        [-0.5000,  0.0275,  0.6012]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1148,lr:0.005,loss:0.22308306396007538
[tensor([[-0.1539,  0.0732,  0.1909],
        [ 0.4603, -0.1368, -0.5635],
        [-0.1589,  0.0442,  0.1942],
        ...,
        [-0.1592,  0.0310,  0.1934],
        [ 0.4893, -0.0520, -0.5906],
        [ 0.4999, -0.0275, -0.6012]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1149,lr:0.005,loss:0.8709527254104614
[tensor([[ 0.1538, -0.0732, -0.1909],
        [-0.4600,  0.1367,  0.5636],
        [ 0.1588, -0.0442, -0.1943],
        ...,
        [ 0.1591, -0.0310, -0.1935],
        [-0.4891,  0.0520,  0.5908],
        [-0.4997,  0.0275,  0.6014]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1150,lr:0.005,loss:0.22308383882045746
[tensor([[-0.1538,  0.0732,  

[tensor([[ 0.1516, -0.0735, -0.1927],
        [-0.4539,  0.1357,  0.5688],
        [ 0.1566, -0.0443, -0.1960],
        ...,
        [ 0.1570, -0.0303, -0.1953],
        [-0.4826,  0.0516,  0.5962],
        [-0.4931,  0.0275,  0.6068]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1198,lr:0.005,loss:0.22308407723903656
[tensor([[-0.1516,  0.0735,  0.1927],
        [ 0.4538, -0.1357, -0.5689],
        [-0.1566,  0.0443,  0.1961],
        ...,
        [-0.1570,  0.0303,  0.1953],
        [ 0.4825, -0.0515, -0.5962],
        [ 0.4930, -0.0275, -0.6068]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1199,lr:0.005,loss:0.8709550499916077
[tensor([[ 0.1515, -0.0735, -0.1927],
        [-0.4536,  0.1357,  0.5690],
        [ 0.1565, -0.0443, -0.1961],
        ...,
        [ 0.1569, -0.0303, -0.1953],
        [-0.4823,  0.0515,  0.5964],
        [-0.4928,  0.0275,  0.6070]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1200,lr:0.005,loss:0.22308534383773804
[tensor([[-0.1515,  0.0735,  

[tensor([[ 0.1498, -0.0738, -0.1940],
        [-0.4488,  0.1349,  0.5730],
        [ 0.1548, -0.0444, -0.1974],
        ...,
        [ 0.1553, -0.0298, -0.1967],
        [-0.4772,  0.0512,  0.6005],
        [-0.4876,  0.0276,  0.6111]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1244,lr:0.005,loss:0.22309157252311707
[tensor([[-0.1498,  0.0738,  0.1941],
        [ 0.4487, -0.1349, -0.5731],
        [-0.1548,  0.0444,  0.1975],
        ...,
        [-0.1553,  0.0298,  0.1967],
        [ 0.4772, -0.0512, -0.6005],
        [ 0.4876, -0.0275, -0.6112]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1245,lr:0.005,loss:0.8709491491317749
[tensor([[ 0.1497, -0.0738, -0.1941],
        [-0.4486,  0.1349,  0.5732],
        [ 0.1547, -0.0444, -0.1975],
        ...,
        [ 0.1553, -0.0298, -0.1968],
        [-0.4770,  0.0512,  0.6006],
        [-0.4874,  0.0276,  0.6113]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1246,lr:0.005,loss:0.22309020161628723
[tensor([[-0.1497,  0.0738,  

epoch:1290,lr:0.005,loss:0.2230934351682663
[tensor([[-0.1482,  0.0740,  0.1953],
        [ 0.4443, -0.1343, -0.5767],
        [-0.1532,  0.0445,  0.1987],
        ...,
        [-0.1538,  0.0293,  0.1980],
        [ 0.4725, -0.0510, -0.6042],
        [ 0.4828, -0.0276, -0.6149]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1291,lr:0.005,loss:0.8709475994110107
[tensor([[ 0.1482, -0.0740, -0.1953],
        [-0.4442,  0.1342,  0.5768],
        [ 0.1532, -0.0445, -0.1987],
        ...,
        [ 0.1538, -0.0293, -0.1980],
        [-0.4724,  0.0510,  0.6043],
        [-0.4826,  0.0277,  0.6150]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1292,lr:0.005,loss:0.2230948954820633
[tensor([[-0.1481,  0.0740,  0.1953],
        [ 0.4441, -0.1342, -0.5768],
        [-0.1531,  0.0445,  0.1987],
        ...,
        [-0.1538,  0.0293,  0.1980],
        [ 0.4723, -0.0510, -0.6043],
        [ 0.4826, -0.0277, -0.6150]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1293,lr:0.005,loss:0.8709

[tensor([[-0.1468,  0.0742,  0.1963],
        [ 0.4403, -0.1336, -0.5798],
        [-0.1518,  0.0446,  0.1997],
        ...,
        [-0.1525,  0.0289,  0.1991],
        [ 0.4683, -0.0507, -0.6075],
        [ 0.4785, -0.0277, -0.6182]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1339,lr:0.005,loss:0.8709423542022705
[tensor([[ 0.1467, -0.0742, -0.1963],
        [-0.4402,  0.1336,  0.5799],
        [ 0.1517, -0.0446, -0.1998],
        ...,
        [ 0.1525, -0.0289, -0.1991],
        [-0.4682,  0.0507,  0.6076],
        [-0.4784,  0.0277,  0.6183]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1340,lr:0.005,loss:0.2230977565050125
[tensor([[-0.1467,  0.0742,  0.1964],
        [ 0.4402, -0.1336, -0.5800],
        [-0.1517,  0.0446,  0.1998],
        ...,
        [-0.1524,  0.0289,  0.1991],
        [ 0.4682, -0.0507, -0.6076],
        [ 0.4783, -0.0277, -0.6183]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1341,lr:0.005,loss:0.8709427714347839
[tensor([[ 0.1467, -0.0742, -0.

[tensor([[ 0.1455, -0.0744, -0.1973],
        [-0.4367,  0.1330,  0.5827],
        [ 0.1505, -0.0447, -0.2007],
        ...,
        [ 0.1513, -0.0286, -0.2000],
        [-0.4646,  0.0505,  0.6104],
        [-0.4746,  0.0278,  0.6211]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1388,lr:0.005,loss:0.22310501337051392
[tensor([[-0.1455,  0.0744,  0.1973],
        [ 0.4367, -0.1331, -0.5827],
        [-0.1505,  0.0447,  0.2007],
        ...,
        [-0.1513,  0.0286,  0.2000],
        [ 0.4645, -0.0505, -0.6104],
        [ 0.4746, -0.0278, -0.6211]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1389,lr:0.005,loss:0.8709350228309631
[tensor([[ 0.1454, -0.0744, -0.1973],
        [-0.4366,  0.1330,  0.5828],
        [ 0.1505, -0.0447, -0.2007],
        ...,
        [ 0.1512, -0.0285, -0.2001],
        [-0.4644,  0.0505,  0.6105],
        [-0.4745,  0.0278,  0.6212]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1390,lr:0.005,loss:0.22310687601566315
[tensor([[-0.1454,  0.0744,  

epoch:1426,lr:0.005,loss:0.22311265766620636
[tensor([[-0.1446,  0.0745,  0.1979],
        [ 0.4342, -0.1326, -0.5846],
        [-0.1496,  0.0447,  0.2013],
        ...,
        [-0.1505,  0.0283,  0.2007],
        [ 0.4620, -0.0503, -0.6123],
        [ 0.4720, -0.0278, -0.6231]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1427,lr:0.005,loss:0.8709295392036438
[tensor([[ 0.1446, -0.0745, -0.1979],
        [-0.4342,  0.1326,  0.5847],
        [ 0.1496, -0.0447, -0.2014],
        ...,
        [ 0.1504, -0.0283, -0.2007],
        [-0.4619,  0.0503,  0.6124],
        [-0.4719,  0.0278,  0.6232]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1428,lr:0.005,loss:0.2231133133172989
[tensor([[-0.1446,  0.0745,  0.1979],
        [ 0.4341, -0.1326, -0.5847],
        [-0.1496,  0.0447,  0.2014],
        ...,
        [-0.1504,  0.0283,  0.2007],
        [ 0.4618, -0.0503, -0.6124],
        [ 0.4718, -0.0278, -0.6232]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1429,lr:0.005,loss:0.870

[tensor([[-0.1438,  0.0746,  0.1985],
        [ 0.4320, -0.1323, -0.5863],
        [-0.1488,  0.0447,  0.2019],
        ...,
        [-0.1497,  0.0281,  0.2013],
        [ 0.4596, -0.0502, -0.6141],
        [ 0.4695, -0.0278, -0.6249]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1467,lr:0.005,loss:0.8709231019020081
[tensor([[ 0.1438, -0.0746, -0.1985],
        [-0.4319,  0.1322,  0.5864],
        [ 0.1488, -0.0447, -0.2020],
        ...,
        [ 0.1497, -0.0280, -0.2013],
        [-0.4595,  0.0502,  0.6142],
        [-0.4695,  0.0278,  0.6250]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1468,lr:0.005,loss:0.22312119603157043
[tensor([[-0.1438,  0.0746,  0.1985],
        [ 0.4319, -0.1323, -0.5864],
        [-0.1488,  0.0447,  0.2020],
        ...,
        [-0.1497,  0.0280,  0.2013],
        [ 0.4595, -0.0502, -0.6142],
        [ 0.4694, -0.0278, -0.6250]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1469,lr:0.005,loss:0.8709230422973633
[tensor([[ 0.1438, -0.0746, -0

[tensor([[-0.1430,  0.0747,  0.1991],
        [ 0.4296, -0.1319, -0.5882],
        [-0.1480,  0.0448,  0.2026],
        ...,
        [-0.1489,  0.0278,  0.2020],
        [ 0.4571, -0.0500, -0.6160],
        [ 0.4669, -0.0278, -0.6268]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1515,lr:0.005,loss:0.8709132671356201
[tensor([[ 0.1429, -0.0747, -0.1991],
        [-0.4295,  0.1319,  0.5882],
        [ 0.1480, -0.0448, -0.2026],
        ...,
        [ 0.1489, -0.0278, -0.2020],
        [-0.4570,  0.0500,  0.6161],
        [-0.4669,  0.0278,  0.6269]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1516,lr:0.005,loss:0.22312502562999725
[tensor([[-0.1429,  0.0747,  0.1991],
        [ 0.4295, -0.1319, -0.5883],
        [-0.1480,  0.0448,  0.2026],
        ...,
        [-0.1489,  0.0278,  0.2020],
        [ 0.4570, -0.0500, -0.6161],
        [ 0.4668, -0.0278, -0.6269]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1517,lr:0.005,loss:0.8709174990653992
[tensor([[ 0.1429, -0.0747, -0

[tensor([[ 0.1423, -0.0747, -0.1996],
        [-0.4277,  0.1316,  0.5896],
        [ 0.1473, -0.0448, -0.2030],
        ...,
        [ 0.1483, -0.0276, -0.2024],
        [-0.4551,  0.0499,  0.6175],
        [-0.4650,  0.0279,  0.6282]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1558,lr:0.005,loss:0.22312912344932556
[tensor([[-0.1423,  0.0747,  0.1996],
        [ 0.4277, -0.1316, -0.5896],
        [-0.1473,  0.0448,  0.2030],
        ...,
        [-0.1483,  0.0276,  0.2024],
        [ 0.4551, -0.0499, -0.6175],
        [ 0.4649, -0.0278, -0.6282]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1559,lr:0.005,loss:0.8709125518798828
[tensor([[ 0.1423, -0.0748, -0.1996],
        [-0.4277,  0.1316,  0.5896],
        [ 0.1473, -0.0448, -0.2030],
        ...,
        [ 0.1483, -0.0276, -0.2025],
        [-0.4551,  0.0499,  0.6175],
        [-0.4649,  0.0279,  0.6283]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1560,lr:0.005,loss:0.2231297791004181
[tensor([[-0.1423,  0.0747,  0

[tensor([[ 0.1420, -0.0748, -0.1998],
        [-0.4268,  0.1314,  0.5903],
        [ 0.1470, -0.0448, -0.2033],
        ...,
        [ 0.1480, -0.0275, -0.2027],
        [-0.4542,  0.0499,  0.6182],
        [-0.4640,  0.0279,  0.6290]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1584,lr:0.005,loss:0.2231331467628479
[tensor([[-0.1420,  0.0748,  0.1998],
        [ 0.4268, -0.1314, -0.5903],
        [-0.1470,  0.0448,  0.2033],
        ...,
        [-0.1480,  0.0275,  0.2027],
        [ 0.4541, -0.0499, -0.6182],
        [ 0.4639, -0.0279, -0.6290]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1585,lr:0.005,loss:0.8709085583686829
[tensor([[ 0.1419, -0.0748, -0.1998],
        [-0.4267,  0.1314,  0.5903],
        [ 0.1470, -0.0448, -0.2033],
        ...,
        [ 0.1480, -0.0275, -0.2027],
        [-0.4541,  0.0499,  0.6182],
        [-0.4639,  0.0279,  0.6290]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1586,lr:0.005,loss:0.2231321632862091
[tensor([[-0.1419,  0.0748,  0.

[tensor([[ 0.1417, -0.0748, -0.2000],
        [-0.4259,  0.1313,  0.5910],
        [ 0.1467, -0.0449, -0.2035],
        ...,
        [ 0.1477, -0.0274, -0.2029],
        [-0.4532,  0.0498,  0.6189],
        [-0.4630,  0.0279,  0.6297]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1610,lr:0.005,loss:0.22313998639583588
[tensor([[-0.1417,  0.0748,  0.2000],
        [ 0.4259, -0.1313, -0.5910],
        [-0.1467,  0.0449,  0.2035],
        ...,
        [-0.1477,  0.0274,  0.2029],
        [ 0.4532, -0.0498, -0.6189],
        [ 0.4630, -0.0279, -0.6297]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1611,lr:0.005,loss:0.8709046840667725
[tensor([[ 0.1416, -0.0748, -0.2000],
        [-0.4258,  0.1313,  0.5910],
        [ 0.1467, -0.0449, -0.2035],
        ...,
        [ 0.1477, -0.0274, -0.2029],
        [-0.4531,  0.0498,  0.6189],
        [-0.4629,  0.0279,  0.6297]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1612,lr:0.005,loss:0.2231394201517105
[tensor([[-0.1416,  0.0748,  0

epoch:1635,lr:0.005,loss:0.8709005117416382
[tensor([[ 0.1414, -0.0749, -0.2002],
        [-0.4251,  0.1312,  0.5916],
        [ 0.1464, -0.0449, -0.2037],
        ...,
        [ 0.1474, -0.0274, -0.2031],
        [-0.4523,  0.0498,  0.6195],
        [-0.4621,  0.0279,  0.6303]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1636,lr:0.005,loss:0.22314248979091644
[tensor([[-0.1414,  0.0749,  0.2002],
        [ 0.4251, -0.1312, -0.5916],
        [-0.1464,  0.0449,  0.2037],
        ...,
        [-0.1474,  0.0274,  0.2031],
        [ 0.4523, -0.0498, -0.6195],
        [ 0.4621, -0.0279, -0.6303]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1637,lr:0.005,loss:0.8708993196487427
[tensor([[ 0.1413, -0.0749, -0.2002],
        [-0.4250,  0.1312,  0.5916],
        [ 0.1464, -0.0449, -0.2037],
        ...,
        [ 0.1474, -0.0274, -0.2031],
        [-0.4523,  0.0498,  0.6196],
        [-0.4620,  0.0280,  0.6304]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1638,lr:0.005,loss:0.223

[tensor([[-0.1409,  0.0749,  0.2005],
        [ 0.4238, -0.1310, -0.5925],
        [-0.1459,  0.0449,  0.2040],
        ...,
        [-0.1470,  0.0272,  0.2035],
        [ 0.4510, -0.0497, -0.6205],
        [ 0.4607, -0.0280, -0.6313]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1677,lr:0.005,loss:0.8708981275558472
[tensor([[ 0.1409, -0.0749, -0.2005],
        [-0.4238,  0.1310,  0.5926],
        [ 0.1459, -0.0449, -0.2040],
        ...,
        [ 0.1470, -0.0272, -0.2035],
        [-0.4510,  0.0497,  0.6205],
        [-0.4607,  0.0280,  0.6313]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1678,lr:0.005,loss:0.22314605116844177
[tensor([[-0.1409,  0.0749,  0.2006],
        [ 0.4237, -0.1310, -0.5926],
        [-0.1459,  0.0449,  0.2040],
        ...,
        [-0.1470,  0.0272,  0.2035],
        [ 0.4509, -0.0497, -0.6205],
        [ 0.4607, -0.0280, -0.6313]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1679,lr:0.005,loss:0.8708971738815308
[tensor([[ 0.1409, -0.0749, -0

epoch:1718,lr:0.005,loss:0.22315245866775513
[tensor([[-0.1405,  0.0750,  0.2008],
        [ 0.4227, -0.1309, -0.5934],
        [-0.1455,  0.0449,  0.2043],
        ...,
        [-0.1466,  0.0271,  0.2038],
        [ 0.4498, -0.0497, -0.6213],
        [ 0.4595, -0.0280, -0.6322]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1719,lr:0.005,loss:0.8708887100219727
[tensor([[ 0.1405, -0.0750, -0.2008],
        [-0.4226,  0.1308,  0.5934],
        [ 0.1455, -0.0449, -0.2043],
        ...,
        [ 0.1466, -0.0271, -0.2038],
        [-0.4498,  0.0497,  0.6214],
        [-0.4595,  0.0280,  0.6322]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1720,lr:0.005,loss:0.2231532335281372
[tensor([[-0.1405,  0.0750,  0.2008],
        [ 0.4226, -0.1308, -0.5934],
        [-0.1455,  0.0449,  0.2043],
        ...,
        [-0.1466,  0.0271,  0.2038],
        [ 0.4497, -0.0497, -0.6214],
        [ 0.4594, -0.0280, -0.6322]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1721,lr:0.005,loss:0.870

epoch:1760,lr:0.005,loss:0.22316648066043854
[tensor([[-0.1401,  0.0750,  0.2011],
        [ 0.4216, -0.1306, -0.5942],
        [-0.1452,  0.0449,  0.2046],
        ...,
        [-0.1463,  0.0270,  0.2040],
        [ 0.4487, -0.0496, -0.6221],
        [ 0.4584, -0.0279, -0.6329]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1761,lr:0.005,loss:0.8708767294883728
[tensor([[ 0.1401, -0.0750, -0.2011],
        [-0.4216,  0.1306,  0.5942],
        [ 0.1452, -0.0449, -0.2046],
        ...,
        [ 0.1462, -0.0270, -0.2040],
        [-0.4487,  0.0496,  0.6222],
        [-0.4584,  0.0280,  0.6330]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1762,lr:0.005,loss:0.22316619753837585
[tensor([[-0.1401,  0.0750,  0.2011],
        [ 0.4216, -0.1306, -0.5942],
        [-0.1452,  0.0449,  0.2046],
        ...,
        [-0.1462,  0.0270,  0.2040],
        [ 0.4487, -0.0496, -0.6222],
        [ 0.4583, -0.0279, -0.6330]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1763,lr:0.005,loss:0.87

[tensor([[ 0.1398, -0.0750, -0.2013],
        [-0.4207,  0.1304,  0.5949],
        [ 0.1448, -0.0449, -0.2048],
        ...,
        [ 0.1459, -0.0269, -0.2043],
        [-0.4477,  0.0495,  0.6229],
        [-0.4574,  0.0279,  0.6337]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1806,lr:0.005,loss:0.2231760174036026
[tensor([[-0.1398,  0.0750,  0.2013],
        [ 0.4207, -0.1305, -0.5949],
        [-0.1448,  0.0449,  0.2048],
        ...,
        [-0.1459,  0.0269,  0.2043],
        [ 0.4477, -0.0495, -0.6228],
        [ 0.4574, -0.0279, -0.6337]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1807,lr:0.005,loss:0.8708655834197998
[tensor([[ 0.1398, -0.0750, -0.2013],
        [-0.4207,  0.1304,  0.5949],
        [ 0.1448, -0.0449, -0.2048],
        ...,
        [ 0.1459, -0.0269, -0.2043],
        [-0.4477,  0.0495,  0.6229],
        [-0.4574,  0.0279,  0.6337]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1808,lr:0.005,loss:0.22317595779895782
[tensor([[-0.1398,  0.0750,  0

[tensor([[ 0.1396, -0.0751, -0.2015],
        [-0.4200,  0.1303,  0.5954],
        [ 0.1446, -0.0449, -0.2050],
        ...,
        [ 0.1457, -0.0268, -0.2044],
        [-0.4470,  0.0494,  0.6234],
        [-0.4567,  0.0279,  0.6342]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1848,lr:0.005,loss:0.22318251430988312
[tensor([[-0.1396,  0.0751,  0.2015],
        [ 0.4200, -0.1303, -0.5954],
        [-0.1446,  0.0449,  0.2050],
        ...,
        [-0.1457,  0.0268,  0.2044],
        [ 0.4470, -0.0494, -0.6234],
        [ 0.4566, -0.0279, -0.6342]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1849,lr:0.005,loss:0.8708602786064148
[tensor([[ 0.1396, -0.0751, -0.2015],
        [-0.4200,  0.1303,  0.5954],
        [ 0.1446, -0.0449, -0.2050],
        ...,
        [ 0.1457, -0.0268, -0.2044],
        [-0.4470,  0.0494,  0.6234],
        [-0.4566,  0.0279,  0.6342]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1850,lr:0.005,loss:0.22318203747272491
[tensor([[-0.1396,  0.0751,  

[tensor([[-0.1394,  0.0751,  0.2016],
        [ 0.4195, -0.1302, -0.5958],
        [-0.1444,  0.0449,  0.2051],
        ...,
        [-0.1455,  0.0267,  0.2046],
        [ 0.4464, -0.0493, -0.6238],
        [ 0.4561, -0.0279, -0.6346]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1895,lr:0.005,loss:0.8708474040031433
[tensor([[ 0.1394, -0.0751, -0.2016],
        [-0.4195,  0.1302,  0.5958],
        [ 0.1444, -0.0449, -0.2051],
        ...,
        [ 0.1455, -0.0267, -0.2046],
        [-0.4464,  0.0493,  0.6238],
        [-0.4560,  0.0279,  0.6346]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1896,lr:0.005,loss:0.22319217026233673
[tensor([[-0.1394,  0.0751,  0.2016],
        [ 0.4195, -0.1302, -0.5958],
        [-0.1444,  0.0449,  0.2051],
        ...,
        [-0.1455,  0.0267,  0.2046],
        [ 0.4464, -0.0493, -0.6238],
        [ 0.4560, -0.0279, -0.6346]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1897,lr:0.005,loss:0.8708481192588806
[tensor([[ 0.1393, -0.0751, -0

epoch:1942,lr:0.005,loss:0.22320249676704407
[tensor([[-0.1392,  0.0751,  0.2018],
        [ 0.4189, -0.1301, -0.5962],
        [-0.1442,  0.0449,  0.2053],
        ...,
        [-0.1453,  0.0267,  0.2047],
        [ 0.4458, -0.0493, -0.6242],
        [ 0.4554, -0.0279, -0.6351]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1943,lr:0.005,loss:0.8708401918411255
[tensor([[ 0.1391, -0.0751, -0.2018],
        [-0.4189,  0.1301,  0.5962],
        [ 0.1442, -0.0449, -0.2053],
        ...,
        [ 0.1453, -0.0267, -0.2047],
        [-0.4458,  0.0493,  0.6242],
        [-0.4554,  0.0279,  0.6351]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1944,lr:0.005,loss:0.22320260107517242
[tensor([[-0.1392,  0.0751,  0.2018],
        [ 0.4189, -0.1301, -0.5962],
        [-0.1442,  0.0449,  0.2053],
        ...,
        [-0.1453,  0.0267,  0.2047],
        [ 0.4458, -0.0493, -0.6242],
        [ 0.4554, -0.0279, -0.6351]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1945,lr:0.005,loss:0.87

[tensor([[ 0.1390, -0.0751, -0.2019],
        [-0.4184,  0.1300,  0.5965],
        [ 0.1440, -0.0450, -0.2054],
        ...,
        [ 0.1452, -0.0266, -0.2048],
        [-0.4453,  0.0493,  0.6246],
        [-0.4549,  0.0279,  0.6354]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1984,lr:0.005,loss:0.22320881485939026
[tensor([[-0.1390,  0.0751,  0.2019],
        [ 0.4184, -0.1301, -0.5966],
        [-0.1440,  0.0450,  0.2054],
        ...,
        [-0.1452,  0.0266,  0.2048],
        [ 0.4453, -0.0493, -0.6246],
        [ 0.4549, -0.0279, -0.6354]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1985,lr:0.005,loss:0.870832085609436
[tensor([[ 0.1390, -0.0751, -0.2019],
        [-0.4184,  0.1300,  0.5966],
        [ 0.1440, -0.0450, -0.2054],
        ...,
        [ 0.1452, -0.0266, -0.2048],
        [-0.4453,  0.0493,  0.6246],
        [-0.4549,  0.0279,  0.6354]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:1986,lr:0.005,loss:0.22321061789989471
[tensor([[-0.1390,  0.0751,  0

[tensor([[ 0.1388, -0.0752, -0.2020],
        [-0.4178,  0.1300,  0.5970],
        [ 0.1438, -0.0450, -0.2055],
        ...,
        [ 0.1450, -0.0266, -0.2050],
        [-0.4447,  0.0493,  0.6250],
        [-0.4543,  0.0280,  0.6359]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2034,lr:0.005,loss:0.22321946918964386
[tensor([[-0.1388,  0.0752,  0.2020],
        [ 0.4178, -0.1300, -0.5970],
        [-0.1438,  0.0450,  0.2055],
        ...,
        [-0.1450,  0.0266,  0.2050],
        [ 0.4447, -0.0493, -0.6250],
        [ 0.4543, -0.0279, -0.6358]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2035,lr:0.005,loss:0.8708205819129944
[tensor([[ 0.1388, -0.0752, -0.2020],
        [-0.4178,  0.1299,  0.5970],
        [ 0.1438, -0.0450, -0.2055],
        ...,
        [ 0.1450, -0.0266, -0.2050],
        [-0.4447,  0.0493,  0.6250],
        [-0.4543,  0.0279,  0.6359]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2036,lr:0.005,loss:0.2232215851545334
[tensor([[-0.1388,  0.0752,  0

[tensor([[-0.1386,  0.0752,  0.2021],
        [ 0.4174, -0.1300, -0.5973],
        [-0.1437,  0.0450,  0.2056],
        ...,
        [-0.1448,  0.0266,  0.2051],
        [ 0.4442, -0.0493, -0.6254],
        [ 0.4538, -0.0280, -0.6362]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2077,lr:0.005,loss:0.8708106875419617
[tensor([[ 0.1386, -0.0752, -0.2021],
        [-0.4173,  0.1299,  0.5973],
        [ 0.1437, -0.0450, -0.2056],
        ...,
        [ 0.1448, -0.0266, -0.2051],
        [-0.4442,  0.0493,  0.6254],
        [-0.4538,  0.0280,  0.6362]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2078,lr:0.005,loss:0.2232334464788437
[tensor([[-0.1386,  0.0752,  0.2021],
        [ 0.4173, -0.1300, -0.5973],
        [-0.1437,  0.0450,  0.2056],
        ...,
        [-0.1448,  0.0266,  0.2051],
        [ 0.4442, -0.0493, -0.6254],
        [ 0.4538, -0.0280, -0.6362]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2079,lr:0.005,loss:0.8708095550537109
[tensor([[ 0.1386, -0.0752, -0.

[tensor([[ 0.1385, -0.0752, -0.2022],
        [-0.4171,  0.1299,  0.5975],
        [ 0.1435, -0.0450, -0.2057],
        ...,
        [ 0.1447, -0.0265, -0.2052],
        [-0.4439,  0.0493,  0.6256],
        [-0.4535,  0.0281,  0.6364]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2118,lr:0.005,loss:0.22323893010616302
[tensor([[-0.1385,  0.0752,  0.2022],
        [ 0.4170, -0.1299, -0.5975],
        [-0.1435,  0.0450,  0.2057],
        ...,
        [-0.1447,  0.0265,  0.2052],
        [ 0.4439, -0.0493, -0.6256],
        [ 0.4534, -0.0281, -0.6364]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2119,lr:0.005,loss:0.8708030581474304
[tensor([[ 0.1385, -0.0752, -0.2022],
        [-0.4170,  0.1299,  0.5975],
        [ 0.1435, -0.0450, -0.2057],
        ...,
        [ 0.1447, -0.0265, -0.2052],
        [-0.4439,  0.0493,  0.6256],
        [-0.4534,  0.0281,  0.6364]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2120,lr:0.005,loss:0.22324106097221375
[tensor([[-0.1385,  0.0752,  

epoch:2157,lr:0.005,loss:0.8707922697067261
[tensor([[ 0.1385, -0.0752, -0.2022],
        [-0.4169,  0.1299,  0.5976],
        [ 0.1435, -0.0450, -0.2057],
        ...,
        [ 0.1447, -0.0265, -0.2052],
        [-0.4438,  0.0493,  0.6257],
        [-0.4533,  0.0280,  0.6365]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2158,lr:0.005,loss:0.2232503443956375
[tensor([[-0.1385,  0.0752,  0.2022],
        [ 0.4169, -0.1299, -0.5976],
        [-0.1435,  0.0450,  0.2057],
        ...,
        [-0.1447,  0.0265,  0.2052],
        [ 0.4438, -0.0493, -0.6257],
        [ 0.4533, -0.0280, -0.6365]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2159,lr:0.005,loss:0.8707932233810425
[tensor([[ 0.1385, -0.0752, -0.2022],
        [-0.4169,  0.1299,  0.5976],
        [ 0.1435, -0.0450, -0.2057],
        ...,
        [ 0.1447, -0.0265, -0.2052],
        [-0.4438,  0.0493,  0.6257],
        [-0.4533,  0.0280,  0.6365]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2160,lr:0.005,loss:0.2232

[tensor([[ 0.1384, -0.0752, -0.2023],
        [-0.4167,  0.1298,  0.5978],
        [ 0.1434, -0.0450, -0.2058],
        ...,
        [ 0.1446, -0.0265, -0.2053],
        [-0.4436,  0.0492,  0.6259],
        [-0.4531,  0.0280,  0.6367]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2200,lr:0.005,loss:0.2232583910226822
[tensor([[-0.1384,  0.0752,  0.2023],
        [ 0.4167, -0.1298, -0.5978],
        [-0.1434,  0.0450,  0.2058],
        ...,
        [-0.1446,  0.0265,  0.2053],
        [ 0.4435, -0.0492, -0.6258],
        [ 0.4531, -0.0280, -0.6367]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2201,lr:0.005,loss:0.8707824349403381
[tensor([[ 0.1384, -0.0752, -0.2023],
        [-0.4167,  0.1298,  0.5978],
        [ 0.1434, -0.0450, -0.2058],
        ...,
        [ 0.1446, -0.0265, -0.2053],
        [-0.4435,  0.0492,  0.6259],
        [-0.4531,  0.0280,  0.6367]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2202,lr:0.005,loss:0.22325758635997772
[tensor([[-0.1384,  0.0752,  0

[tensor([[ 0.1383, -0.0752, -0.2023],
        [-0.4165,  0.1297,  0.5979],
        [ 0.1434, -0.0450, -0.2058],
        ...,
        [ 0.1445, -0.0265, -0.2053],
        [-0.4434,  0.0492,  0.6260],
        [-0.4529,  0.0280,  0.6368]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2244,lr:0.005,loss:0.22326582670211792
[tensor([[-0.1383,  0.0752,  0.2023],
        [ 0.4165, -0.1298, -0.5979],
        [-0.1434,  0.0450,  0.2058],
        ...,
        [-0.1445,  0.0265,  0.2053],
        [ 0.4433, -0.0492, -0.6260],
        [ 0.4529, -0.0280, -0.6368]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2245,lr:0.005,loss:0.8707762956619263
[tensor([[ 0.1383, -0.0752, -0.2023],
        [-0.4165,  0.1297,  0.5979],
        [ 0.1434, -0.0450, -0.2058],
        ...,
        [ 0.1445, -0.0265, -0.2053],
        [-0.4433,  0.0492,  0.6260],
        [-0.4529,  0.0280,  0.6369]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2246,lr:0.005,loss:0.22326652705669403
[tensor([[-0.1383,  0.0752,  

[tensor([[ 0.1382, -0.0752, -0.2024],
        [-0.4163,  0.1297,  0.5981],
        [ 0.1433, -0.0450, -0.2059],
        ...,
        [ 0.1445, -0.0264, -0.2054],
        [-0.4431,  0.0492,  0.6262],
        [-0.4526,  0.0280,  0.6370]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2288,lr:0.005,loss:0.22327132523059845
[tensor([[-0.1382,  0.0752,  0.2024],
        [ 0.4163, -0.1297, -0.5981],
        [-0.1433,  0.0450,  0.2059],
        ...,
        [-0.1445,  0.0264,  0.2054],
        [ 0.4431, -0.0492, -0.6262],
        [ 0.4526, -0.0280, -0.6370]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2289,lr:0.005,loss:0.8707711100578308
[tensor([[ 0.1382, -0.0752, -0.2024],
        [-0.4163,  0.1297,  0.5981],
        [ 0.1433, -0.0450, -0.2059],
        ...,
        [ 0.1445, -0.0264, -0.2054],
        [-0.4431,  0.0492,  0.6262],
        [-0.4526,  0.0280,  0.6370]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2290,lr:0.005,loss:0.22327250242233276
[tensor([[-0.1382,  0.0752,  

epoch:2332,lr:0.005,loss:0.2232847362756729
[tensor([[-0.1382,  0.0752,  0.2024],
        [ 0.4161, -0.1297, -0.5982],
        [-0.1432,  0.0450,  0.2059],
        ...,
        [-0.1444,  0.0264,  0.2054],
        [ 0.4429, -0.0492, -0.6263],
        [ 0.4524, -0.0280, -0.6371]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2333,lr:0.005,loss:0.8707577586174011
[tensor([[ 0.1382, -0.0752, -0.2024],
        [-0.4161,  0.1297,  0.5982],
        [ 0.1432, -0.0450, -0.2059],
        ...,
        [ 0.1444, -0.0264, -0.2054],
        [-0.4429,  0.0492,  0.6263],
        [-0.4524,  0.0280,  0.6372]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2334,lr:0.005,loss:0.22328400611877441
[tensor([[-0.1382,  0.0752,  0.2024],
        [ 0.4161, -0.1297, -0.5982],
        [-0.1432,  0.0450,  0.2059],
        ...,
        [-0.1444,  0.0264,  0.2054],
        [ 0.4429, -0.0492, -0.6263],
        [ 0.4524, -0.0280, -0.6371]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2335,lr:0.005,loss:0.870

[tensor([[ 0.1381, -0.0752, -0.2025],
        [-0.4160,  0.1296,  0.5983],
        [ 0.1432, -0.0450, -0.2060],
        ...,
        [ 0.1444, -0.0264, -0.2055],
        [-0.4428,  0.0492,  0.6264],
        [-0.4523,  0.0280,  0.6373]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2378,lr:0.005,loss:0.22328844666481018
[tensor([[-0.1381,  0.0752,  0.2025],
        [ 0.4160, -0.1297, -0.5983],
        [-0.1432,  0.0450,  0.2060],
        ...,
        [-0.1444,  0.0264,  0.2055],
        [ 0.4428, -0.0492, -0.6264],
        [ 0.4523, -0.0280, -0.6372]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2379,lr:0.005,loss:0.8707498908042908
[tensor([[ 0.1381, -0.0752, -0.2025],
        [-0.4160,  0.1296,  0.5983],
        [ 0.1432, -0.0450, -0.2060],
        ...,
        [ 0.1444, -0.0264, -0.2055],
        [-0.4428,  0.0491,  0.6264],
        [-0.4523,  0.0280,  0.6373]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2380,lr:0.005,loss:0.22329087555408478
[tensor([[-0.1381,  0.0752,  

[tensor([[ 0.1381, -0.0752, -0.2025],
        [-0.4159,  0.1296,  0.5984],
        [ 0.1431, -0.0450, -0.2060],
        ...,
        [ 0.1443, -0.0264, -0.2055],
        [-0.4427,  0.0491,  0.6265],
        [-0.4522,  0.0279,  0.6373]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2428,lr:0.005,loss:0.2232961356639862
[tensor([[-0.1381,  0.0752,  0.2025],
        [ 0.4159, -0.1296, -0.5984],
        [-0.1431,  0.0450,  0.2060],
        ...,
        [-0.1443,  0.0264,  0.2055],
        [ 0.4427, -0.0491, -0.6264],
        [ 0.4522, -0.0279, -0.6373]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2429,lr:0.005,loss:0.8707472681999207
[tensor([[ 0.1381, -0.0752, -0.2025],
        [-0.4159,  0.1296,  0.5984],
        [ 0.1431, -0.0450, -0.2060],
        ...,
        [ 0.1443, -0.0264, -0.2055],
        [-0.4427,  0.0491,  0.6265],
        [-0.4522,  0.0279,  0.6373]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2430,lr:0.005,loss:0.22329668700695038
[tensor([[-0.1381,  0.0752,  0

[tensor([[ 0.1381, -0.0752, -0.2025],
        [-0.4158,  0.1296,  0.5985],
        [ 0.1431, -0.0450, -0.2060],
        ...,
        [ 0.1443, -0.0264, -0.2055],
        [-0.4426,  0.0491,  0.6266],
        [-0.4521,  0.0279,  0.6374]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2476,lr:0.005,loss:0.2233007550239563
[tensor([[-0.1381,  0.0752,  0.2025],
        [ 0.4158, -0.1296, -0.5985],
        [-0.1431,  0.0450,  0.2060],
        ...,
        [-0.1443,  0.0264,  0.2055],
        [ 0.4426, -0.0491, -0.6265],
        [ 0.4521, -0.0279, -0.6374]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2477,lr:0.005,loss:0.8707423806190491
[tensor([[ 0.1381, -0.0752, -0.2025],
        [-0.4158,  0.1296,  0.5985],
        [ 0.1431, -0.0450, -0.2060],
        ...,
        [ 0.1443, -0.0264, -0.2055],
        [-0.4426,  0.0491,  0.6266],
        [-0.4521,  0.0279,  0.6374]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2478,lr:0.005,loss:0.22330237925052643
[tensor([[-0.1381,  0.0752,  0

[tensor([[ 0.1380, -0.0752, -0.2025],
        [-0.4158,  0.1296,  0.5985],
        [ 0.1431, -0.0450, -0.2060],
        ...,
        [ 0.1443, -0.0264, -0.2055],
        [-0.4426,  0.0491,  0.6266],
        [-0.4521,  0.0279,  0.6374]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2502,lr:0.005,loss:0.22330138087272644
[tensor([[-0.1381,  0.0752,  0.2025],
        [ 0.4158, -0.1296, -0.5985],
        [-0.1431,  0.0450,  0.2060],
        ...,
        [-0.1443,  0.0263,  0.2055],
        [ 0.4426, -0.0491, -0.6266],
        [ 0.4521, -0.0279, -0.6374]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2503,lr:0.005,loss:0.8707405924797058
[tensor([[ 0.1380, -0.0752, -0.2025],
        [-0.4158,  0.1296,  0.5985],
        [ 0.1431, -0.0450, -0.2060],
        ...,
        [ 0.1443, -0.0264, -0.2055],
        [-0.4426,  0.0491,  0.6266],
        [-0.4521,  0.0279,  0.6374]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2504,lr:0.005,loss:0.22330279648303986
[tensor([[-0.1381,  0.0752,  

[tensor([[-0.1380,  0.0752,  0.2025],
        [ 0.4156, -0.1296, -0.5986],
        [-0.1431,  0.0450,  0.2061],
        ...,
        [-0.1443,  0.0263,  0.2055],
        [ 0.4424, -0.0491, -0.6266],
        [ 0.4519, -0.0279, -0.6375]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2549,lr:0.005,loss:0.8707280158996582
[tensor([[ 0.1380, -0.0752, -0.2025],
        [-0.4156,  0.1296,  0.5986],
        [ 0.1430, -0.0450, -0.2061],
        ...,
        [ 0.1443, -0.0263, -0.2055],
        [-0.4424,  0.0491,  0.6267],
        [-0.4519,  0.0279,  0.6375]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2550,lr:0.005,loss:0.22331158816814423
[tensor([[-0.1380,  0.0752,  0.2025],
        [ 0.4156, -0.1296, -0.5986],
        [-0.1430,  0.0450,  0.2061],
        ...,
        [-0.1443,  0.0263,  0.2055],
        [ 0.4424, -0.0491, -0.6267],
        [ 0.4519, -0.0279, -0.6375]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2551,lr:0.005,loss:0.8707306981086731
[tensor([[ 0.1380, -0.0752, -0

epoch:2574,lr:0.005,loss:0.22331756353378296
[tensor([[-0.1380,  0.0752,  0.2026],
        [ 0.4156, -0.1296, -0.5986],
        [-0.1430,  0.0450,  0.2061],
        ...,
        [-0.1442,  0.0263,  0.2056],
        [ 0.4424, -0.0491, -0.6267],
        [ 0.4519, -0.0279, -0.6375]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2575,lr:0.005,loss:0.870725691318512
[tensor([[ 0.1380, -0.0752, -0.2026],
        [-0.4156,  0.1295,  0.5986],
        [ 0.1430, -0.0450, -0.2061],
        ...,
        [ 0.1442, -0.0263, -0.2056],
        [-0.4424,  0.0491,  0.6267],
        [-0.4519,  0.0279,  0.6376]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2576,lr:0.005,loss:0.22331756353378296
[tensor([[-0.1380,  0.0752,  0.2026],
        [ 0.4156, -0.1296, -0.5986],
        [-0.1430,  0.0450,  0.2061],
        ...,
        [-0.1442,  0.0263,  0.2056],
        [ 0.4424, -0.0491, -0.6267],
        [ 0.4519, -0.0279, -0.6376]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2577,lr:0.005,loss:0.870

[tensor([[-0.1379,  0.0752,  0.2026],
        [ 0.4154, -0.1296, -0.5987],
        [-0.1430,  0.0450,  0.2061],
        ...,
        [-0.1442,  0.0263,  0.2056],
        [ 0.4422, -0.0491, -0.6268],
        [ 0.4517, -0.0279, -0.6377]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2623,lr:0.005,loss:0.8707106709480286
[tensor([[ 0.1379, -0.0752, -0.2026],
        [-0.4154,  0.1295,  0.5987],
        [ 0.1430, -0.0450, -0.2061],
        ...,
        [ 0.1442, -0.0263, -0.2056],
        [-0.4422,  0.0491,  0.6268],
        [-0.4517,  0.0279,  0.6377]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2624,lr:0.005,loss:0.22333137691020966
[tensor([[-0.1379,  0.0752,  0.2026],
        [ 0.4154, -0.1296, -0.5987],
        [-0.1430,  0.0450,  0.2061],
        ...,
        [-0.1442,  0.0263,  0.2056],
        [ 0.4422, -0.0491, -0.6268],
        [ 0.4517, -0.0279, -0.6377]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2625,lr:0.005,loss:0.8707097172737122
[tensor([[ 0.1379, -0.0752, -0

[tensor([[ 0.1379, -0.0752, -0.2026],
        [-0.4153,  0.1295,  0.5988],
        [ 0.1429, -0.0450, -0.2061],
        ...,
        [ 0.1441, -0.0263, -0.2056],
        [-0.4421,  0.0491,  0.6269],
        [-0.4516,  0.0279,  0.6378]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2672,lr:0.005,loss:0.22333993017673492
[tensor([[-0.1379,  0.0752,  0.2026],
        [ 0.4153, -0.1295, -0.5988],
        [-0.1429,  0.0450,  0.2061],
        ...,
        [-0.1441,  0.0263,  0.2056],
        [ 0.4421, -0.0491, -0.6269],
        [ 0.4516, -0.0279, -0.6378]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2673,lr:0.005,loss:0.870701789855957
[tensor([[ 0.1379, -0.0752, -0.2026],
        [-0.4153,  0.1295,  0.5988],
        [ 0.1429, -0.0450, -0.2061],
        ...,
        [ 0.1441, -0.0263, -0.2056],
        [-0.4421,  0.0491,  0.6269],
        [-0.4516,  0.0279,  0.6378]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2674,lr:0.005,loss:0.2233399599790573
[tensor([[-0.1379,  0.0752,  0.

[tensor([[-0.1379,  0.0752,  0.2026],
        [ 0.4153, -0.1295, -0.5988],
        [-0.1429,  0.0450,  0.2061],
        ...,
        [-0.1441,  0.0263,  0.2056],
        [ 0.4420, -0.0490, -0.6269],
        [ 0.4516, -0.0279, -0.6378]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2715,lr:0.005,loss:0.8706945180892944
[tensor([[ 0.1379, -0.0752, -0.2026],
        [-0.4153,  0.1295,  0.5988],
        [ 0.1429, -0.0450, -0.2061],
        ...,
        [ 0.1441, -0.0263, -0.2056],
        [-0.4421,  0.0490,  0.6269],
        [-0.4516,  0.0279,  0.6378]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2716,lr:0.005,loss:0.22334584593772888
[tensor([[-0.1379,  0.0752,  0.2026],
        [ 0.4153, -0.1295, -0.5988],
        [-0.1429,  0.0450,  0.2061],
        ...,
        [-0.1441,  0.0263,  0.2056],
        [ 0.4421, -0.0490, -0.6269],
        [ 0.4516, -0.0279, -0.6378]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2717,lr:0.005,loss:0.8706954121589661
[tensor([[ 0.1379, -0.0752, -0

[tensor([[-0.1379,  0.0752,  0.2026],
        [ 0.4152, -0.1295, -0.5989],
        [-0.1429,  0.0450,  0.2062],
        ...,
        [-0.1441,  0.0263,  0.2056],
        [ 0.4420, -0.0490, -0.6270],
        [ 0.4515, -0.0279, -0.6378]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2761,lr:0.005,loss:0.8706868886947632
[tensor([[ 0.1379, -0.0752, -0.2026],
        [-0.4152,  0.1295,  0.5989],
        [ 0.1429, -0.0450, -0.2062],
        ...,
        [ 0.1441, -0.0263, -0.2057],
        [-0.4420,  0.0490,  0.6270],
        [-0.4515,  0.0279,  0.6378]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2762,lr:0.005,loss:0.2233573943376541
[tensor([[-0.1379,  0.0752,  0.2026],
        [ 0.4152, -0.1295, -0.5989],
        [-0.1429,  0.0450,  0.2062],
        ...,
        [-0.1441,  0.0263,  0.2057],
        [ 0.4420, -0.0490, -0.6270],
        [ 0.4515, -0.0279, -0.6378]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2763,lr:0.005,loss:0.8706873655319214
[tensor([[ 0.1378, -0.0752, -0.

[tensor([[-0.1378,  0.0752,  0.2027],
        [ 0.4152, -0.1295, -0.5989],
        [-0.1429,  0.0450,  0.2062],
        ...,
        [-0.1441,  0.0263,  0.2057],
        [ 0.4419, -0.0490, -0.6270],
        [ 0.4514, -0.0279, -0.6379]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2787,lr:0.005,loss:0.8706803917884827
[tensor([[ 0.1378, -0.0752, -0.2027],
        [-0.4152,  0.1295,  0.5989],
        [ 0.1429, -0.0450, -0.2062],
        ...,
        [ 0.1441, -0.0263, -0.2057],
        [-0.4419,  0.0490,  0.6270],
        [-0.4514,  0.0279,  0.6379]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2788,lr:0.005,loss:0.22335997223854065
[tensor([[-0.1378,  0.0752,  0.2027],
        [ 0.4152, -0.1295, -0.5989],
        [-0.1429,  0.0450,  0.2062],
        ...,
        [-0.1441,  0.0263,  0.2057],
        [ 0.4419, -0.0490, -0.6270],
        [ 0.4514, -0.0279, -0.6379]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2789,lr:0.005,loss:0.8706819415092468
[tensor([[ 0.1378, -0.0752, -0

[tensor([[ 0.1377, -0.0753, -0.2027],
        [-0.4149,  0.1294,  0.5991],
        [ 0.1428, -0.0450, -0.2062],
        ...,
        [ 0.1440, -0.0263, -0.2057],
        [-0.4417,  0.0491,  0.6272],
        [-0.4512,  0.0279,  0.6380]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2832,lr:0.005,loss:0.22336404025554657
[tensor([[-0.1378,  0.0753,  0.2027],
        [ 0.4149, -0.1295, -0.5991],
        [-0.1428,  0.0450,  0.2062],
        ...,
        [-0.1440,  0.0263,  0.2057],
        [ 0.4417, -0.0491, -0.6272],
        [ 0.4512, -0.0279, -0.6380]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2833,lr:0.005,loss:0.8706774115562439
[tensor([[ 0.1377, -0.0753, -0.2027],
        [-0.4149,  0.1294,  0.5991],
        [ 0.1428, -0.0450, -0.2062],
        ...,
        [ 0.1440, -0.0263, -0.2057],
        [-0.4417,  0.0491,  0.6272],
        [-0.4512,  0.0279,  0.6380]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2834,lr:0.005,loss:0.2233639359474182
[tensor([[-0.1377,  0.0753,  0

[tensor([[-0.1377,  0.0753,  0.2028],
        [ 0.4148, -0.1294, -0.5992],
        [-0.1427,  0.0450,  0.2063],
        ...,
        [-0.1440,  0.0263,  0.2058],
        [ 0.4415, -0.0491, -0.6273],
        [ 0.4510, -0.0279, -0.6382]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2873,lr:0.005,loss:0.8706724047660828
[tensor([[ 0.1377, -0.0753, -0.2027],
        [-0.4148,  0.1294,  0.5992],
        [ 0.1427, -0.0450, -0.2063],
        ...,
        [ 0.1440, -0.0263, -0.2058],
        [-0.4415,  0.0491,  0.6273],
        [-0.4510,  0.0280,  0.6382]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2874,lr:0.005,loss:0.22337205708026886
[tensor([[-0.1377,  0.0753,  0.2028],
        [ 0.4148, -0.1294, -0.5992],
        [-0.1427,  0.0450,  0.2063],
        ...,
        [-0.1440,  0.0263,  0.2058],
        [ 0.4415, -0.0491, -0.6273],
        [ 0.4510, -0.0279, -0.6382]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2875,lr:0.005,loss:0.8706750869750977
[tensor([[ 0.1377, -0.0753, -0

[tensor([[ 0.1376, -0.0753, -0.2028],
        [-0.4147,  0.1294,  0.5993],
        [ 0.1427, -0.0450, -0.2063],
        ...,
        [ 0.1439, -0.0263, -0.2058],
        [-0.4414,  0.0491,  0.6274],
        [-0.4509,  0.0280,  0.6382]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2916,lr:0.005,loss:0.22338028252124786
[tensor([[-0.1377,  0.0753,  0.2028],
        [ 0.4147, -0.1295, -0.5993],
        [-0.1427,  0.0450,  0.2063],
        ...,
        [-0.1439,  0.0263,  0.2058],
        [ 0.4414, -0.0491, -0.6274],
        [ 0.4509, -0.0280, -0.6382]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2917,lr:0.005,loss:0.8706604838371277
[tensor([[ 0.1376, -0.0753, -0.2028],
        [-0.4147,  0.1294,  0.5993],
        [ 0.1427, -0.0450, -0.2063],
        ...,
        [ 0.1439, -0.0263, -0.2058],
        [-0.4414,  0.0491,  0.6274],
        [-0.4509,  0.0280,  0.6382]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2918,lr:0.005,loss:0.2233806848526001
[tensor([[-0.1377,  0.0753,  0

[tensor([[-0.1377,  0.0753,  0.2028],
        [ 0.4147, -0.1295, -0.5993],
        [-0.1427,  0.0450,  0.2063],
        ...,
        [-0.1439,  0.0263,  0.2058],
        [ 0.4414, -0.0491, -0.6274],
        [ 0.4509, -0.0280, -0.6382]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2959,lr:0.005,loss:0.8706466555595398
[tensor([[ 0.1376, -0.0753, -0.2028],
        [-0.4146,  0.1294,  0.5993],
        [ 0.1427, -0.0450, -0.2063],
        ...,
        [ 0.1439, -0.0263, -0.2058],
        [-0.4414,  0.0491,  0.6274],
        [-0.4509,  0.0280,  0.6383]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2960,lr:0.005,loss:0.2233949899673462
[tensor([[-0.1377,  0.0753,  0.2028],
        [ 0.4146, -0.1294, -0.5993],
        [-0.1427,  0.0450,  0.2063],
        ...,
        [-0.1439,  0.0263,  0.2058],
        [ 0.4414, -0.0491, -0.6274],
        [ 0.4509, -0.0280, -0.6382]], device='cuda:0', grad_fn=<AddBackward0>)]
epoch:2961,lr:0.005,loss:0.870647132396698
[tensor([[ 0.1376, -0.0753, -0.2

In [127]:
epsilon= 0.6
data_p = torch.load('data_p_gaussian_{}.pt'.format(epsilon))
target_p = torch.load('target_p_gaussian_{}.pt'.format(epsilon))
#print(data_p)

In [128]:
print(data_p.size())

torch.Size([402, 3])


In [136]:
X_all = torch.cat((X_train.to(device),data_p),0).to('cpu')
y_all = torch.cat((y_train.to(device),target_p),0).to('cpu')
print(X_all[:,0].size())

torch.Size([1072])


In [145]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)

linear.weight tensor([[-0.0346, -0.2471,  0.0193]], device='cuda:0')
linear.bias tensor([1.2288], device='cuda:0')


In [138]:
train_loader_all = DataLoader(LinearDataset(X_all, y_all), batch_size=batch_size_train, shuffle=False)
device = 'cuda'
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [139]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in tqdm(enumerate(train_loader)):
        criterion = torch.nn.BCELoss()
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        #output = model(data)
        loss = criterion(torch.squeeze(output), target)
        loss.backward()
        optimizer.step()
        if batch_idx % 200 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.item()))


In [140]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            criterion = torch.nn.BCELoss()
            data, target = data.to(device), target.to(device)
            #output = model(data.view(data.size(0), -1))
            output = torch.squeeze(model(data))
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = torch.squeeze(output).round()  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [141]:
model1 = LogisticRegression(input_dim,output_dim).to(device)
optimizer1 = torch.optim.SGD(model1.parameters(), lr=0.1)

for epoch in range(100):
    train(model1, device, train_loader_all,optimizer1,epoch)
    test(model1,device, test_loader)

2it [00:00, 63.11it/s]



Test set: Average loss: 0.0021, Accuracy: 178/330 (53.94%)



2it [00:00, 51.51it/s]



Test set: Average loss: 0.0020, Accuracy: 195/330 (59.09%)



2it [00:00, 69.43it/s]



Test set: Average loss: 0.0020, Accuracy: 208/330 (63.03%)



2it [00:00, 63.82it/s]



Test set: Average loss: 0.0019, Accuracy: 222/330 (67.27%)



2it [00:00, 69.88it/s]



Test set: Average loss: 0.0018, Accuracy: 237/330 (71.82%)



0it [00:00, ?it/s]



2it [00:00, 56.25it/s]



Test set: Average loss: 0.0018, Accuracy: 245/330 (74.24%)



0it [00:00, ?it/s]



2it [00:00, 69.43it/s]



Test set: Average loss: 0.0017, Accuracy: 256/330 (77.58%)



0it [00:00, ?it/s]



2it [00:00, 21.80it/s]



Test set: Average loss: 0.0016, Accuracy: 267/330 (80.91%)



2it [00:00, 69.58it/s]



Test set: Average loss: 0.0016, Accuracy: 276/330 (83.64%)



2it [00:00, 72.99it/s]







Test set: Average loss: 0.0015, Accuracy: 287/330 (86.97%)



0it [00:00, ?it/s]



2it [00:00, 66.53it/s]



Test set: Average loss: 0.0015, Accuracy: 293/330 (88.79%)



2it [00:00, 51.50it/s]



Test set: Average loss: 0.0014, Accuracy: 305/330 (92.42%)



2it [00:00, 56.43it/s]







Test set: Average loss: 0.0014, Accuracy: 309/330 (93.64%)



2it [00:00, 68.34it/s]



Test set: Average loss: 0.0013, Accuracy: 316/330 (95.76%)



0it [00:00, ?it/s]



2it [00:00, 58.14it/s]



Test set: Average loss: 0.0013, Accuracy: 321/330 (97.27%)



2it [00:00, 52.49it/s]



Test set: Average loss: 0.0013, Accuracy: 325/330 (98.48%)



2it [00:00, 64.38it/s]



Test set: Average loss: 0.0012, Accuracy: 325/330 (98.48%)



0it [00:00, ?it/s]



2it [00:00, 51.93it/s]



Test set: Average loss: 0.0012, Accuracy: 328/330 (99.39%)



2it [00:00, 67.58it/s]



Test set: Average loss: 0.0012, Accuracy: 329/330 (99.70%)



2it [00:00, 63.90it/s]







Test set: Average loss: 0.0011, Accuracy: 329/330 (99.70%)



0it [00:00, ?it/s]



2it [00:00, 63.77it/s]



Test set: Average loss: 0.0011, Accuracy: 329/330 (99.70%)



0it [00:00, ?it/s]



2it [00:00, 56.91it/s]



Test set: Average loss: 0.0011, Accuracy: 328/330 (99.39%)



2it [00:00, 69.92it/s]







Test set: Average loss: 0.0011, Accuracy: 327/330 (99.09%)



2it [00:00, 71.87it/s]







Test set: Average loss: 0.0010, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 57.04it/s]



Test set: Average loss: 0.0010, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 67.10it/s]



Test set: Average loss: 0.0010, Accuracy: 327/330 (99.09%)



2it [00:00, 69.86it/s]



Test set: Average loss: 0.0010, Accuracy: 327/330 (99.09%)



2it [00:00, 70.90it/s]



Test set: Average loss: 0.0009, Accuracy: 327/330 (99.09%)



2it [00:00, 72.26it/s]



Test set: Average loss: 0.0009, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 67.13it/s]



Test set: Average loss: 0.0009, Accuracy: 327/330 (99.09%)



2it [00:00, 74.15it/s]







Test set: Average loss: 0.0009, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 58.01it/s]



Test set: Average loss: 0.0009, Accuracy: 327/330 (99.09%)



2it [00:00, 65.39it/s]



Test set: Average loss: 0.0009, Accuracy: 327/330 (99.09%)



2it [00:00, 56.85it/s]







Test set: Average loss: 0.0008, Accuracy: 327/330 (99.09%)



2it [00:00, 68.56it/s]







Test set: Average loss: 0.0008, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 61.63it/s]



Test set: Average loss: 0.0008, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 63.14it/s]



Test set: Average loss: 0.0008, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 65.16it/s]



Test set: Average loss: 0.0008, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 63.10it/s]



Test set: Average loss: 0.0008, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 64.57it/s]



Test set: Average loss: 0.0008, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 65.42it/s]



Test set: Average loss: 0.0007, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 63.95it/s]



Test set: Average loss: 0.0007, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 63.35it/s]



Test set: Average loss: 0.0007, Accuracy: 327/330 (99.09%)



2it [00:00, 50.87it/s]







Test set: Average loss: 0.0007, Accuracy: 327/330 (99.09%)



2it [00:00, 66.02it/s]







Test set: Average loss: 0.0007, Accuracy: 327/330 (99.09%)



2it [00:00, 66.56it/s]







Test set: Average loss: 0.0007, Accuracy: 327/330 (99.09%)



2it [00:00, 63.95it/s]







Test set: Average loss: 0.0007, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 68.97it/s]



Test set: Average loss: 0.0007, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 57.62it/s]



Test set: Average loss: 0.0007, Accuracy: 327/330 (99.09%)



2it [00:00, 74.37it/s]







Test set: Average loss: 0.0007, Accuracy: 327/330 (99.09%)



2it [00:00, 72.23it/s]







Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



2it [00:00, 60.85it/s]







Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



2it [00:00, 67.86it/s]







Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



2it [00:00, 59.73it/s]







Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 68.64it/s]



Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 62.40it/s]



Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 66.74it/s]



Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 63.53it/s]



Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



2it [00:00, 68.63it/s]







Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



2it [00:00, 67.89it/s]







Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



2it [00:00, 59.49it/s]







Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 61.29it/s]



Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 63.47it/s]



Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 68.94it/s]



Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 69.69it/s]



Test set: Average loss: 0.0006, Accuracy: 327/330 (99.09%)



2it [00:00, 71.27it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 69.87it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 65.53it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 67.08it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 67.57it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 58.88it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



2it [00:00, 65.67it/s]







Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



2it [00:00, 66.82it/s]







Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



2it [00:00, 69.47it/s]







Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 55.75it/s]


Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)




0it [00:00, ?it/s]



2it [00:00, 67.89it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 65.65it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 66.16it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 71.20it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



2it [00:00, 68.46it/s]


Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)




2it [00:00, 70.29it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



2it [00:00, 69.52it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 70.50it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 70.58it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



2it [00:00, 64.65it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



2it [00:00, 66.38it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



2it [00:00, 67.93it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



2it [00:00, 66.31it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 47.74it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 65.27it/s]



Test set: Average loss: 0.0005, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 66.02it/s]



Test set: Average loss: 0.0004, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 68.87it/s]



Test set: Average loss: 0.0004, Accuracy: 327/330 (99.09%)



0it [00:00, ?it/s]



2it [00:00, 56.89it/s]



Test set: Average loss: 0.0004, Accuracy: 327/330 (99.09%)



2it [00:00, 65.07it/s]







Test set: Average loss: 0.0004, Accuracy: 327/330 (99.09%)



2it [00:00, 64.81it/s]







Test set: Average loss: 0.0004, Accuracy: 326/330 (98.79%)



2it [00:00, 64.66it/s]







Test set: Average loss: 0.0004, Accuracy: 326/330 (98.79%)



2it [00:00, 68.51it/s]







Test set: Average loss: 0.0004, Accuracy: 326/330 (98.79%)



0it [00:00, ?it/s]



2it [00:00, 59.95it/s]



Test set: Average loss: 0.0004, Accuracy: 326/330 (98.79%)



2it [00:00, 71.25it/s]







Test set: Average loss: 0.0004, Accuracy: 326/330 (98.79%)



2it [00:00, 66.29it/s]


Test set: Average loss: 0.0004, Accuracy: 326/330 (98.79%)






In [142]:
for name, param in model1.named_parameters():
    if param.requires_grad:
        print(name, param.data)

linear.weight tensor([[ 2.2254, -0.0598,  0.0259]], device='cuda:0')
linear.bias tensor([-0.4209], device='cuda:0')
