In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from itertools import islice


In [2]:
def weights_init(m):
    """Overwrite every parameter reachable from ``m`` with normal samples.

    Each tensor yielded by ``m.named_parameters()`` is re-initialised in place
    with ``nn.init.normal_`` (called with its default arguments), and the
    parameter's name is printed right after it has been initialised.

    Note: ``named_parameters()`` also walks sub-modules, so when this function
    is passed to ``Module.apply`` the same tensors are visited (and their names
    printed) once per enclosing module.
    """
    for param_name, tensor in m.named_parameters():
        nn.init.normal_(tensor)
        print(param_name)

In [3]:
# Read the training text and split it into tokens for one-hot encoding.
# Alternative input file used during development: 'samplemusic.txt'.
SONG_TAGS = ('<start>', '<end>')


def _tokenize_songs(text, tags=SONG_TAGS):
    """Split ``text`` into a list of tokens.

    A token is either one of the literal strings in ``tags`` (matched exactly,
    at the current position) or a single character. Any '<' or '>' that is not
    part of an exact tag is kept as an ordinary one-character token, so no
    input character is ever dropped or merged into a partial tag.

    Args:
        text: the full contents of the training file.
        tags: multi-character strings that must be kept as single tokens.

    Returns:
        list of str: tokens in the order they appear in ``text``.
    """
    tokens = []
    pos = 0
    while pos < len(text):
        # Does one of the known tags start exactly here?
        matched = next((t for t in tags if text.startswith(t, pos)), None)
        if matched is None:
            tokens.append(text[pos])
            pos += 1
        else:
            tokens.append(matched)
            pos += len(matched)
    return tokens


# `with` guarantees the handle is closed even if reading fails.
with open('train.txt') as file:
    array = _tokenize_songs(file.read())

In [4]:
# One-hot encoding of data
# OneHotEncoder expects a 2-D input, so put one token per row in a single column.
array = np.array(array).reshape(-1,1)
print('Shape or original data array:', array.shape)
print('Number of unique chars in array:', len(np.unique(array)))

# scikit-learn renamed `sparse` to `sparse_output` (added in 1.2, old name
# removed in 1.4). Try the new keyword first and fall back for older releases.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)
X_data = onehot_encoder.fit_transform(array) # Dense one-hot row for every token in the text

# Example of inverting a one-hot row to get the original token back
inverted = onehot_encoder.inverse_transform(X_data[0, :].reshape(1,-1))
print(inverted)
print('Shape of one-hot encoded data, X_data:', X_data.shape)
num_chars = X_data.shape[0]         # number of tokens (rows of X_data)
num_hot_encoding = X_data.shape[1]  # one-hot width (columns of X_data)

Shape or original data array: (185410, 1)
Number of unique chars in array: 153
[['<start>']]
Shape of one-hot encoded data, X_data: (185410, 153)


In [5]:
# Turning songs from data into chunks
CHUNK_LEN = 100  # maximum number of tokens per chunk


def _object_array(items):
    """Pack ``items`` into a 1-D object array without letting NumPy stack them.

    Calling ``np.array`` on a list of differently-shaped arrays raises
    ValueError on NumPy >= 1.24, so each element is assigned individually.
    """
    packed = np.empty(len(items), dtype=object)
    for pos, item in enumerate(items):
        packed[pos] = item
    return packed


# Decode every row once up front instead of calling inverse_transform per row.
token_labels = onehot_encoder.inverse_transform(X_data)[:, 0]

songs = []        # one entry per song, each an object array of chunks
song_chunks = []  # chunks collected for the song currently being read
chunk_start = 0   # index of the first row of the chunk being filled
for idx in range(len(X_data)):
    is_song_end = token_labels[idx] == '<end>'
    chunk_full = (idx + 1 - chunk_start) == CHUNK_LEN
    # A chunk is closed when it reaches CHUNK_LEN rows or when the song ends,
    # so the last chunk of a song may be shorter than CHUNK_LEN.
    if is_song_end or chunk_full:
        song_chunks.append(X_data[chunk_start:idx + 1].copy())
        chunk_start = idx + 1
    if is_song_end:
        songs.append(_object_array(song_chunks))
        song_chunks = []
# Rows after the final '<end>' (if any) are not part of a complete song and
# are left out, as before.

train_data = _object_array(songs)
print(train_data.shape[0])

333


In [6]:
class LSTM(nn.Module):
    """Thin wrapper around ``nn.LSTM`` that processes one sequence at a time.

    The raw LSTM outputs are returned reshaped to ``(-1, num_classes)``; there
    is no projection layer, so ``hidden_size`` has to be compatible with
    ``num_classes`` for that reshape to succeed.

    Args:
        num_classes: width of each returned output row.
        input_size: feature size of every input time step.
        hidden_size: LSTM hidden-state size.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        # num_layers is forwarded so the network matches the state shape
        # produced by init_hidden().
        self.lstm = nn.LSTM(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
        )

    def forward(self, x, *args):
        """Run the LSTM over one sequence.

        Args:
            x: input tensor that can be viewed as ``(1, seq_len, input_size)``.
            *args: ``args[0]`` is the ``(hidden, cell)`` state tuple, each of
                shape ``(num_layers, 1, hidden_size)``.

        Returns:
            ``(out, (hidden, cell))`` where ``out`` has shape
            ``(-1, num_classes)`` and the tuple is the updated state.
        """
        hidden = args[0][0]
        cell = args[0][1]
        # Enforce the single-sequence layout (the result was previously discarded).
        x = x.view(1, x.shape[1], self.input_size)
        out, (hidden, cell) = self.lstm(x, (hidden, cell))
        # Use the instance attribute rather than a module-level global.
        return out.reshape(-1, self.num_classes), (hidden, cell)

    def init_hidden(self):
        """Return zeroed ``(hidden, cell)`` states for a batch of one.

        The tensors are created on the same device and with the same dtype as
        the model's parameters, so they are always compatible with the LSTM
        regardless of whether CUDA happens to be available.
        """
        reference = next(self.parameters())
        shape = (self.num_layers, 1, self.hidden_size)
        hidden = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        cell = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        return (hidden, cell)

In [7]:
# Check if your system supports CUDA
use_cuda = torch.cuda.is_available()

# Pick the device the model and tensors are moved to. `device` must be defined
# in BOTH branches because later cells call `.to(device)` unconditionally.
if use_cuda:
    device = torch.device("cuda")
    extras = {"num_workers": 1, "pin_memory": True}
    print("CUDA is supported")
else: # Otherwise, train on the CPU
    device = torch.device("cpu")
    extras = False
    print("CUDA NOT supported")

# Older name for the same device object, kept so code referring to it still works.
computing_device = device

CUDA is supported


In [25]:
torch.manual_seed(777)  # reproducibility
num_classes = num_hot_encoding
input_size = num_hot_encoding  # one-hot size
hidden_size = num_hot_encoding  # LSTM output is used directly as the per-class scores
batch_size = 1   # one sentence
num_layers = 1  # one-layer rnn

# Instantiate RNN model
lstm = LSTM(num_classes, input_size, hidden_size, num_layers).to(device)
lstm.apply(weights_init)
print(lstm)

# Set loss and optimizer function
# CrossEntropyLoss = LogSoftmax + NLLLoss
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.01)


# Train the model
for epoch in range(1):
    song_count = 0
    song_loss = []
    # Loop through every song in the dataset
    for song in train_data:
        # Fresh state at the start of each song; carried across its chunks.
        hidden = lstm.init_hidden()
        chunk_loss = 0.0
        trained_chunks = 0
        # Loop through every chunk, songs are broken up into chunks
        for chunk_count, chunk in enumerate(song):
            # Next-token targets. The target of a chunk's last position is the
            # first token of the following chunk; the very last token of the
            # song has no successor, so it is used as a target only.
            # (A plain np.roll would wrap around and pair the last position
            # with the chunk's own first token.)
            if chunk_count + 1 < len(song):
                inputs = chunk
                targets = np.concatenate((chunk[1:], song[chunk_count + 1][:1]), axis=0)
            else:
                inputs = chunk[:-1]
                targets = chunk[1:]
            if len(inputs) == 0:
                # Final chunk holds a single token: nothing to predict from it.
                continue

            optimizer.zero_grad()
            letter = torch.as_tensor(np.asarray(inputs, dtype=np.float32)).view(1, -1, input_size).to(device)
            # CrossEntropyLoss takes class indices, so collapse the one-hot rows.
            target_idx = torch.as_tensor(np.argmax(targets, axis=1), dtype=torch.long).to(device)
            output, hidden = lstm(letter, hidden)
            # Detach so the next chunk does not backpropagate into this one.
            hidden = (hidden[0].detach(), hidden[1].detach())
            loss = criterion(output, target_idx)
            loss.backward()
            optimizer.step()
            chunk_loss += loss.item()
            trained_chunks += 1

        # Average loss over the chunks that were actually trained on.
        if trained_chunks:
            song_loss.append(chunk_loss / trained_chunks)
        song_count += 1

    avg_song_loss = np.mean(song_loss)
    print("epoch: %d, avg loss: %1.3f" % (epoch + 1, avg_song_loss))

print("Learning finished!")

weight_ih_l0
weight_hh_l0
bias_ih_l0
bias_hh_l0
lstm.weight_ih_l0
lstm.weight_hh_l0
lstm.bias_ih_l0
lstm.bias_hh_l0
LSTM(
  (lstm): LSTM(153, 153, batch_first=True)
)
Parameter containing:
tensor([[ 0.4473, -1.1195, -1.9564,  ..., -0.1344, -0.3040, -1.2460],
        [-0.1194,  0.4202, -1.5847,  ...,  0.7577, -0.1025, -0.9679],
        [ 1.1497, -1.5898,  1.1344,  ...,  0.4415, -0.1360,  0.7439],
        ...,
        [ 0.1148,  1.6949,  1.3711,  ...,  0.4732,  0.2266, -0.0565],
        [-0.8032,  0.3582, -1.7010,  ...,  0.6959,  0.2755,  0.3027],
        [ 0.7656,  0.0362, -1.4344,  ..., -0.6544,  0.7901, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1262, -1.9564,  ..., -0.1270, -0.3114, -1.2460],
        [-0.1194,  0.4269, -1.5847,  ...,  0.7652, -0.0950, -0.9679],
        [ 1.1497, -1.5965,  1.1344,  ...,  0.4489, -0.1434,  0.7439],
        ...,
        [ 0.1148,  1.7016,  1.3711,  ...,  0.4807,  0.2340, -0.0565],
        [-0.8032, 

Parameter containing:
tensor([[ 0.4473, -1.1557, -1.9564,  ..., -0.0883, -0.2706, -1.2460],
        [-0.1194,  0.4361, -1.5847,  ...,  0.8099, -0.1359, -0.9679],
        [ 1.1497, -1.6310,  1.1344,  ...,  0.4781, -0.1026,  0.7439],
        ...,
        [ 0.1148,  1.7311,  1.3711,  ...,  0.5339,  0.1932, -0.0565],
        [-0.8032,  0.3917, -1.7010,  ...,  0.6503,  0.2421,  0.3027],
        [ 0.7656, -0.0036, -1.4344,  ..., -0.7150,  0.7418, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1564, -1.9564,  ..., -0.0866, -0.2694, -1.2460],
        [-0.1194,  0.4358, -1.5847,  ...,  0.8116, -0.1370, -0.9679],
        [ 1.1497, -1.6320,  1.1344,  ...,  0.4798, -0.1014,  0.7439],
        ...,
        [ 0.1148,  1.7318,  1.3711,  ...,  0.5356,  0.1921, -0.0565],
        [-0.8032,  0.3923, -1.7010,  ...,  0.6486,  0.2410,  0.3027],
        [ 0.7656, -0.0046, -1.4344,  ..., -0.7167,  0.7406, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1652, -1.9564,  ..., -0.1056, -0.2586, -1.2460],
        [-0.1194,  0.4330, -1.5847,  ...,  0.7957, -0.1722, -0.9679],
        [ 1.1497, -1.5927,  1.1344,  ...,  0.4960, -0.0903,  0.7439],
        ...,
        [ 0.1148,  1.7234,  1.3711,  ...,  0.5471,  0.1812, -0.0565],
        [-0.8032,  0.4014, -1.7010,  ...,  0.6369,  0.2301,  0.3027],
        [ 0.7656, -0.0140, -1.4344,  ..., -0.6810,  0.7298, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1653, -1.9564,  ..., -0.1121, -0.2585, -1.2460],
        [-0.1194,  0.4330, -1.5847,  ...,  0.7903, -0.1758, -0.9679],
        [ 1.1497, -1.5916,  1.1344,  ...,  0.5030, -0.0903,  0.7439],
        ...,
        [ 0.1148,  1.7231,  1.3711,  ...,  0.5540,  0.1812, -0.0565],
        [-0.8032,  0.4014, -1.7010,  ...,  0.6321,  0.2300,  0.3027],
        [ 0.7656, -0.0145, -1.4344,  ..., -0.6879,  0.7298, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1659, -1.9564,  ..., -0.1222, -0.2581, -1.2460],
        [-0.1194,  0.4330, -1.5847,  ...,  0.7527, -0.2096, -0.9679],
        [ 1.1497, -1.5814,  1.1344,  ...,  0.5677, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7202,  1.3711,  ...,  0.6181,  0.1807, -0.0565],
        [-0.8032,  0.4022, -1.7010,  ...,  0.5882,  0.2295,  0.3027],
        [ 0.7656, -0.0184, -1.4344,  ..., -0.7583,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1659, -1.9564,  ..., -0.1204, -0.2581, -1.2460],
        [-0.1194,  0.4330, -1.5847,  ...,  0.7527, -0.2098, -0.9679],
        [ 1.1497, -1.5814,  1.1344,  ...,  0.5680, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7202,  1.3711,  ...,  0.6185,  0.1807, -0.0565],
        [-0.8032,  0.4022, -1.7010,  ...,  0.5879,  0.2295,  0.3027],
        [ 0.7656, -0.0185, -1.4344,  ..., -0.7589,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1663, -1.9564,  ..., -0.1046, -0.2581, -1.2460],
        [-0.1194,  0.4331, -1.5847,  ...,  0.7315, -0.2116, -0.9679],
        [ 1.1497, -1.5808,  1.1344,  ...,  0.5721, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7205,  1.3711,  ...,  0.6244,  0.1807, -0.0565],
        [-0.8032,  0.4021, -1.7010,  ...,  0.5200,  0.2295,  0.3027],
        [ 0.7656, -0.0189, -1.4344,  ..., -0.7615,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1663, -1.9564,  ..., -0.1045, -0.2581, -1.2460],
        [-0.1194,  0.4331, -1.5847,  ...,  0.7309, -0.2116, -0.9679],
        [ 1.1497, -1.5808,  1.1344,  ...,  0.5721, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7205,  1.3711,  ...,  0.6245,  0.1807, -0.0565],
        [-0.8032,  0.4021, -1.7010,  ...,  0.5193,  0.2295,  0.3027],
        [ 0.7656, -0.0189, -1.4344,  ..., -0.7629,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1808, -1.9564,  ..., -0.0592, -0.2581, -1.2460],
        [-0.1194,  0.4338, -1.5847,  ...,  0.6914, -0.2117, -0.9679],
        [ 1.1497, -1.6934,  1.1344,  ...,  0.5724, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7013,  1.3711,  ...,  0.6006,  0.1807, -0.0565],
        [-0.8032,  0.4156, -1.7010,  ...,  0.4947,  0.2295,  0.3027],
        [ 0.7656, -0.1020, -1.4344,  ..., -0.7873,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1805, -1.9564,  ..., -0.0587, -0.2581, -1.2460],
        [-0.1194,  0.4338, -1.5847,  ...,  0.6911, -0.2117, -0.9679],
        [ 1.1497, -1.6950,  1.1344,  ...,  0.5724, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7013,  1.3711,  ...,  0.6003,  0.1807, -0.0565],
        [-0.8032,  0.4155, -1.7010,  ...,  0.4946,  0.2295,  0.3027],
        [ 0.7656, -0.1037, -1.4344,  ..., -0.7874,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1659, -1.9564,  ..., -0.0552, -0.2581, -1.2460],
        [-0.1194,  0.4343, -1.5847,  ...,  0.6886, -0.2117, -0.9679],
        [ 1.1497, -1.7077,  1.1344,  ...,  0.5725, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.6984,  1.3711,  ...,  0.5891,  0.1807, -0.0565],
        [-0.8032,  0.4183, -1.7010,  ...,  0.4985,  0.2295,  0.3027],
        [ 0.7656, -0.1442, -1.4344,  ..., -0.7951,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1649, -1.9564,  ..., -0.0554, -0.2581, -1.2460],
        [-0.1194,  0.4344, -1.5847,  ...,  0.6886, -0.2117, -0.9679],
        [ 1.1497, -1.7078,  1.1344,  ...,  0.5725, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.6984,  1.3711,  ...,  0.5866,  0.1807, -0.0565],
        [-0.8032,  0.4186, -1.7010,  ...,  0.4994,  0.2295,  0.3027],
        [ 0.7656, -0.1476, -1.4344,  ..., -0.7969,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1061, -1.9564,  ..., -0.0569, -0.2581, -1.2460],
        [-0.1194,  0.4687, -1.5847,  ...,  0.6907, -0.2117, -0.9679],
        [ 1.1497, -1.7315,  1.1344,  ...,  0.5727, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7145,  1.3711,  ...,  0.5649,  0.1807, -0.0565],
        [-0.8032,  0.3965, -1.7010,  ...,  0.5148,  0.2295,  0.3027],
        [ 0.7656, -0.1643, -1.4344,  ..., -0.8111,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1025, -1.9564,  ..., -0.0566, -0.2581, -1.2460],
        [-0.1194,  0.4692, -1.5847,  ...,  0.6916, -0.2117, -0.9679],
        [ 1.1497, -1.7335,  1.1344,  ...,  0.5727, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7156,  1.3711,  ...,  0.5649,  0.1807, -0.0565],
        [-0.8032,  0.3946, -1.7010,  ...,  0.5150,  0.2295,  0.3027],
        [ 0.7656, -0.1645, -1.4344,  ..., -0.8103,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.0629, -1.9564,  ..., -0.0612, -0.2581, -1.2460],
        [-0.1194,  0.4720, -1.5847,  ...,  0.6604, -0.2117, -0.9679],
        [ 1.1497, -1.7510,  1.1344,  ...,  0.5747, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7259,  1.3711,  ...,  0.5118,  0.1807, -0.0565],
        [-0.8032,  0.3839, -1.7010,  ...,  0.5085,  0.2295,  0.3027],
        [ 0.7656, -0.1546, -1.4344,  ..., -0.8007,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.0624, -1.9564,  ..., -0.0614, -0.2581, -1.2460],
        [-0.1194,  0.4718, -1.5847,  ...,  0.6532, -0.2117, -0.9679],
        [ 1.1497, -1.7511,  1.1344,  ...,  0.5748, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7260,  1.3711,  ...,  0.5117,  0.1807, -0.0565],
        [-0.8032,  0.3839, -1.7010,  ...,  0.5085,  0.2295,  0.3027],
        [ 0.7656, -0.1548, -1.4344,  ..., -0.8002,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.0586, -1.9564,  ..., -0.0630, -0.2581, -1.2460],
        [-0.1194,  0.4705, -1.5847,  ...,  0.5992, -0.2117, -0.9679],
        [ 1.1497, -1.7547,  1.1344,  ...,  0.5748, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7264,  1.3711,  ...,  0.5245,  0.1807, -0.0565],
        [-0.8032,  0.3789, -1.7010,  ...,  0.5082,  0.2295,  0.3027],
        [ 0.7656, -0.1489, -1.4344,  ..., -0.7966,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.0583, -1.9564,  ..., -0.0630, -0.2581, -1.2460],
        [-0.1194,  0.4704, -1.5847,  ...,  0.5980, -0.2117, -0.9679],
        [ 1.1497, -1.7551,  1.1344,  ...,  0.5748, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7263,  1.3711,  ...,  0.5251,  0.1807, -0.0565],
        [-0.8032,  0.3784, -1.7010,  ...,  0.5082,  0.2295,  0.3027],
        [ 0.7656, -0.1475, -1.4344,  ..., -0.7966,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2023, -1.9564,  ..., -0.0633, -0.2581, -1.2460],
        [-0.1194,  0.4690, -1.5847,  ...,  0.5748, -0.2117, -0.9679],
        [ 1.1497, -1.7544,  1.1344,  ...,  0.5748, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7110,  1.3711,  ...,  0.5330,  0.1807, -0.0565],
        [-0.8032,  0.3653, -1.7010,  ...,  0.5109,  0.2295,  0.3027],
        [ 0.7656, -0.0992, -1.4344,  ..., -0.7984,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2109, -1.9564,  ..., -0.0629, -0.2581, -1.2460],
        [-0.1194,  0.4687, -1.5847,  ...,  0.5742, -0.2117, -0.9679],
        [ 1.1497, -1.7598,  1.1344,  ...,  0.5748, -0.0894,  0.7439],
        ...,
        [ 0.1148,  1.7085,  1.3711,  ...,  0.5333,  0.1807, -0.0565],
        [-0.8032,  0.3646, -1.7010,  ...,  0.5123,  0.2295,  0.3027],
        [ 0.7656, -0.0960, -1.4344,  ..., -0.7985,  0.7293, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2930, -1.9564,  ..., -0.2062, -0.2581, -1.2460],
        [-0.1194,  0.3241, -1.5847,  ...,  0.4784, -0.1855, -0.9679],
        [ 1.1497, -1.7919,  1.1344,  ...,  0.5751, -0.0892,  0.7439],
        ...,
        [ 0.1148,  1.7640,  1.3711,  ...,  0.5627,  0.1819, -0.0565],
        [-0.8032,  0.3433, -1.7010,  ...,  0.6327,  0.2270,  0.3027],
        [ 0.7656, -0.0451, -1.4344,  ..., -0.7957,  0.6342, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2934, -1.9564,  ..., -0.2074, -0.2581, -1.2460],
        [-0.1194,  0.3221, -1.5847,  ...,  0.4776, -0.1843, -0.9679],
        [ 1.1497, -1.7920,  1.1344,  ...,  0.5751, -0.0892,  0.7439],
        ...,
        [ 0.1148,  1.7651,  1.3711,  ...,  0.5629,  0.1819, -0.0565],
        [-0.8032,  0.3429, -1.7010,  ...,  0.6337,  0.2269,  0.3027],
        [ 0.7656, -0.0442, -1.4344,  ..., -0.7956,  0.6298, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2961, -1.9564,  ..., -0.2189, -0.2581, -1.2460],
        [-0.1194,  0.3044, -1.5847,  ...,  0.4740, -0.1737, -0.9679],
        [ 1.1497, -1.7935,  1.1344,  ...,  0.5751, -0.0892,  0.7439],
        ...,
        [ 0.1148,  1.7755,  1.3711,  ...,  0.5651,  0.1824, -0.0565],
        [-0.8032,  0.3414, -1.7010,  ...,  0.7328,  0.2259,  0.3027],
        [ 0.7656, -0.0369, -1.4344,  ..., -0.7706,  0.5909, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2955, -1.9564,  ..., -0.2190, -0.2581, -1.2460],
        [-0.1194,  0.3043, -1.5847,  ...,  0.4740, -0.1736, -0.9679],
        [ 1.1497, -1.7942,  1.1344,  ...,  0.5751, -0.0892,  0.7439],
        ...,
        [ 0.1148,  1.7757,  1.3711,  ...,  0.5651,  0.1824, -0.0565],
        [-0.8032,  0.3421, -1.7010,  ...,  0.7339,  0.2259,  0.3027],
        [ 0.7656, -0.0370, -1.4344,  ..., -0.7697,  0.5907, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2975, -1.9564,  ..., -0.2195, -0.2581, -1.2460],
        [-0.1194,  0.3037, -1.5847,  ...,  0.4742, -0.1733, -0.9679],
        [ 1.1497, -1.8008,  1.1344,  ...,  0.5749, -0.0892,  0.7439],
        ...,
        [ 0.1148,  1.7806,  1.3711,  ...,  0.5653,  0.1824, -0.0565],
        [-0.8032,  0.3463, -1.7010,  ...,  0.7449,  0.2259,  0.3027],
        [ 0.7656, -0.0332, -1.4344,  ..., -0.7621,  0.5892, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2976, -1.9564,  ..., -0.2195, -0.2581, -1.2460],
        [-0.1194,  0.3037, -1.5847,  ...,  0.4742, -0.1733, -0.9679],
        [ 1.1497, -1.8010,  1.1344,  ...,  0.5749, -0.0892,  0.7439],
        ...,
        [ 0.1148,  1.7806,  1.3711,  ...,  0.5653,  0.1824, -0.0565],
        [-0.8032,  0.3462, -1.7010,  ...,  0.7452,  0.2259,  0.3027],
        [ 0.7656, -0.0330, -1.4344,  ..., -0.7621,  0.5892, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.3476, -1.9564,  ..., -0.2195, -0.2581, -1.2460],
        [-0.1194,  0.3050, -1.5847,  ...,  0.4687, -0.1749, -0.9679],
        [ 1.1497, -1.8345,  1.1344,  ...,  0.5685, -0.0892,  0.7439],
        ...,
        [ 0.1148,  1.7806,  1.3711,  ...,  0.5641,  0.1817, -0.0565],
        [-0.8032,  0.3191, -1.7010,  ...,  0.6906,  0.2106,  0.3027],
        [ 0.7656, -0.0250, -1.4344,  ..., -0.6972,  0.5919, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.3485, -1.9564,  ..., -0.2195, -0.2581, -1.2460],
        [-0.1194,  0.3051, -1.5847,  ...,  0.4684, -0.1749, -0.9679],
        [ 1.1497, -1.8361,  1.1344,  ...,  0.5682, -0.0892,  0.7439],
        ...,
        [ 0.1148,  1.7800,  1.3711,  ...,  0.5640,  0.1817, -0.0565],
        [-0.8032,  0.3176, -1.7010,  ...,  0.6861,  0.2105,  0.3027],
        [ 0.7656, -0.0259, -1.4344,  ..., -0.6924,  0.5919, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.3565, -1.9564,  ..., -0.2196, -0.2456, -1.2460],
        [-0.1194,  0.3108, -1.5847,  ...,  0.4657, -0.2910, -0.9679],
        [ 1.1497, -1.8467,  1.1344,  ...,  0.5657, -0.0916,  0.7439],
        ...,
        [ 0.1148,  1.6336,  1.3711,  ...,  0.5641,  0.2429, -0.0565],
        [-0.8032,  0.2237, -1.7010,  ...,  0.6460,  0.1975,  0.3027],
        [ 0.7656, -0.0527, -1.4344,  ..., -0.6505,  0.6747, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.3566, -1.9564,  ..., -0.2196, -0.2455, -1.2460],
        [-0.1194,  0.3108, -1.5847,  ...,  0.4657, -0.2919, -0.9679],
        [ 1.1497, -1.8467,  1.1344,  ...,  0.5657, -0.0916,  0.7439],
        ...,
        [ 0.1148,  1.6322,  1.3711,  ...,  0.5642,  0.2434, -0.0565],
        [-0.8032,  0.2231, -1.7010,  ...,  0.6459,  0.1974,  0.3027],
        [ 0.7656, -0.0527, -1.4344,  ..., -0.6503,  0.6753, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.3850, -1.9564,  ..., -0.2196, -0.2447, -1.2460],
        [-0.1194,  0.3109, -1.5847,  ...,  0.4939, -0.3000, -0.9679],
        [ 1.1497, -1.8399,  1.1344,  ...,  0.5659, -0.0917,  0.7439],
        ...,
        [ 0.1148,  1.6197,  1.3711,  ...,  0.5629,  0.2476, -0.0565],
        [-0.8032,  0.2276, -1.7010,  ...,  0.6444,  0.1965,  0.3027],
        [ 0.7656, -0.0077, -1.4344,  ..., -0.6490,  0.6811, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.3852, -1.9564,  ..., -0.2196, -0.2447, -1.2460],
        [-0.1194,  0.3108, -1.5847,  ...,  0.4948, -0.3000, -0.9679],
        [ 1.1497, -1.8399,  1.1344,  ...,  0.5659, -0.0917,  0.7439],
        ...,
        [ 0.1148,  1.6196,  1.3711,  ...,  0.5628,  0.2476, -0.0565],
        [-0.8032,  0.2276, -1.7010,  ...,  0.6443,  0.1965,  0.3027],
        [ 0.7656, -0.0074, -1.4344,  ..., -0.6490,  0.6811, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.3779, -1.9564,  ..., -0.2195, -0.2446, -1.2460],
        [-0.1194,  0.3115, -1.5847,  ...,  0.5021, -0.3003, -0.9679],
        [ 1.1497, -1.8353,  1.1344,  ...,  0.5660, -0.0917,  0.7439],
        ...,
        [ 0.1148,  1.6202,  1.3711,  ...,  0.5623,  0.2478, -0.0565],
        [-0.8032,  0.2349, -1.7010,  ...,  0.6439,  0.1965,  0.3027],
        [ 0.7656, -0.0914, -1.4344,  ..., -0.6491,  0.6813, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.3777, -1.9564,  ..., -0.2195, -0.2446, -1.2460],
        [-0.1194,  0.3116, -1.5847,  ...,  0.5022, -0.3003, -0.9679],
        [ 1.1497, -1.8353,  1.1344,  ...,  0.5660, -0.0917,  0.7439],
        ...,
        [ 0.1148,  1.6203,  1.3711,  ...,  0.5623,  0.2478, -0.0565],
        [-0.8032,  0.2351, -1.7010,  ...,  0.6438,  0.1965,  0.3027],
        [ 0.7656, -0.0931, -1.4344,  ..., -0.6491,  0.6813, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.3761, -1.9564,  ..., -0.2195, -0.2446, -1.2460],
        [-0.1194,  0.3119, -1.5847,  ...,  0.5025, -0.3003, -0.9679],
        [ 1.1497, -1.8351,  1.1344,  ...,  0.5660, -0.0917,  0.7439],
        ...,
        [ 0.1148,  1.6207,  1.3711,  ...,  0.5626,  0.2478, -0.0565],
        [-0.8032,  0.2365, -1.7010,  ...,  0.6426,  0.1965,  0.3027],
        [ 0.7656, -0.1079, -1.4344,  ..., -0.6490,  0.6813, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.3761, -1.9564,  ..., -0.2195, -0.2446, -1.2460],
        [-0.1194,  0.3119, -1.5847,  ...,  0.5025, -0.3003, -0.9679],
        [ 1.1497, -1.8351,  1.1344,  ...,  0.5660, -0.0917,  0.7439],
        ...,
        [ 0.1148,  1.6207,  1.3711,  ...,  0.5627,  0.2478, -0.0565],
        [-0.8032,  0.2365, -1.7010,  ...,  0.6425,  0.1965,  0.3027],
        [ 0.7656, -0.1080, -1.4344,  ..., -0.6490,  0.6813, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.3946, -1.9564,  ..., -0.2201, -0.2446, -1.2460],
        [-0.1194,  0.3121, -1.5847,  ...,  0.5178, -0.3003, -0.9679],
        [ 1.1497, -1.8570,  1.1344,  ...,  0.5667, -0.0917,  0.7439],
        ...,
        [ 0.1148,  1.6317,  1.3711,  ...,  0.5659,  0.2478, -0.0565],
        [-0.8032,  0.2254, -1.7010,  ...,  0.6525,  0.1965,  0.3027],
        [ 0.7656, -0.1339, -1.4344,  ..., -0.6419,  0.6813, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.3953, -1.9564,  ..., -0.2201, -0.2446, -1.2460],
        [-0.1194,  0.3121, -1.5847,  ...,  0.5178, -0.3003, -0.9679],
        [ 1.1497, -1.8578,  1.1344,  ...,  0.5667, -0.0917,  0.7439],
        ...,
        [ 0.1148,  1.6321,  1.3711,  ...,  0.5659,  0.2478, -0.0565],
        [-0.8032,  0.2251, -1.7010,  ...,  0.6535,  0.1965,  0.3027],
        [ 0.7656, -0.1353, -1.4344,  ..., -0.6417,  0.6813, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2469, -1.9564,  ..., -0.2202, -0.2446, -1.2460],
        [-0.1194,  0.3289, -1.5847,  ...,  0.5178, -0.3003, -0.9679],
        [ 1.1497, -1.7663,  1.1344,  ...,  0.5664, -0.0917,  0.7439],
        ...,
        [ 0.1148,  1.6929,  1.3711,  ...,  0.5687,  0.2478, -0.0565],
        [-0.8032,  0.1480, -1.7010,  ...,  0.6642,  0.1965,  0.3027],
        [ 0.7656, -0.2870, -1.4344,  ..., -0.6402,  0.6813, -0.5764]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2431, -1.9564,  ..., -0.2202, -0.2446, -1.2460],
        [-0.1194,  0.3293, -1.5847,  ...,  0.5178, -0.3003, -0.9679],
        [ 1.1497, -1.7638,  1.1344,  ...,  0.5664, -0.0917,  0.7439],
        ...,
        [ 0.1148,  1.6947,  1.3711,  ...,  0.5688,  0.2478, -0.0565],
        [-0.8032,  0.1459, -1.7010,  ...,  0.6643,  0.1965,  0.3027],
        [ 0.7656, -0.2913, -1.4344,  ..., -0.6402,  0.6813, -0.5764]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2164, -1.9564,  ..., -0.2202, -0.2446, -1.4244],
        [-0.1194,  0.3325, -1.5847,  ...,  0.5178, -0.3003, -1.1413],
        [ 1.1497, -1.7435,  1.1344,  ...,  0.5663, -0.0917,  0.9172],
        ...,
        [ 0.1148,  1.7086,  1.3711,  ...,  0.5683,  0.2478,  0.1169],
        [-0.8032,  0.1303, -1.7010,  ...,  0.6648,  0.1965,  0.4761],
        [ 0.7656, -0.3236, -1.4344,  ..., -0.6408,  0.6813, -0.7498]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2161, -1.9564,  ..., -0.2202, -0.2446, -1.4285],
        [-0.1194,  0.3326, -1.5847,  ...,  0.5178, -0.3003, -1.1453],
        [ 1.1497, -1.7426,  1.1344,  ...,  0.5663, -0.0917,  0.9212],
        ...,
        [ 0.1148,  1.7090,  1.3711,  ...,  0.5683,  0.2478,  0.1209],
        [-0.8032,  0.1297, -1.7010,  ...,  0.6648,  0.1965,  0.4801],
        [ 0.7656, -0.3245, -1.4344,  ..., -0.6408,  0.6813, -0.7538]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2118, -1.9564,  ..., -0.2202, -0.2446, -1.4591],
        [-0.1194,  0.3334, -1.5847,  ...,  0.5178, -0.3003, -1.1748],
        [ 1.1497, -1.7394,  1.1344,  ...,  0.5659, -0.0917,  0.9507],
        ...,
        [ 0.1148,  1.7123,  1.3711,  ...,  0.5668,  0.2478,  0.1504],
        [-0.8032,  0.1246, -1.7010,  ...,  0.6649,  0.1965,  0.5095],
        [ 0.7656, -0.3326, -1.4344,  ..., -0.6407,  0.6813, -0.7832]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2114, -1.9564,  ..., -0.2202, -0.2446, -1.4598],
        [-0.1194,  0.3335, -1.5847,  ...,  0.5178, -0.3003, -1.1754],
        [ 1.1497, -1.7398,  1.1344,  ...,  0.5659, -0.0917,  0.9514],
        ...,
        [ 0.1148,  1.7125,  1.3711,  ...,  0.5668,  0.2478,  0.1510],
        [-0.8032,  0.1243, -1.7010,  ...,  0.6649,  0.1965,  0.5102],
        [ 0.7656, -0.3329, -1.4344,  ..., -0.6407,  0.6813, -0.7839]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2051, -1.9564,  ..., -0.2203, -0.2446, -1.5360],
        [-0.1194,  0.3295, -1.5847,  ...,  0.5177, -0.3003, -1.3105],
        [ 1.1497, -1.8832,  1.1344,  ...,  0.5655, -0.0917,  0.9570],
        ...,
        [ 0.1148,  1.6914,  1.3711,  ...,  0.5658,  0.2478,  0.3523],
        [-0.8032,  0.0841, -1.7010,  ...,  0.6669,  0.1965,  0.7175],
        [ 0.7656, -0.2802, -1.4344,  ..., -0.6404,  0.6813, -0.9190]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2050, -1.9564,  ..., -0.2203, -0.2446, -1.5379],
        [-0.1194,  0.3295, -1.5847,  ...,  0.5177, -0.3003, -1.3131],
        [ 1.1497, -1.8842,  1.1344,  ...,  0.5655, -0.0917,  0.9570],
        ...,
        [ 0.1148,  1.6913,  1.3711,  ...,  0.5657,  0.2478,  0.3543],
        [-0.8032,  0.0838, -1.7010,  ...,  0.6670,  0.1965,  0.7195],
        [ 0.7656, -0.2798, -1.4344,  ..., -0.6404,  0.6813, -0.9217]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2048, -1.9564,  ..., -0.2204, -0.2446, -1.5516],
        [-0.1194,  0.3293, -1.5847,  ...,  0.5177, -0.3003, -1.3329],
        [ 1.1497, -1.8915,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6901,  1.3711,  ...,  0.5661,  0.2478,  0.3687],
        [-0.8032,  0.0829, -1.7010,  ...,  0.6680,  0.1965,  0.7346],
        [ 0.7656, -0.2773, -1.4344,  ..., -0.6404,  0.6813, -0.9415]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2047, -1.9564,  ..., -0.2204, -0.2446, -1.5519],
        [-0.1194,  0.3293, -1.5847,  ...,  0.5177, -0.3003, -1.3334],
        [ 1.1497, -1.8917,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6901,  1.3711,  ...,  0.5661,  0.2478,  0.3691],
        [-0.8032,  0.0829, -1.7010,  ...,  0.6680,  0.1965,  0.7350],
        [ 0.7656, -0.2773, -1.4344,  ..., -0.6404,  0.6813, -0.9419]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2047, -1.9564,  ..., -0.2216, -0.2446, -1.5546],
        [-0.1194,  0.3292, -1.5847,  ...,  0.5177, -0.3003, -1.3374],
        [ 1.1497, -1.8932,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6901,  1.3711,  ...,  0.5665,  0.2478,  0.3720],
        [-0.8032,  0.0877, -1.7010,  ...,  0.6682,  0.1965,  0.7380],
        [ 0.7656, -0.2768, -1.4344,  ..., -0.6377,  0.6813, -0.9459]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2047, -1.9564,  ..., -0.2216, -0.2446, -1.5547],
        [-0.1194,  0.3292, -1.5847,  ...,  0.5177, -0.3003, -1.3374],
        [ 1.1497, -1.8932,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6901,  1.3711,  ...,  0.5665,  0.2478,  0.3720],
        [-0.8032,  0.0877, -1.7010,  ...,  0.6683,  0.1965,  0.7380],
        [ 0.7656, -0.2768, -1.4344,  ..., -0.6377,  0.6813, -0.9459]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2045, -1.9564,  ..., -0.2217, -0.2446, -1.5548],
        [-0.1194,  0.3304, -1.5847,  ...,  0.5177, -0.3003, -1.3375],
        [ 1.1497, -1.8931,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6901,  1.3711,  ...,  0.5669,  0.2478,  0.3721],
        [-0.8032,  0.0885, -1.7010,  ...,  0.6683,  0.1965,  0.7381],
        [ 0.7656, -0.2768, -1.4344,  ..., -0.6376,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2045, -1.9564,  ..., -0.2217, -0.2446, -1.5548],
        [-0.1194,  0.3305, -1.5847,  ...,  0.5177, -0.3003, -1.3375],
        [ 1.1497, -1.8931,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6901,  1.3711,  ...,  0.5669,  0.2478,  0.3721],
        [-0.8032,  0.0885, -1.7010,  ...,  0.6683,  0.1965,  0.7381],
        [ 0.7656, -0.2768, -1.4344,  ..., -0.6376,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.2045, -1.9564,  ..., -0.2218, -0.2446, -1.5548],
        [-0.1194,  0.3306, -1.5847,  ...,  0.5177, -0.3003, -1.3375],
        [ 1.1497, -1.8931,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6902,  1.3711,  ...,  0.5669,  0.2478,  0.3721],
        [-0.8032,  0.0885, -1.7010,  ...,  0.6683,  0.1965,  0.7381],
        [ 0.7656, -0.2768, -1.4344,  ..., -0.6376,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.2045, -1.9564,  ..., -0.2218, -0.2446, -1.5548],
        [-0.1194,  0.3306, -1.5847,  ...,  0.5177, -0.3003, -1.3375],
        [ 1.1497, -1.8932,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6902,  1.3711,  ...,  0.5669,  0.2478,  0.3721],
        [-0.8032,  0.0885, -1.7010,  ...,  0.6683,  0.1965,  0.7381],
        [ 0.7656, -0.2768, -1.4344,  ..., -0.6376,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1896, -1.9564,  ..., -0.2216, -0.2446, -1.5548],
        [-0.1194,  0.3309, -1.5847,  ...,  0.5170, -0.3003, -1.3375],
        [ 1.1497, -1.8957,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6892,  1.3711,  ...,  0.5665,  0.2478,  0.3721],
        [-0.8032,  0.0885, -1.7010,  ...,  0.6683,  0.1965,  0.7381],
        [ 0.7656, -0.2787, -1.4344,  ..., -0.6311,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1894, -1.9564,  ..., -0.2216, -0.2446, -1.5548],
        [-0.1194,  0.3308, -1.5847,  ...,  0.5170, -0.3003, -1.3375],
        [ 1.1497, -1.8957,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6892,  1.3711,  ...,  0.5665,  0.2478,  0.3721],
        [-0.8032,  0.0885, -1.7010,  ...,  0.6683,  0.1965,  0.7381],
        [ 0.7656, -0.2787, -1.4344,  ..., -0.6309,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1875, -1.9564,  ..., -0.2216, -0.2446, -1.5548],
        [-0.1194,  0.3300, -1.5847,  ...,  0.5168, -0.3003, -1.3375],
        [ 1.1497, -1.8961,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6893,  1.3711,  ...,  0.5669,  0.2478,  0.3721],
        [-0.8032,  0.0886, -1.7010,  ...,  0.6676,  0.1965,  0.7381],
        [ 0.7656, -0.2791, -1.4344,  ..., -0.6287,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1875, -1.9564,  ..., -0.2216, -0.2446, -1.5548],
        [-0.1194,  0.3299, -1.5847,  ...,  0.5168, -0.3003, -1.3375],
        [ 1.1497, -1.8961,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6893,  1.3711,  ...,  0.5669,  0.2478,  0.3721],
        [-0.8032,  0.0886, -1.7010,  ...,  0.6676,  0.1965,  0.7381],
        [ 0.7656, -0.2791, -1.4344,  ..., -0.6287,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1880, -1.9564,  ..., -0.2216, -0.2446, -1.5548],
        [-0.1194,  0.3289, -1.5847,  ...,  0.5167, -0.3003, -1.3375],
        [ 1.1497, -1.8978,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6901,  1.3711,  ...,  0.5674,  0.2478,  0.3721],
        [-0.8032,  0.0870, -1.7010,  ...,  0.6670,  0.1965,  0.7381],
        [ 0.7656, -0.2795, -1.4344,  ..., -0.6286,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1881, -1.9564,  ..., -0.2216, -0.2446, -1.5548],
        [-0.1194,  0.3289, -1.5847,  ...,  0.5167, -0.3003, -1.3375],
        [ 1.1497, -1.8980,  1.1344,  ...,  0.5652, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6902,  1.3711,  ...,  0.5674,  0.2478,  0.3721],
        [-0.8032,  0.0868, -1.7010,  ...,  0.6670,  0.1965,  0.7381],
        [ 0.7656, -0.2795, -1.4344,  ..., -0.6286,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1876, -1.9564,  ..., -0.2217, -0.2446, -1.5548],
        [-0.1194,  0.3286, -1.5847,  ...,  0.5168, -0.3003, -1.3375],
        [ 1.1497, -1.9000,  1.1344,  ...,  0.5420, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6906,  1.3711,  ...,  0.5004,  0.2478,  0.3721],
        [-0.8032,  0.0859, -1.7010,  ...,  0.7400,  0.1965,  0.7381],
        [ 0.7656, -0.2805, -1.4344,  ..., -0.5168,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1874, -1.9564,  ..., -0.2217, -0.2446, -1.5548],
        [-0.1194,  0.3286, -1.5847,  ...,  0.5168, -0.3003, -1.3375],
        [ 1.1497, -1.9001,  1.1344,  ...,  0.5412, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6905,  1.3711,  ...,  0.4981,  0.2478,  0.3721],
        [-0.8032,  0.0859, -1.7010,  ...,  0.7425,  0.1965,  0.7381],
        [ 0.7656, -0.2806, -1.4344,  ..., -0.5130,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1863, -1.9564,  ..., -0.2217, -0.2446, -1.5548],
        [-0.1194,  0.3285, -1.5847,  ...,  0.5175, -0.3003, -1.3375],
        [ 1.1497, -1.9007,  1.1344,  ...,  0.5342, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.4778,  0.2478,  0.3721],
        [-0.8032,  0.0862, -1.7010,  ...,  0.7643,  0.1965,  0.7381],
        [ 0.7656, -0.2811, -1.4344,  ..., -0.4797,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1863, -1.9564,  ..., -0.2217, -0.2446, -1.5548],
        [-0.1194,  0.3285, -1.5847,  ...,  0.5178, -0.3003, -1.3375],
        [ 1.1497, -1.9007,  1.1344,  ...,  0.5341, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.4777,  0.2478,  0.3721],
        [-0.8032,  0.0862, -1.7010,  ...,  0.7644,  0.1965,  0.7381],
        [ 0.7656, -0.2812, -1.4344,  ..., -0.4795,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1862, -1.9564,  ..., -0.2191, -0.2446, -1.5548],
        [-0.1194,  0.3265, -1.5847,  ...,  0.5710, -0.3003, -1.3375],
        [ 1.1497, -1.9007,  1.1344,  ...,  0.5335, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.3976,  0.2478,  0.3721],
        [-0.8032,  0.0861, -1.7010,  ...,  0.7652,  0.1965,  0.7381],
        [ 0.7656, -0.2812, -1.4344,  ..., -0.4601,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1862, -1.9564,  ..., -0.2190, -0.2446, -1.5548],
        [-0.1194,  0.3265, -1.5847,  ...,  0.5721, -0.3003, -1.3375],
        [ 1.1497, -1.9007,  1.1344,  ...,  0.5335, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.3955,  0.2478,  0.3721],
        [-0.8032,  0.0861, -1.7010,  ...,  0.7652,  0.1965,  0.7381],
        [ 0.7656, -0.2812, -1.4344,  ..., -0.4600,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1861, -1.9564,  ..., -0.2184, -0.2446, -1.5548],
        [-0.1194,  0.3266, -1.5847,  ...,  0.5822, -0.3003, -1.3375],
        [ 1.1497, -1.9007,  1.1344,  ...,  0.5334, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.3765,  0.2478,  0.3721],
        [-0.8032,  0.0861, -1.7010,  ...,  0.7652,  0.1965,  0.7381],
        [ 0.7656, -0.2812, -1.4344,  ..., -0.4596,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1861, -1.9564,  ..., -0.2184, -0.2446, -1.5548],
        [-0.1194,  0.3267, -1.5847,  ...,  0.5823, -0.3003, -1.3375],
        [ 1.1497, -1.9007,  1.1344,  ...,  0.5334, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.3764,  0.2478,  0.3721],
        [-0.8032,  0.0861, -1.7010,  ...,  0.7652,  0.1965,  0.7381],
        [ 0.7656, -0.2812, -1.4344,  ..., -0.4596,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1860, -1.9564,  ..., -0.2184, -0.2446, -1.5548],
        [-0.1194,  0.3267, -1.5847,  ...,  0.5825, -0.3003, -1.3375],
        [ 1.1497, -1.9007,  1.1344,  ...,  0.5334, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.3754,  0.2478,  0.3721],
        [-0.8032,  0.0861, -1.7010,  ...,  0.7652,  0.1965,  0.7381],
        [ 0.7656, -0.2814, -1.4344,  ..., -0.4598,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1860, -1.9564,  ..., -0.2184, -0.2446, -1.5548],
        [-0.1194,  0.3267, -1.5847,  ...,  0.5824, -0.3003, -1.3375],
        [ 1.1497, -1.9007,  1.1344,  ...,  0.5334, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.3754,  0.2478,  0.3721],
        [-0.8032,  0.0861, -1.7010,  ...,  0.7652,  0.1965,  0.7381],
        [ 0.7656, -0.2814, -1.4344,  ..., -0.4598,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1872, -1.9564,  ..., -0.2184, -0.2446, -1.5548],
        [-0.1194,  0.3265, -1.5847,  ...,  0.5825, -0.3003, -1.3375],
        [ 1.1497, -1.9001,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.3754,  0.2478,  0.3721],
        [-0.8032,  0.0865, -1.7010,  ...,  0.7655,  0.1965,  0.7381],
        [ 0.7656, -0.2809, -1.4344,  ..., -0.4598,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1872, -1.9564,  ..., -0.2184, -0.2446, -1.5548],
        [-0.1194,  0.3265, -1.5847,  ...,  0.5825, -0.3003, -1.3375],
        [ 1.1497, -1.9001,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.3754,  0.2478,  0.3721],
        [-0.8032,  0.0865, -1.7010,  ...,  0.7655,  0.1965,  0.7381],
        [ 0.7656, -0.2809, -1.4344,  ..., -0.4598,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1873, -1.9564,  ..., -0.2183, -0.2446, -1.5548],
        [-0.1194,  0.3266, -1.5847,  ...,  0.5825, -0.3003, -1.3375],
        [ 1.1497, -1.8999,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6899,  1.3711,  ...,  0.3752,  0.2478,  0.3721],
        [-0.8032,  0.0867, -1.7010,  ...,  0.7650,  0.1965,  0.7381],
        [ 0.7656, -0.2807, -1.4344,  ..., -0.4598,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1873, -1.9564,  ..., -0.2183, -0.2446, -1.5548],
        [-0.1194,  0.3266, -1.5847,  ...,  0.5825, -0.3003, -1.3375],
        [ 1.1497, -1.8999,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6899,  1.3711,  ...,  0.3752,  0.2478,  0.3721],
        [-0.8032,  0.0867, -1.7010,  ...,  0.7650,  0.1965,  0.7381],
        [ 0.7656, -0.2807, -1.4344,  ..., -0.4598,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1872, -1.9564,  ..., -0.2186, -0.2446, -1.5548],
        [-0.1194,  0.3266, -1.5847,  ...,  0.5852, -0.3003, -1.3375],
        [ 1.1497, -1.8999,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6901,  1.3711,  ...,  0.3830,  0.2478,  0.3721],
        [-0.8032,  0.0866, -1.7010,  ...,  0.7650,  0.1965,  0.7381],
        [ 0.7656, -0.2806, -1.4344,  ..., -0.4596,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1872, -1.9564,  ..., -0.2186, -0.2446, -1.5548],
        [-0.1194,  0.3266, -1.5847,  ...,  0.5852, -0.3003, -1.3375],
        [ 1.1497, -1.8999,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6901,  1.3711,  ...,  0.3831,  0.2478,  0.3721],
        [-0.8032,  0.0866, -1.7010,  ...,  0.7650,  0.1965,  0.7381],
        [ 0.7656, -0.2806, -1.4344,  ..., -0.4596,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1871, -1.9564,  ..., -0.2186, -0.2446, -1.5548],
        [-0.1194,  0.3266, -1.5847,  ...,  0.5849, -0.3003, -1.3375],
        [ 1.1497, -1.8999,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.3854,  0.2478,  0.3721],
        [-0.8032,  0.0865, -1.7010,  ...,  0.7651,  0.1965,  0.7381],
        [ 0.7656, -0.2786, -1.4344,  ..., -0.4595,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1871, -1.9564,  ..., -0.2186, -0.2446, -1.5548],
        [-0.1194,  0.3266, -1.5847,  ...,  0.5849, -0.3003, -1.3375],
        [ 1.1497, -1.8999,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6900,  1.3711,  ...,  0.3854,  0.2478,  0.3721],
        [-0.8032,  0.0865, -1.7010,  ...,  0.7651,  0.1965,  0.7381],
        [ 0.7656, -0.2786, -1.4344,  ..., -0.4595,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1731, -1.9564,  ..., -0.2186, -0.2446, -1.5548],
        [-0.1194,  0.3269, -1.5847,  ...,  0.5812, -0.3003, -1.3375],
        [ 1.1497, -1.8946,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6917,  1.3711,  ...,  0.3853,  0.2478,  0.3721],
        [-0.8032,  0.0873, -1.7010,  ...,  0.7651,  0.1965,  0.7381],
        [ 0.7656, -0.2793, -1.4344,  ..., -0.4597,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1726, -1.9564,  ..., -0.2186, -0.2446, -1.5548],
        [-0.1194,  0.3270, -1.5847,  ...,  0.5811, -0.3003, -1.3375],
        [ 1.1497, -1.8945,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6918,  1.3711,  ...,  0.3853,  0.2478,  0.3721],
        [-0.8032,  0.0873, -1.7010,  ...,  0.7651,  0.1965,  0.7381],
        [ 0.7656, -0.2793, -1.4344,  ..., -0.4597,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.4473, -1.1701, -1.9564,  ..., -0.2186, -0.2446, -1.5548],
        [-0.1194,  0.3267, -1.5847,  ...,  0.5803, -0.3003, -1.3375],
        [ 1.1497, -1.8936,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6928,  1.3711,  ...,  0.3856,  0.2478,  0.3721],
        [-0.8032,  0.0881, -1.7010,  ...,  0.7651,  0.1965,  0.7381],
        [ 0.7656, -0.2806, -1.4344,  ..., -0.4598,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.4473, -1.1700, -1.9564,  ..., -0.2186, -0.2446, -1.5548],
        [-0.1194,  0.3267, -1.5847,  ...,  0.5803, -0.3003, -1.3375],
        [ 1.1497, -1.8936,  1.1344,  ...,  0.5333, -0.0917,  0.9572],
        ...,
        [ 0.1148,  1.6928,  1.3711,  ...,  0.3856,  0.2478,  0.3721],
        [-0.8032,  0.0881, -1.7010,  ...,  0.7651,  0.1965,  0.7381],
        [ 0.7656, -0.2806, -1.4344,  ..., -0.4598,  0.6813, -0.9461]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.6862, -1.1697, -1.9564,  ..., -0.2185, -0.2446, -1.7937],
        [ 0.1196,  0.3266, -1.5847,  ...,  0.5802, -0.3003, -1.0986],
        [ 0.9129, -1.8939,  1.1344,  ...,  0.5332, -0.0917,  1.1962],
        ...,
        [ 0.3538,  1.6929,  1.3711,  ...,  0.3866,  0.2478,  0.6110],
        [-1.0421,  0.0875, -1.7010,  ...,  0.7648,  0.1965,  0.4992],
        [ 1.0045, -0.2814, -1.4344,  ..., -0.4597,  0.6813, -1.1849]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.6889, -1.1697, -1.9564,  ..., -0.2185, -0.2446, -1.7963],
        [ 0.1222,  0.3266, -1.5847,  ...,  0.5802, -0.3003, -1.0960],
        [ 0.9102, -1.8939,  1.1344,  ...,  0.5332, -0.0917,  1.1988],
        ...,
        [ 0.3564,  1.6929,  1.3711,  ...,  0.3868,  0.2478,  0.6137],
        [-1.0447,  0.0875, -1.7010,  ...,  0.7650,  0.1965,  0.4966],
        [ 1.0071, -0.2815, -1.4344,  ..., -0.4596,  0.6813, -1.1876]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7118, -1.1707, -1.9564,  ..., -0.2186, -0.1282, -1.8193],
        [ 0.1452,  0.3267, -1.5847,  ...,  0.5767, -0.2695, -1.0730],
        [ 0.8875, -1.8939,  1.1344,  ...,  0.5332, -0.0456,  1.2218],
        ...,
        [ 0.3794,  1.6928,  1.3711,  ...,  0.3912,  0.0504,  0.6366],
        [-1.0677,  0.0870, -1.7010,  ...,  0.7669,  0.0441,  0.4736],
        [ 1.0301, -0.2815, -1.4344,  ..., -0.4583,  0.6590, -1.2105]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7119, -1.1708, -1.9564,  ..., -0.2186, -0.1250, -1.8194],
        [ 0.1453,  0.3267, -1.5847,  ...,  0.5767, -0.2683, -1.0729],
        [ 0.8874, -1.8939,  1.1344,  ...,  0.5332, -0.0440,  1.2219],
        ...,
        [ 0.3795,  1.6928,  1.3711,  ...,  0.3914,  0.0436,  0.6367],
        [-1.0678,  0.0870, -1.7010,  ...,  0.7669,  0.0373,  0.4735],
        [ 1.0302, -0.2815, -1.4344,  ..., -0.4583,  0.6533, -1.2106]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7126, -1.1682, -1.9564,  ..., -0.2186, -0.1018, -1.8200],
        [ 0.1459,  0.3268, -1.5847,  ...,  0.5764, -0.2744, -1.0722],
        [ 0.8867, -1.8955,  1.1344,  ...,  0.5330, -0.0313,  1.2225],
        ...,
        [ 0.3801,  1.6932,  1.3711,  ...,  0.3920, -0.0054,  0.6374],
        [-1.0684,  0.0852, -1.7010,  ...,  0.7647, -0.0116,  0.4728],
        [ 1.0309, -0.2819, -1.4344,  ..., -0.4583,  0.6118, -1.2113]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7126, -1.1681, -1.9564,  ..., -0.2186, -0.1012, -1.8201],
        [ 0.1460,  0.3268, -1.5847,  ...,  0.5763, -0.2749, -1.0722],
        [ 0.8867, -1.8955,  1.1344,  ...,  0.5330, -0.0310,  1.2226],
        ...,
        [ 0.3802,  1.6932,  1.3711,  ...,  0.3917, -0.0067,  0.6374],
        [-1.0685,  0.0852, -1.7010,  ...,  0.7646, -0.0129,  0.4728],
        [ 1.0309, -0.2819, -1.4344,  ..., -0.4583,  0.6107, -1.2113]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1578, -1.9564,  ..., -0.2186, -0.0960, -1.8202],
        [ 0.1461,  0.3269, -1.5847,  ...,  0.5748, -0.2798, -1.0721],
        [ 0.8865, -1.8875,  1.1344,  ...,  0.5329, -0.0281,  1.2227],
        ...,
        [ 0.3803,  1.6971,  1.3711,  ...,  0.3892, -0.0176,  0.6376],
        [-1.0686,  0.0917, -1.7010,  ...,  0.7636, -0.0238,  0.4727],
        [ 1.0311, -0.2803, -1.4344,  ..., -0.4586,  0.6015, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1566, -1.9564,  ..., -0.2186, -0.0959, -1.8202],
        [ 0.1461,  0.3269, -1.5847,  ...,  0.5748, -0.2798, -1.0721],
        [ 0.8865, -1.8859,  1.1344,  ...,  0.5329, -0.0280,  1.2227],
        ...,
        [ 0.3803,  1.6977,  1.3711,  ...,  0.3891, -0.0177,  0.6376],
        [-1.0686,  0.0930, -1.7010,  ...,  0.7636, -0.0239,  0.4727],
        [ 1.0311, -0.2800, -1.4344,  ..., -0.4585,  0.6014, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1472, -1.9564,  ..., -0.2186, -0.0957, -1.8202],
        [ 0.1461,  0.3270, -1.5847,  ...,  0.5747, -0.2801, -1.0720],
        [ 0.8865, -1.8726,  1.1344,  ...,  0.5329, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7031,  1.3711,  ...,  0.3951, -0.0182,  0.6376],
        [-1.0686,  0.0978, -1.7010,  ...,  0.7640, -0.0244,  0.4727],
        [ 1.0311, -0.2779, -1.4344,  ..., -0.4565,  0.6010, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1471, -1.9564,  ..., -0.2186, -0.0957, -1.8202],
        [ 0.1461,  0.3270, -1.5847,  ...,  0.5747, -0.2801, -1.0720],
        [ 0.8865, -1.8725,  1.1344,  ...,  0.5329, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7032,  1.3711,  ...,  0.3952, -0.0182,  0.6376],
        [-1.0686,  0.0977, -1.7010,  ...,  0.7640, -0.0244,  0.4727],
        [ 1.0311, -0.2779, -1.4344,  ..., -0.4565,  0.6010, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1436, -1.9564,  ..., -0.2186, -0.0957, -1.8202],
        [ 0.1461,  0.3269, -1.5847,  ...,  0.5743, -0.2801, -1.0720],
        [ 0.8865, -1.8734,  1.1344,  ...,  0.5328, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7055,  1.3711,  ...,  0.3959, -0.0183,  0.6376],
        [-1.0686,  0.0941, -1.7010,  ...,  0.7627, -0.0244,  0.4727],
        [ 1.0311, -0.2799, -1.4344,  ..., -0.4566,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1434, -1.9564,  ..., -0.2186, -0.0957, -1.8202],
        [ 0.1461,  0.3269, -1.5847,  ...,  0.5743, -0.2801, -1.0720],
        [ 0.8865, -1.8736,  1.1344,  ...,  0.5328, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7055,  1.3711,  ...,  0.3959, -0.0183,  0.6376],
        [-1.0686,  0.0939, -1.7010,  ...,  0.7627, -0.0244,  0.4727],
        [ 1.0311, -0.2800, -1.4344,  ..., -0.4566,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1415, -1.9564,  ..., -0.2185, -0.0957, -1.8202],
        [ 0.1461,  0.3269, -1.5847,  ...,  0.5742, -0.2801, -1.0720],
        [ 0.8865, -1.8750,  1.1344,  ...,  0.5327, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7058,  1.3711,  ...,  0.3956, -0.0183,  0.6376],
        [-1.0686,  0.0920, -1.7010,  ...,  0.7627, -0.0244,  0.4727],
        [ 1.0311, -0.2813, -1.4344,  ..., -0.4566,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1414, -1.9564,  ..., -0.2185, -0.0957, -1.8202],
        [ 0.1461,  0.3269, -1.5847,  ...,  0.5742, -0.2801, -1.0720],
        [ 0.8865, -1.8750,  1.1344,  ...,  0.5327, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7058,  1.3711,  ...,  0.3956, -0.0183,  0.6376],
        [-1.0686,  0.0920, -1.7010,  ...,  0.7627, -0.0244,  0.4727],
        [ 1.0311, -0.2813, -1.4344,  ..., -0.4566,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1417, -1.9564,  ..., -0.2185, -0.0957, -1.8202],
        [ 0.1461,  0.3278, -1.5847,  ...,  0.5758, -0.2801, -1.0720],
        [ 0.8865, -1.8753,  1.1344,  ...,  0.5328, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7059,  1.3711,  ...,  0.3953, -0.0183,  0.6376],
        [-1.0686,  0.0914, -1.7010,  ...,  0.7627, -0.0244,  0.4727],
        [ 1.0311, -0.2817, -1.4344,  ..., -0.4566,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1417, -1.9564,  ..., -0.2185, -0.0957, -1.8202],
        [ 0.1461,  0.3278, -1.5847,  ...,  0.5758, -0.2801, -1.0720],
        [ 0.8865, -1.8753,  1.1344,  ...,  0.5328, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7060,  1.3711,  ...,  0.3953, -0.0183,  0.6376],
        [-1.0686,  0.0914, -1.7010,  ...,  0.7627, -0.0244,  0.4727],
        [ 1.0311, -0.2817, -1.4344,  ..., -0.4566,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1418, -1.9564,  ..., -0.2180, -0.0957, -1.8202],
        [ 0.1461,  0.3278, -1.5847,  ...,  0.5775, -0.2801, -1.0720],
        [ 0.8865, -1.8744,  1.1344,  ...,  0.5327, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7066,  1.3711,  ...,  0.3930, -0.0183,  0.6376],
        [-1.0686,  0.0924, -1.7010,  ...,  0.7634, -0.0244,  0.4727],
        [ 1.0310, -0.2808, -1.4344,  ..., -0.4502,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1418, -1.9564,  ..., -0.2180, -0.0957, -1.8202],
        [ 0.1461,  0.3278, -1.5847,  ...,  0.5774, -0.2801, -1.0720],
        [ 0.8865, -1.8742,  1.1344,  ...,  0.5327, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7066,  1.3711,  ...,  0.3929, -0.0183,  0.6376],
        [-1.0686,  0.0925, -1.7010,  ...,  0.7634, -0.0244,  0.4727],
        [ 1.0310, -0.2807, -1.4344,  ..., -0.4500,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1420, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3278, -1.5847,  ...,  0.5771, -0.2801, -1.0720],
        [ 0.8866, -1.8733,  1.1344,  ...,  0.5327, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7064,  1.3711,  ...,  0.3924, -0.0183,  0.6376],
        [-1.0686,  0.0935, -1.7010,  ...,  0.7636, -0.0244,  0.4727],
        [ 1.0310, -0.2797, -1.4344,  ..., -0.4488,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1420, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3278, -1.5847,  ...,  0.5771, -0.2801, -1.0720],
        [ 0.8866, -1.8732,  1.1344,  ...,  0.5327, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7064,  1.3711,  ...,  0.3924, -0.0183,  0.6376],
        [-1.0686,  0.0935, -1.7010,  ...,  0.7636, -0.0244,  0.4727],
        [ 1.0310, -0.2796, -1.4344,  ..., -0.4488,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1534, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3276, -1.5847,  ...,  0.5770, -0.2801, -1.0720],
        [ 0.8866, -1.8742,  1.1344,  ...,  0.5327, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7054,  1.3711,  ...,  0.3923, -0.0183,  0.6376],
        [-1.0686,  0.0944, -1.7010,  ...,  0.7636, -0.0244,  0.4727],
        [ 1.0310, -0.2791, -1.4344,  ..., -0.4486,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1537, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3276, -1.5847,  ...,  0.5770, -0.2801, -1.0720],
        [ 0.8866, -1.8742,  1.1344,  ...,  0.5327, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7053,  1.3711,  ...,  0.3923, -0.0183,  0.6376],
        [-1.0686,  0.0946, -1.7010,  ...,  0.7636, -0.0244,  0.4727],
        [ 1.0310, -0.2791, -1.4344,  ..., -0.4486,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1555, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3276, -1.5847,  ...,  0.5781, -0.2801, -1.0720],
        [ 0.8866, -1.8746,  1.1344,  ...,  0.5326, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7046,  1.3711,  ...,  0.3903, -0.0183,  0.6376],
        [-1.0686,  0.0953, -1.7010,  ...,  0.7622, -0.0244,  0.4727],
        [ 1.0310, -0.2790, -1.4344,  ..., -0.4482,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1555, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3276, -1.5847,  ...,  0.5781, -0.2801, -1.0720],
        [ 0.8866, -1.8746,  1.1344,  ...,  0.5326, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7046,  1.3711,  ...,  0.3903, -0.0183,  0.6376],
        [-1.0686,  0.0953, -1.7010,  ...,  0.7621, -0.0244,  0.4727],
        [ 1.0310, -0.2790, -1.4344,  ..., -0.4482,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1548, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3277, -1.5847,  ...,  0.5782, -0.2801, -1.0720],
        [ 0.8866, -1.8755,  1.1344,  ...,  0.5325, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7035,  1.3711,  ...,  0.3899, -0.0183,  0.6376],
        [-1.0686,  0.0971, -1.7010,  ...,  0.7618, -0.0244,  0.4727],
        [ 1.0310, -0.2786, -1.4344,  ..., -0.4482,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1548, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3278, -1.5847,  ...,  0.5782, -0.2801, -1.0720],
        [ 0.8866, -1.8756,  1.1344,  ...,  0.5325, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.7035,  1.3711,  ...,  0.3899, -0.0183,  0.6376],
        [-1.0686,  0.0972, -1.7010,  ...,  0.7618, -0.0244,  0.4727],
        [ 1.0310, -0.2786, -1.4344,  ..., -0.4482,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1779, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3281, -1.5847,  ...,  0.5786, -0.2801, -1.0720],
        [ 0.8866, -1.8811,  1.1344,  ...,  0.5326, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.6992,  1.3711,  ...,  0.3900, -0.0183,  0.6376],
        [-1.0686,  0.1017, -1.7010,  ...,  0.7617, -0.0244,  0.4727],
        [ 1.0310, -0.2806, -1.4344,  ..., -0.4482,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1783, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3281, -1.5847,  ...,  0.5786, -0.2801, -1.0720],
        [ 0.8866, -1.8813,  1.1344,  ...,  0.5326, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.6991,  1.3711,  ...,  0.3900, -0.0183,  0.6376],
        [-1.0686,  0.1017, -1.7010,  ...,  0.7617, -0.0244,  0.4727],
        [ 1.0310, -0.2807, -1.4344,  ..., -0.4483,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1812, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3282, -1.5847,  ...,  0.5793, -0.2801, -1.0720],
        [ 0.8866, -1.8833,  1.1344,  ...,  0.5327, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.6982,  1.3711,  ...,  0.3902, -0.0183,  0.6376],
        [-1.0686,  0.1017, -1.7010,  ...,  0.7660, -0.0244,  0.4727],
        [ 1.0310, -0.2814, -1.4344,  ..., -0.4489,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1813, -1.9564,  ..., -0.2179, -0.0957, -1.8202],
        [ 0.1461,  0.3282, -1.5847,  ...,  0.5793, -0.2801, -1.0720],
        [ 0.8866, -1.8833,  1.1344,  ...,  0.5327, -0.0279,  1.2227],
        ...,
        [ 0.3803,  1.6982,  1.3711,  ...,  0.3902, -0.0183,  0.6376],
        [-1.0686,  0.1017, -1.7010,  ...,  0.7661, -0.0244,  0.4727],
        [ 1.0310, -0.2814, -1.4344,  ..., -0.4489,  0.6009, -1.2115]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1823, -1.9564,  ..., -0.2179, -0.0957, -1.5517],
        [ 0.1461,  0.3282, -1.5847,  ...,  0.5796, -0.2801, -0.8298],
        [ 0.8866, -1.8826,  1.1344,  ...,  0.5328, -0.0279,  1.4912],
        ...,
        [ 0.3803,  1.6980,  1.3711,  ...,  0.3902, -0.0183,  0.9061],
        [-1.0686,  0.1030, -1.7010,  ...,  0.7676, -0.0244,  0.7400],
        [ 1.0310, -0.2811, -1.4344,  ..., -0.4491,  0.6009, -1.4800]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1823, -1.9564,  ..., -0.2179, -0.0957, -1.5502],
        [ 0.1461,  0.3282, -1.5847,  ...,  0.5796, -0.2801, -0.8285],
        [ 0.8866, -1.8824,  1.1344,  ...,  0.5328, -0.0279,  1.4927],
        ...,
        [ 0.3803,  1.6980,  1.3711,  ...,  0.3902, -0.0183,  0.9076],
        [-1.0686,  0.1031, -1.7010,  ...,  0.7676, -0.0244,  0.7415],
        [ 1.0310, -0.2811, -1.4344,  ..., -0.4491,  0.6009, -1.4815]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1817, -1.9564,  ..., -0.2179, -0.0957, -1.5392],
        [ 0.1461,  0.3281, -1.5847,  ...,  0.5764, -0.2801, -0.8195],
        [ 0.8866, -1.8822,  1.1344,  ...,  0.5328, -0.0279,  1.5038],
        ...,
        [ 0.3803,  1.6994,  1.3711,  ...,  0.3901, -0.0183,  0.9186],
        [-1.0686,  0.1047, -1.7010,  ...,  0.7676, -0.0244,  0.7525],
        [ 1.0310, -0.2697, -1.4344,  ..., -0.4493,  0.6009, -1.4925]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1816, -1.9564,  ..., -0.2179, -0.0957, -1.5389],
        [ 0.1461,  0.3281, -1.5847,  ...,  0.5758, -0.2801, -0.8192],
        [ 0.8866, -1.8822,  1.1344,  ...,  0.5328, -0.0279,  1.5040],
        ...,
        [ 0.3803,  1.6995,  1.3711,  ...,  0.3901, -0.0183,  0.9189],
        [-1.0686,  0.1048, -1.7010,  ...,  0.7676, -0.0244,  0.7527],
        [ 1.0310, -0.2686, -1.4344,  ..., -0.4494,  0.6009, -1.4928]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.1814, -1.9564,  ..., -0.2179, -0.0957, -1.5371],
        [ 0.1461,  0.3281, -1.5847,  ...,  0.5715, -0.2801, -0.8177],
        [ 0.8866, -1.8822,  1.1344,  ...,  0.5328, -0.0279,  1.5059],
        ...,
        [ 0.3803,  1.7003,  1.3711,  ...,  0.3901, -0.0183,  0.9207],
        [-1.0686,  0.1051, -1.7010,  ...,  0.7676, -0.0244,  0.7546],
        [ 1.0310, -0.2610, -1.4344,  ..., -0.4496,  0.6009, -1.4946]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.1814, -1.9564,  ..., -0.2179, -0.0957, -1.5370],
        [ 0.1461,  0.3281, -1.5847,  ...,  0.5714, -0.2801, -0.8177],
        [ 0.8866, -1.8822,  1.1344,  ...,  0.5328, -0.0279,  1.5059],
        ...,
        [ 0.3803,  1.7003,  1.3711,  ...,  0.3900, -0.0183,  0.9208],
        [-1.0686,  0.1051, -1.7010,  ...,  0.7676, -0.0244,  0.7546],
        [ 1.0310, -0.2608, -1.4344,  ..., -0.4496,  0.6009, -1.4947]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2246, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3281, -1.5847,  ...,  0.5706, -0.2801, -0.8176],
        [ 0.8866, -1.8804,  1.1344,  ...,  0.5328, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.6996,  1.3711,  ...,  0.3897, -0.0183,  0.9212],
        [-1.0686,  0.1173, -1.7010,  ...,  0.7683, -0.0244,  0.5493],
        [ 1.0310, -0.2681, -1.4344,  ..., -0.4493,  0.6009, -1.5275]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2250, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3280, -1.5847,  ...,  0.5706, -0.2801, -0.8176],
        [ 0.8866, -1.8803,  1.1344,  ...,  0.5328, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.6994,  1.3711,  ...,  0.3897, -0.0183,  0.9212],
        [-1.0686,  0.1177, -1.7010,  ...,  0.7683, -0.0244,  0.5465],
        [ 1.0310, -0.2682, -1.4344,  ..., -0.4493,  0.6009, -1.5280]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2269, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3279, -1.5847,  ...,  0.5705, -0.2801, -0.8176],
        [ 0.8866, -1.8792,  1.1344,  ...,  0.5326, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.6991,  1.3711,  ...,  0.3900, -0.0183,  0.9212],
        [-1.0686,  0.1200, -1.7010,  ...,  0.7684, -0.0244,  0.5244],
        [ 1.0310, -0.2693, -1.4344,  ..., -0.4507,  0.6009, -1.5315]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2268, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3279, -1.5847,  ...,  0.5705, -0.2801, -0.8176],
        [ 0.8866, -1.8791,  1.1344,  ...,  0.5325, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.6991,  1.3711,  ...,  0.3900, -0.0183,  0.9212],
        [-1.0686,  0.1200, -1.7010,  ...,  0.7684, -0.0244,  0.5240],
        [ 1.0310, -0.2693, -1.4344,  ..., -0.4508,  0.6009, -1.5316]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2261, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3280, -1.5847,  ...,  0.5581, -0.2801, -0.8176],
        [ 0.8866, -1.8787,  1.1344,  ...,  0.5332, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.6994,  1.3711,  ...,  0.3903, -0.0183,  0.9212],
        [-1.0686,  0.1203, -1.7010,  ...,  0.7684, -0.0244,  0.5211],
        [ 1.0310, -0.2695, -1.4344,  ..., -0.4512,  0.6009, -1.5320]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2261, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3280, -1.5847,  ...,  0.5567, -0.2801, -0.8176],
        [ 0.8866, -1.8787,  1.1344,  ...,  0.5332, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.6994,  1.3711,  ...,  0.3903, -0.0183,  0.9212],
        [-1.0686,  0.1203, -1.7010,  ...,  0.7684, -0.0244,  0.5210],
        [ 1.0310, -0.2695, -1.4344,  ..., -0.4511,  0.6009, -1.5320]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2352, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3279, -1.5847,  ...,  0.5445, -0.2801, -0.8176],
        [ 0.8866, -1.8660,  1.1344,  ...,  0.5336, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7052,  1.3711,  ...,  0.3907, -0.0183,  0.9212],
        [-1.0686,  0.1285, -1.7010,  ...,  0.7684, -0.0244,  0.5205],
        [ 1.0310, -0.2690, -1.4344,  ..., -0.4509,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2353, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3279, -1.5847,  ...,  0.5444, -0.2801, -0.8176],
        [ 0.8866, -1.8658,  1.1344,  ...,  0.5336, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7053,  1.3711,  ...,  0.3907, -0.0183,  0.9212],
        [-1.0686,  0.1286, -1.7010,  ...,  0.7684, -0.0244,  0.5205],
        [ 1.0310, -0.2690, -1.4344,  ..., -0.4509,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2381, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3278, -1.5847,  ...,  0.5440, -0.2801, -0.8176],
        [ 0.8866, -1.8641,  1.1344,  ...,  0.5336, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7068,  1.3711,  ...,  0.3948, -0.0183,  0.9212],
        [-1.0686,  0.1296, -1.7010,  ...,  0.7684, -0.0244,  0.5205],
        [ 1.0310, -0.2693, -1.4344,  ..., -0.4552,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2384, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3278, -1.5847,  ...,  0.5440, -0.2801, -0.8176],
        [ 0.8866, -1.8640,  1.1344,  ...,  0.5336, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7069,  1.3711,  ...,  0.3950, -0.0183,  0.9212],
        [-1.0686,  0.1297, -1.7010,  ...,  0.7684, -0.0244,  0.5205],
        [ 1.0310, -0.2693, -1.4344,  ..., -0.4554,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2402, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3277, -1.5847,  ...,  0.5445, -0.2801, -0.8176],
        [ 0.8866, -1.8629,  1.1344,  ...,  0.5336, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7080,  1.3711,  ...,  0.3968, -0.0183,  0.9212],
        [-1.0686,  0.1318, -1.7010,  ...,  0.7677, -0.0244,  0.5205],
        [ 1.0310, -0.2700, -1.4344,  ..., -0.4570,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2401, -1.9564,  ..., -0.2179, -0.0957, -1.5366],
        [ 0.1461,  0.3277, -1.5847,  ...,  0.5451, -0.2801, -0.8176],
        [ 0.8866, -1.8628,  1.1344,  ...,  0.5336, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7080,  1.3711,  ...,  0.3972, -0.0183,  0.9212],
        [-1.0686,  0.1321, -1.7010,  ...,  0.7672, -0.0244,  0.5205],
        [ 1.0310, -0.2701, -1.4344,  ..., -0.4572,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2381, -1.9564,  ..., -0.2178, -0.0957, -1.5366],
        [ 0.1461,  0.3277, -1.5847,  ...,  0.5498, -0.2801, -0.8176],
        [ 0.8866, -1.8597,  1.1344,  ...,  0.5336, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7058,  1.3711,  ...,  0.4009, -0.0183,  0.9212],
        [-1.0686,  0.1363, -1.7010,  ...,  0.7625, -0.0244,  0.5205],
        [ 1.0310, -0.2711, -1.4344,  ..., -0.4581,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2381, -1.9564,  ..., -0.2178, -0.0957, -1.5366],
        [ 0.1461,  0.3277, -1.5847,  ...,  0.5498, -0.2801, -0.8176],
        [ 0.8866, -1.8596,  1.1344,  ...,  0.5336, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7058,  1.3711,  ...,  0.4010, -0.0183,  0.9212],
        [-1.0686,  0.1363, -1.7010,  ...,  0.7624, -0.0244,  0.5205],
        [ 1.0310, -0.2711, -1.4344,  ..., -0.4581,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2360, -1.9564,  ..., -0.2182, -0.0957, -1.5366],
        [ 0.1461,  0.3283, -1.5847,  ...,  0.5503, -0.2801, -0.8176],
        [ 0.8866, -1.8590,  1.1344,  ...,  0.5336, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7054,  1.3711,  ...,  0.4003, -0.0183,  0.9212],
        [-1.0686,  0.1365, -1.7010,  ...,  0.7622, -0.0244,  0.5205],
        [ 1.0310, -0.2714, -1.4344,  ..., -0.4578,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2359, -1.9564,  ..., -0.2182, -0.0957, -1.5366],
        [ 0.1461,  0.3284, -1.5847,  ...,  0.5504, -0.2801, -0.8176],
        [ 0.8866, -1.8590,  1.1344,  ...,  0.5336, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7054,  1.3711,  ...,  0.4003, -0.0183,  0.9212],
        [-1.0686,  0.1365, -1.7010,  ...,  0.7622, -0.0244,  0.5205],
        [ 1.0310, -0.2714, -1.4344,  ..., -0.4578,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2360, -1.9564,  ..., -0.2184, -0.0957, -1.5366],
        [ 0.1461,  0.3287, -1.5847,  ...,  0.5507, -0.2801, -0.8176],
        [ 0.8866, -1.8582,  1.1344,  ...,  0.5340, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7046,  1.3711,  ...,  0.3991, -0.0183,  0.9212],
        [-1.0686,  0.1366, -1.7010,  ...,  0.7623, -0.0244,  0.5205],
        [ 1.0310, -0.2717, -1.4344,  ..., -0.4591,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2361, -1.9564,  ..., -0.2184, -0.0957, -1.5366],
        [ 0.1461,  0.3287, -1.5847,  ...,  0.5507, -0.2801, -0.8176],
        [ 0.8866, -1.8580,  1.1344,  ...,  0.5340, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7045,  1.3711,  ...,  0.3989, -0.0183,  0.9212],
        [-1.0686,  0.1366, -1.7010,  ...,  0.7623, -0.0244,  0.5205],
        [ 1.0310, -0.2718, -1.4344,  ..., -0.4591,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2376, -1.9564,  ..., -0.2184, -0.0957, -1.5366],
        [ 0.1461,  0.3283, -1.5847,  ...,  0.5511, -0.2801, -0.8176],
        [ 0.8866, -1.8581,  1.1344,  ...,  0.5341, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7035,  1.3711,  ...,  0.3942, -0.0183,  0.9212],
        [-1.0686,  0.1367, -1.7010,  ...,  0.7623, -0.0244,  0.5205],
        [ 1.0310, -0.2726, -1.4344,  ..., -0.4596,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2376, -1.9564,  ..., -0.2184, -0.0957, -1.5366],
        [ 0.1461,  0.3283, -1.5847,  ...,  0.5511, -0.2801, -0.8176],
        [ 0.8866, -1.8582,  1.1344,  ...,  0.5341, -0.0279,  1.5063],
        ...,
        [ 0.3803,  1.7035,  1.3711,  ...,  0.3941, -0.0183,  0.9212],
        [-1.0686,  0.1367, -1.7010,  ...,  0.7623, -0.0244,  0.5205],
        [ 1.0310, -0.2726, -1.4344,  ..., -0.4596,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2380, -1.9564,  ..., -0.2184, -0.0957, -1.5366],
        [ 0.1461,  0.2466, -1.5847,  ...,  0.5514, -0.2801, -0.8176],
        [ 0.8866, -1.8574,  1.1344,  ...,  0.5341, -0.0284,  1.5063],
        ...,
        [ 0.3803,  1.7035,  1.3711,  ...,  0.3935, -0.0183,  0.9212],
        [-1.0686,  0.1368, -1.7010,  ...,  0.7623, -0.0243,  0.5205],
        [ 1.0310, -0.2726, -1.4344,  ..., -0.4597,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2380, -1.9564,  ..., -0.2184, -0.0957, -1.5366],
        [ 0.1461,  0.2463, -1.5847,  ...,  0.5514, -0.2801, -0.8176],
        [ 0.8866, -1.8575,  1.1344,  ...,  0.5341, -0.0284,  1.5063],
        ...,
        [ 0.3803,  1.7035,  1.3711,  ...,  0.3935, -0.0183,  0.9212],
        [-1.0686,  0.1368, -1.7010,  ...,  0.7623, -0.0243,  0.5205],
        [ 1.0310, -0.2726, -1.4344,  ..., -0.4597,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2379, -1.9564,  ..., -0.2184, -0.0957, -1.5366],
        [ 0.1461,  0.2441, -1.5847,  ...,  0.5514, -0.2801, -0.8176],
        [ 0.8866, -1.8576,  1.1344,  ...,  0.5341, -0.0284,  1.5063],
        ...,
        [ 0.3803,  1.7055,  1.3711,  ...,  0.3935, -0.0183,  0.9212],
        [-1.0686,  0.1368, -1.7010,  ...,  0.7623, -0.0243,  0.5205],
        [ 1.0310, -0.2733, -1.4344,  ..., -0.4598,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2379, -1.9564,  ..., -0.2184, -0.0957, -1.5366],
        [ 0.1461,  0.2441, -1.5847,  ...,  0.5514, -0.2801, -0.8176],
        [ 0.8866, -1.8576,  1.1344,  ...,  0.5341, -0.0284,  1.5063],
        ...,
        [ 0.3803,  1.7055,  1.3711,  ...,  0.3935, -0.0183,  0.9212],
        [-1.0686,  0.1368, -1.7010,  ...,  0.7623, -0.0243,  0.5205],
        [ 1.0310, -0.2733, -1.4344,  ..., -0.4598,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2412, -1.9564,  ..., -0.2184, -0.0957, -1.5366],
        [ 0.1461,  0.2437, -1.5847,  ...,  0.5514, -0.2801, -0.8176],
        [ 0.8866, -1.8572,  1.1344,  ...,  0.5341, -0.0284,  1.5063],
        ...,
        [ 0.3803,  1.7073,  1.3711,  ...,  0.3935, -0.0183,  0.9212],
        [-1.0686,  0.1362, -1.7010,  ...,  0.7623, -0.0243,  0.5205],
        [ 1.0310, -0.2734, -1.4344,  ..., -0.4598,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2416, -1.9564,  ..., -0.2184, -0.0957, -1.5366],
        [ 0.1461,  0.2437, -1.5847,  ...,  0.5514, -0.2801, -0.8176],
        [ 0.8866, -1.8571,  1.1344,  ...,  0.5341, -0.0284,  1.5063],
        ...,
        [ 0.3803,  1.7074,  1.3711,  ...,  0.3935, -0.0183,  0.9212],
        [-1.0686,  0.1362, -1.7010,  ...,  0.7623, -0.0243,  0.5205],
        [ 1.0310, -0.2734, -1.4344,  ..., -0.4598,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2440, -1.9564,  ..., -0.2183, -0.0957, -1.5366],
        [ 0.1461,  0.2437, -1.5847,  ...,  0.5514, -0.2801, -0.8176],
        [ 0.8866, -1.8566,  1.1344,  ...,  0.5333, -0.0284,  1.5063],
        ...,
        [ 0.3803,  1.7083,  1.3711,  ...,  0.3936, -0.0183,  0.9212],
        [-1.0686,  0.1360, -1.7010,  ...,  0.7623, -0.0243,  0.5205],
        [ 1.0310, -0.2734, -1.4344,  ..., -0.4596,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2440, -1.9564,  ..., -0.2183, -0.0957, -1.5366],
        [ 0.1461,  0.2437, -1.5847,  ...,  0.5514, -0.2801, -0.8176],
        [ 0.8866, -1.8566,  1.1344,  ...,  0.5333, -0.0284,  1.5063],
        ...,
        [ 0.3803,  1.7083,  1.3711,  ...,  0.3936, -0.0183,  0.9212],
        [-1.0686,  0.1360, -1.7010,  ...,  0.7623, -0.0243,  0.5205],
        [ 1.0310, -0.2733, -1.4344,  ..., -0.4596,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)


Parameter containing:
tensor([[ 0.7128, -1.2445, -1.9564,  ..., -0.2183, -0.0957, -1.5366],
        [ 0.1461,  0.2437, -1.5847,  ...,  0.5504, -0.2801, -0.8176],
        [ 0.8866, -1.8566,  1.1344,  ...,  0.5331, -0.0284,  1.5063],
        ...,
        [ 0.3803,  1.7085,  1.3711,  ...,  0.3936, -0.0183,  0.9212],
        [-1.0686,  0.1363, -1.7010,  ...,  0.7623, -0.0243,  0.5205],
        [ 1.0310, -0.2733, -1.4344,  ..., -0.4595,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[ 0.7128, -1.2445, -1.9564,  ..., -0.2183, -0.0957, -1.5366],
        [ 0.1461,  0.2437, -1.5847,  ...,  0.5503, -0.2801, -0.8176],
        [ 0.8866, -1.8566,  1.1344,  ...,  0.5331, -0.0284,  1.5063],
        ...,
        [ 0.3803,  1.7085,  1.3711,  ...,  0.3936, -0.0183,  0.9212],
        [-1.0686,  0.1363, -1.7010,  ...,  0.7623, -0.0243,  0.5205],
        [ 1.0310, -0.2733, -1.4344,  ..., -0.4595,  0.6009, -1.5321]],
       device='cuda:0', requires_grad=True)
