In [None]:
!pip install yfinance
!pip install transformers
!pip install wandb
!pip install beautifulsoup4


In [1]:
import yfinance as yf
import datetime
import numpy as np
import pandas as pd
from pylab import mpl, plt
import math, time
import itertools
from datetime import datetime
from operator import itemgetter
from tqdm import tqdm
from math import sqrt
import torch
import torch.nn as nn
from torch.autograd import Variable
from transformers import RobertaTokenizer, RobertaModel
import torch.nn.functional as F

In [2]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

In [3]:
import wandb
wandb.login()
wandb.init(project="stock_prediction")

[34m[1mwandb[0m: Currently logged in as: [33mvisriv[0m. Use [1m`wandb login --relogin`[0m to force relogin


### Hyperparams

In [4]:
no_of_days_to_lookforward = 1
no_of_days_to_lookback = 5
up_threshold = 0.015
down_threshold = -0.015
max_text_per_iter = 20
batch_size = 8
MAX_LEN = 10
num_epochs = 20

### Get stocks data for last N days

In [None]:
!mkdir data


In [5]:
stock_symbols = [ 'XOM']
no_of_days = 4*365

EXPORT_DATA_FOLDER = './data/'
# !mkdir data
# Set the start and end dates for the data
# here matching it with dates of news text available
train_start = datetime.strptime('2020/01/04', '%Y/%m/%d')
train_end = datetime.strptime('2022/09/30', '%Y/%m/%d')
test_start = datetime.strptime('2022/10/01', '%Y/%m/%d')
test_end = datetime.strptime('2023/01/04', '%Y/%m/%d')


# start = datetime.datetime.now() - datetime.timedelta(days=no_of_days)
# end = datetime.datetime.now()

# Get training data
for symbol in stock_symbols:
    # Download the historical price and volume data using yfinance
    train_data_raw = yf.download(symbol, start=train_start, end=train_end)

    # Normalize features by percent of changes between today and yesterday
    pct_change_open = train_data_raw['Open'].pct_change().fillna(0)
    pct_change_high = train_data_raw['High'].pct_change().fillna(0)
    pct_change_high_over_open = (train_data_raw['High']-train_data_raw['Open'])/train_data_raw['Open']
    pct_change_low = train_data_raw['Low'].pct_change().fillna(0)
    pct_change_low_over_open = (train_data_raw['Low']-train_data_raw['Open'])/train_data_raw['Open']
    pct_change_close = train_data_raw['Close'].pct_change().fillna(0)
    pct_change_close_over_open = (train_data_raw['Close']-train_data_raw['Open'])/train_data_raw['Open']
    pct_change_adjclose = train_data_raw['Adj Close'].pct_change().fillna(0)
    pct_change_adjclose_over_open = (train_data_raw['Adj Close']-train_data_raw['Open'])/train_data_raw['Open']
    pct_change_volume = train_data_raw['Volume'].pct_change().fillna(0)

    # Prepare labels: 2 means the close price of tomorow is higher than today's close price; 1 is down; 0 means the movement is between up_threshold and down_threshold
    label = np.where(pct_change_close > up_threshold, 2, np.where(pct_change_close < down_threshold, 1, 0))[1:]
    label = np.append(label, 0)

    # Construct a train_data_norm data frame
    train_data_norm = pd.DataFrame({'Open_norm':pct_change_open,
                              'High_norm':pct_change_high,
                              'Low_norm': pct_change_low,
                              'Close_norm':pct_change_close,
                              'Volume_norm':pct_change_volume,
                              'High-Open_norm':pct_change_high_over_open,
                              'Low-Open_norm':pct_change_low_over_open,
                              'Close-Open_norm':pct_change_close_over_open,
                              'Label_2up1down':label})

    # Normalize by min-max normalization after the pct normalization
    train_data_norm['Open_norm'] = train_data_norm['Open_norm'].apply(lambda x: (x - train_data_norm['Open_norm'].min()) / (train_data_norm['Open_norm'].max() - train_data_norm['Open_norm'].min()))
    train_data_norm['High_norm'] = train_data_norm['High_norm'].apply(lambda x: (x - train_data_norm['High_norm'].min()) / (train_data_norm['High_norm'].max() - train_data_norm['High_norm'].min()))
    train_data_norm['Low_norm'] = train_data_norm['Low_norm'].apply(lambda x: (x - train_data_norm['Low_norm'].min()) / (train_data_norm['Low_norm'].max() - train_data_norm['Low_norm'].min()))
    train_data_norm['Close_norm'] = train_data_norm['Close_norm'].apply(lambda x: (x - train_data_norm['Close_norm'].min()) / (train_data_norm['Close_norm'].max() - train_data_norm['Close_norm'].min()))
    train_data_norm['Volume_norm'] = train_data_norm['Volume_norm'].apply(lambda x: (x - train_data_norm['Volume_norm'].min()) / (train_data_norm['Volume_norm'].max() - train_data_norm['Volume_norm'].min()))
    train_data_norm['High-Open_norm'] = train_data_norm['High-Open_norm'].apply(lambda x: (x - train_data_norm['High-Open_norm'].min()) / (train_data_norm['High-Open_norm'].max() - train_data_norm['High-Open_norm'].min()))
    train_data_norm['Low-Open_norm'] = train_data_norm['Low-Open_norm'].apply(lambda x: (x - train_data_norm['Low-Open_norm'].min()) / (train_data_norm['Low-Open_norm'].max() - train_data_norm['Low-Open_norm'].min()))
    train_data_norm['Close-Open_norm'] = train_data_norm['Close-Open_norm'].apply(lambda x: (x - train_data_norm['Close-Open_norm'].min()) / (train_data_norm['Close-Open_norm'].max() - train_data_norm['Close-Open_norm'].min()))

    # Remove the first and the last row, becuase of NAN values
    train_data_raw = train_data_raw.iloc[1:-1]
    train_data_norm = train_data_norm.iloc[1:-1]

    train_data_raw.to_csv(EXPORT_DATA_FOLDER+symbol+'train_raw_data.csv', index=True)
    train_data_norm.to_csv(EXPORT_DATA_FOLDER+symbol+'train_norm_data.csv', index=True)


# Get test data
for symbol in stock_symbols:
    # Download the historical price and volume data using yfinance
    test_data_raw = yf.download(symbol, start=test_start, end=test_end)

    # Normalize features by percent of changes between today and yesterday
    pct_change_open = test_data_raw['Open'].pct_change().fillna(0)
    pct_change_high = test_data_raw['High'].pct_change().fillna(0)
    pct_change_high_over_open = (test_data_raw['High']-test_data_raw['Open'])/test_data_raw['Open']
    pct_change_low = test_data_raw['Low'].pct_change().fillna(0)
    pct_change_low_over_open = (test_data_raw['Low']-test_data_raw['Open'])/test_data_raw['Open']
    pct_change_close = test_data_raw['Close'].pct_change().fillna(0)
    pct_change_close_over_open = (test_data_raw['Close']-test_data_raw['Open'])/test_data_raw['Open']
    pct_change_adjclose = test_data_raw['Adj Close'].pct_change().fillna(0)
    pct_change_adjclose_over_open = (test_data_raw['Adj Close']-test_data_raw['Open'])/test_data_raw['Open']
    pct_change_volume = test_data_raw['Volume'].pct_change().fillna(0)

    # Prepare labels: 2 means the close price of tomorow is higher than today's close price; 1 is down; 0 means the movement is between up_threshold and down_threshold
    label = np.where(pct_change_close > up_threshold, 2, np.where(pct_change_close < down_threshold, 1, 0))[1:]
    label = np.append(label, 0)

    # Construct a test_data_norm data frame
    test_data_norm = pd.DataFrame({'Open_norm':pct_change_open,
                              'High_norm':pct_change_high,
                              'Low_norm': pct_change_low,
                              'Close_norm':pct_change_close,
                              'Volume_norm':pct_change_volume,
                              'High-Open_norm':pct_change_high_over_open,
                              'Low-Open_norm':pct_change_low_over_open,
                              'Close-Open_norm':pct_change_close_over_open,
                              'Label_2up1down':label})

    # Normalize by min-max normalization after the pct normalization
    test_data_norm['Open_norm'] = test_data_norm['Open_norm'].apply(lambda x: (x - test_data_norm['Open_norm'].min()) / (test_data_norm['Open_norm'].max() - test_data_norm['Open_norm'].min()))
    test_data_norm['High_norm'] = test_data_norm['High_norm'].apply(lambda x: (x - test_data_norm['High_norm'].min()) / (test_data_norm['High_norm'].max() - test_data_norm['High_norm'].min()))
    test_data_norm['Low_norm'] = test_data_norm['Low_norm'].apply(lambda x: (x - test_data_norm['Low_norm'].min()) / (test_data_norm['Low_norm'].max() - test_data_norm['Low_norm'].min()))
    test_data_norm['Close_norm'] = test_data_norm['Close_norm'].apply(lambda x: (x - test_data_norm['Close_norm'].min()) / (test_data_norm['Close_norm'].max() - test_data_norm['Close_norm'].min()))
    test_data_norm['Volume_norm'] = test_data_norm['Volume_norm'].apply(lambda x: (x - test_data_norm['Volume_norm'].min()) / (test_data_norm['Volume_norm'].max() - test_data_norm['Volume_norm'].min()))
    test_data_norm['High-Open_norm'] = test_data_norm['High-Open_norm'].apply(lambda x: (x - test_data_norm['High-Open_norm'].min()) / (test_data_norm['High-Open_norm'].max() - test_data_norm['High-Open_norm'].min()))
    test_data_norm['Low-Open_norm'] = test_data_norm['Low-Open_norm'].apply(lambda x: (x - test_data_norm['Low-Open_norm'].min()) / (test_data_norm['Low-Open_norm'].max() - test_data_norm['Low-Open_norm'].min()))
    test_data_norm['Close-Open_norm'] = test_data_norm['Close-Open_norm'].apply(lambda x: (x - test_data_norm['Close-Open_norm'].min()) / (test_data_norm['Close-Open_norm'].max() - test_data_norm['Close-Open_norm'].min()))

    # Remove the first and the last row, becuase of NAN values
    test_data_raw = test_data_raw.iloc[1:-1]
    test_data_norm = test_data_norm.iloc[1:-1]

    test_data_raw.to_csv(EXPORT_DATA_FOLDER+symbol+'test_raw_data.csv', index=True)
    test_data_norm.to_csv(EXPORT_DATA_FOLDER+symbol+'test_norm_data.csv', index=True)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


## TODO

In [None]:
'''
(2023-06-05)
cuda support check
//read textual data into correct shape
hyperparam tuning: number of neurons: tune to right number of neurons in FC in model
//max_text_per_iter -> code in dataloader to maintain the size

(2023-06-07)
cuda check
roberta encoder fix
multi label - how to create target label?

'''

## Prep textual data

### Crawl textual news data from internet

In [None]:
# Import libraries
from bs4 import BeautifulSoup
from urllib.request import urlopen
from urllib.request import Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Parameters
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL', 'TSLA', 'AMZN']



# Get Data
finviz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finviz_url + ticker
    req = Request(url=url,
                  headers={'user-agent': 'Mozilla/5.0',
                                   'referer': 'https://...'})
    resp = urlopen(req)
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')

        print ('\n')
        print ('Recent News Headlines for {}: '.format(ticker))

        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text
            td_text = table_row.td.text
            td_text = td_text.strip()
            print(a_text,'(',td_text,')')
            if i == n-1:
                break
except KeyError:
    pass


# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        text = x.a.get_text()
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]

        else:
            date = date_scrape[0]
            time = date_scrape[1]

        ticker = file_name.split('_')[0]

        parsed_news.append([ticker, date, time, text])



### Read downloaded data from saved files

In [6]:
text_data_df = pd.read_csv('./data/XOM_20200401_20230401_medium.csv',
                           sep= ',',
                           header= 0,
                           engine='python',
                           on_bad_lines = 'skip')
text_data_df = text_data_df[['Date', 'News']]


text_data_df = text_data_df.groupby('Date')['News'].apply('$$$###'.join)

text_data_df.index = pd.to_datetime(text_data_df.index, dayfirst=True)
# text_data_df


In [7]:
all_train_df = train_data_norm.join(text_data_df, how = 'inner')
all_test_df = test_data_norm.join(text_data_df, how = 'inner')

print(all_train_df.index.min())
print(all_train_df.index.max())
print(all_test_df.index.min())
print(all_test_df.index.max())

2020-04-01 00:00:00
2022-09-28 00:00:00
2022-10-04 00:00:00
2022-12-30 00:00:00


### Merge textual and numerical data


In [8]:
all_train = all_train_df.values

window_size = no_of_days_to_lookback

X_numerical_train = []
y_train = []
X_text_train = []
X_text_train_curr = []





for i in range(window_size, len(all_train) - no_of_days_to_lookforward + 1):
    X_numerical_train.append(all_train[i-window_size: i, :-2])

    # split and append sequence of text
    curr_seq = all_train[i-window_size: i, -1]
    for j in range(window_size):
        split_curr_seq = curr_seq[window_size - 1 -j].split('$$$###')
        X_text_train_curr = X_text_train_curr + split_curr_seq

    if len(X_text_train_curr) > max_text_per_iter:
        X_text_train_curr = X_text_train_curr[:max_text_per_iter]

    X_text_train.append(X_text_train_curr)

    # target labels
    y_train.append(all_train[i:i+no_of_days_to_lookforward, -2])

X_numerical_train, y_train = np.array(X_numerical_train).astype(np.float16), np.array(y_train).astype(np.int32)
print(type(X_numerical_train))
print(type(y_train))

X_numerical_train = torch.from_numpy(X_numerical_train).type(torch.Tensor)
y_train = torch.from_numpy(y_train).long()

print(len(X_numerical_train))
print(len(X_text_train))
print(len(y_train))
print(X_numerical_train.shape)

print(len(X_text_train))
print(len(X_text_train[2]))
# print(X_text_train[2])

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
624
624
624
torch.Size([624, 5, 8])
624
20


In [9]:
all_test = all_test_df.values


X_numerical_test = []
y_test = []
X_text_test = []
X_text_test_curr = []

for i in range(window_size, len(all_test) - no_of_days_to_lookforward + 1):
    X_numerical_test.append(all_test[i-window_size: i, :-2])

    # split and append sequence of text (in reverse order to add the latest news first)
    curr_seq = all_test[i-window_size: i, -1]
    for j in range(window_size):
        split_curr_seq = curr_seq[window_size - 1 -j].split('$$$###')
        X_text_test_curr = X_text_test_curr + split_curr_seq

    if len(X_text_test_curr) > max_text_per_iter:
        X_text_test_curr = X_text_test_curr[:max_text_per_iter]

    X_text_test.append(X_text_test_curr)

    # target labels
    y_test.append(all_test[i:i+no_of_days_to_lookforward, -2])

X_numerical_test, y_test = np.array(X_numerical_test).astype(np.float16), np.array(y_test).astype(np.int32)
print(type(X_numerical_test))
print(type(y_test))

X_numerical_test = torch.from_numpy(X_numerical_test).type(torch.Tensor)
y_test = torch.from_numpy(y_test).long()

print(len(X_numerical_test))
print(len(X_text_test))
print(len(y_test))
print(X_numerical_test.shape)

print(len(X_text_test))
print(len(X_text_test[2]))
# print(X_text_test[2])

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
57
57
57
torch.Size([57, 5, 8])
57
20


## Data loader

In [10]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

tokenizer = RobertaTokenizer.from_pretrained('roberta-large', truncation=True, do_lower_case=True)

class SiameseDataloader(Dataset):

    def __init__(self, X_numerical_train, y_train, X_text_train, MAX_LEN, tokenizer):
        self.X_numerical_train = X_numerical_train
        self.X_text_train = X_text_train
        self.MAX_LEN = MAX_LEN
        self.tokenizer = tokenizer

    def __getitem__(self, index):


        input_ids = []
        attention_masks = []
        token_type_ids = []
        input_seq = []

        for sent in X_text_train[index]:
            encoded_sent = self.tokenizer.encode_plus(
                text=sent,
                add_special_tokens=True,        # Add `[CLS]` and `[SEP]` special tokens
                max_length=self.MAX_LEN,             # Choose max length to truncate/pad
                pad_to_max_length=True,         # Pad sentence to max length
                #return_attention_mask=True      # Return attention mask
                return_token_type_ids=True
                )
            input_ids.append(encoded_sent.get('input_ids'))
            attention_masks.append(encoded_sent.get('attention_mask'))
            token_type_ids.append(encoded_sent.get('token_type_ids'))

        # Convert lists to tensors
        input_ids = torch.tensor(input_ids)
        attention_masks = torch.tensor(attention_masks)
        token_type_ids = torch.tensor(token_type_ids)


        return {
            'x_numerical': X_numerical_train[index],
            'ids': torch.tensor(input_ids, dtype=torch.long),
            'mask': torch.tensor(attention_masks, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(y_train[index], dtype=torch.long)
        }





    def __len__(self):
        return len(self.X_numerical_train)

In [11]:
train_set = SiameseDataloader(X_numerical_train, y_train, X_text_train, MAX_LEN, tokenizer)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)


test_set = SiameseDataloader(X_numerical_test, y_test, X_text_test, MAX_LEN, tokenizer)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)


## Build model


In [12]:



class SiameseModel(nn.Module):
    def __init__(self, input_dim1, input_dim2,
                 hidden_dim1, hidden_dim2, hidden_dim3, hidden_dim4,
                 num_layers1, num_layers2, output_dim1, output_dim2):
        super(SiameseModel, self).__init__()
        self.input_dim1 = input_dim1
        self.input_dim2 = input_dim2
        self.hidden_dim1 = hidden_dim1
        self.hidden_dim2 = hidden_dim2
        self.hidden_dim3 = hidden_dim3
        self.hidden_dim4 = hidden_dim4
        self.num_layers1 = num_layers1
        self.num_layers2 = num_layers2
        self.output_dim1 = output_dim1
        self.output_dim2 = output_dim2



        self.roberta = RobertaModel.from_pretrained("roberta-large").to(device)


        self.lstm1 = nn.LSTM(input_dim1, hidden_dim1, num_layers1, batch_first=True)
        self.lstm2 = nn.LSTM(input_dim2, hidden_dim2, num_layers2, batch_first=True)


        self.fc1 = nn.Linear(hidden_dim1, output_dim1)
        self.fc2 = nn.Linear(hidden_dim2, output_dim2)
        self.fc3 = nn.Linear(output_dim1+output_dim2, hidden_dim3)
        self.fc4 = nn.Linear(hidden_dim3, hidden_dim4)
        self.fc5 = nn.Linear(hidden_dim4, 3)



    def forward(self, x1, ids, masks, token_type_ids):
        #left tower with numerical features
        h_10 = Variable(torch.zeros(self.num_layers1, x1.size(0), self.hidden_dim1)).to(device)
        c_10 = Variable(torch.zeros(self.num_layers1, x1.size(0), self.hidden_dim1)).to(device)
        ula1, (h_out1, _) = self.lstm1(x1, (h_10, c_10))
        h_out1 = h_out1.view(-1, self.hidden_dim1)
        out1 = self.fc1(h_out1)




        # right tower with roberta on textual features
        #TODO
        batch_size_here = ids.shape[0]
        e2 = torch.zeros(batch_size_here, max_text_per_iter,1024).to(device)

        for k in range(ids.shape[1]):
            seq_ids = ids[:,k,:]
            seq_masks = masks[:,k,:]
            seq_token_type_ids = token_type_ids[:,k,:]


            e2k = self.roberta(input_ids= seq_ids, attention_mask=seq_masks, token_type_ids=seq_token_type_ids)
            # print(e2.shape)
            # print(e2k[1].shape)
            #first 0 is for last_hidden_state: https://huggingface.co/docs/transformers/main/en/model_doc/roberta#transformers.RobertaModel.forward.returns:~:text=transformers.modeling_outputs.-,BaseModelOutputWithPoolingAndCrossAttentions%20or%20tuple(torch.FloatTensor),-A%20transformers.modeling_outputs
            # the shape of e2k[0] is (batch_size, sequence_length (<=MAX_LEN), hidden_size (=1024))
            e2k1 = e2k[0][:, 0, :]
            e2[:,k,:] = e2k1


        print(e2.shape)
        h_20 = Variable(torch.zeros(self.num_layers2, e2.size(0), self.hidden_dim2)).to(device)
        c_20 = Variable(torch.zeros(self.num_layers2, e2.size(0), self.hidden_dim2)).to(device)
        ula2, (h_out2, _) = self.lstm2(e2, (h_20, c_20))
        h_out2 = h_out2.view(-1, self.hidden_dim2)
        out2 = self.fc2(h_out2)




        # siamese merging layers

        output = torch.cat((out1, out2),1)
        output = F.relu(self.fc3(output))
        output = F.relu(self.fc4(output))
        output = self.fc5(output)
        return output

#TODO : correct these values
model = SiameseModel(input_dim1 = 8, input_dim2 = 1024,
                 hidden_dim1 = 20, hidden_dim2 = 768, hidden_dim3 = 128, hidden_dim4 = 64,
                 num_layers1 = 1, num_layers2 = 1, output_dim1 = 10, output_dim2 = 256).to(device)






Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
print(model)
print(len(list(model.parameters())))
# for i in range(len(list(model.parameters()))):
#     print(list(model.parameters())[i].size())


## Train model

In [13]:
criterion = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)
loss_arr = np.zeros(num_epochs)

In [None]:
roberta = RobertaModel.from_pretrained("roberta-large").to(device)
train_loss_record = []
for epoch in range(num_epochs):
    train_loss = []
    train_loss_sum = []

    for idx, data in tqdm(enumerate(train_loader, 0)):
        x_numerical = data['x_numerical'].to(device, dtype = torch.float)
        ids = data['ids'].to(device, dtype = torch.long)
        masks = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.long)

        # debugging roberta encoder and second lstm
        '''
        debug starts here
        '''
        # if idx > 1:
        #     break
        # batch_size_here = data['ids'].shape[0]
        # print('batch_size_here:', batch_size_here)
        # e2 = torch.zeros(batch_size_here, max_text_per_iter, 1024)
        # print('ids shape:', ids.shape)

        # for k in range(ids.shape[1]):  #number of sentences in sequence = max_text_per_iter
        #     print('k:', k)
        #     seq_ids = ids[:,k,:].to(device)
        #     seq_masks = masks[:,k,:].to(device)
        #     seq_token_type_ids = token_type_ids[:,k,:].to(device)


        #     e2k = roberta(input_ids= seq_ids, attention_mask=seq_masks, token_type_ids=seq_token_type_ids)
        #     print(e2.shape)
        #     print(e2k[1].shape)
        #     #first 0 is for last_hidden_state: https://huggingface.co/docs/transformers/main/en/model_doc/roberta#transformers.RobertaModel.forward.returns:~:text=transformers.modeling_outputs.-,BaseModelOutputWithPoolingAndCrossAttentions%20or%20tuple(torch.FloatTensor),-A%20transformers.modeling_outputs
        #     # the shape of e2k[0] is (batch_size, sequence_length (<=MAX_LEN), hidden_size (=1024))
        #     e2k1 = e2k[0][:, 0, :]
        #     e2[:,k,:] = e2k1




        # lstm2 = nn.LSTM(1024, 768, 1, batch_first=True)
        # fc2 = nn.Linear(768, 256)

        # h_20 = Variable(torch.zeros(1, e2.size(0), 768))
        # c_20 = Variable(torch.zeros(1, e2.size(0), 768))
        # ula2, (h_out2, _) = lstm2(e2, (h_20, c_20))
        # h_out2 = h_out2.view(-1, 768)
        # out2 = fc2(h_out2)


    #     print(ids.shape)
    #     print(masks.shape)
    #     print(token_type_ids.shape)

        # print(out2)



        '''
        debug ends here
        '''



        y_pred = model(x_numerical, ids, masks, token_type_ids)
        print('y_pred:', y_pred)
        loss = criterion(y_pred, targets.reshape(-1))

         # Zero out gradient, else they will accumulate between epochs
        optimiser.zero_grad()

        # Backward pass
        loss.backward()

        # Update parameters
        optimiser.step()



        train_loss.append(loss.data.cpu())
        train_loss_sum.append(loss.data.cpu())

        if epoch % 10 == 0 and epoch !=0:
            print("Epoch ", epoch, "CELoss: ", loss.item())

        wandb.log({'avg train loss in this batch': loss.item(), 'epoch': epoch, 'batch_id': idx})

    # Record at every epoch
    print('Train Loss at epoch {}: {}\n'.format(epoch, np.mean(train_loss_sum)))
    train_loss_record.append(np.mean(train_loss_sum))
    wandb.log({'avg train loss in this epoch': np.mean(train_loss_sum), 'epoch': epoch})






    # evaluate on test set every epoch
    test_loss = []
    test_loss_sum = []

    correct = 0
    total = 0

    for idx, data in tqdm(enumerate(test_loader, 0)):
        test_x_numerical = data['x_numerical'].to(device, dtype = torch.float)
        test_ids = data['ids'].to(device, dtype = torch.long)
        test_masks = data['mask'].to(device, dtype = torch.long)
        test_token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        test_targets = data['targets'].to(device, dtype = torch.long)

        y_pred = model(test_x_numerical, test_ids, test_masks, test_token_type_ids)
        _, pred_label = torch.max(y_pred.data, 1)

#         print('y_pred:', y_pred)
        tloss = criterion(y_pred, test_targets.reshape(-1))

        test_loss.append(tloss.data.cpu())
        test_loss_sum.append(tloss.data.cpu())

        wandb.log({'avg test loss in this batch': tloss.item(), 'epoch': epoch, 'batch_id': idx})

        # Get accuracy
        total += test_targets.reshape(-1).size(0)
        correct += (pred_label == test_targets.reshape(-1)).sum()
    accuracy = 100 * correct / total

    # Record at every epoch
    print('test Loss at epoch {}: {}\n'.format(epoch, np.mean(test_loss_sum)))
    wandb.log({'avg test loss in this epoch': np.mean(test_loss_sum), 'epoch': epoch})
    wandb.log({'test accuracy in this epoch': accuracy, 'epoch': epoch})











Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
0it [00:00, ?it/s]Truncation w

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.0044, -0.0489,  0.0496],
        [ 0.0043, -0.0489,  0.0497],
        [ 0.0044, -0.0489,  0.0497],
        [ 0.0044, -0.0490,  0.0498],
        [ 0.0043, -0.0489,  0.0497],
        [ 0.0044, -0.0489,  0.0497],
        [ 0.0044, -0.0489,  0.0496],
        [ 0.0043, -0.0489,  0.0497]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


1it [00:02,  2.34s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.0421, -0.0545,  0.0067],
        [ 0.0420, -0.0546,  0.0068],
        [ 0.0418, -0.0547,  0.0067],
        [ 0.0421, -0.0546,  0.0068],
        [ 0.0420, -0.0545,  0.0068],
        [ 0.0420, -0.0545,  0.0064],
        [ 0.0419, -0.0546,  0.0067],
        [ 0.0421, -0.0545,  0.0068]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


2it [00:04,  2.05s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 3.4769, -1.2000, -5.1874],
        [ 3.4767, -1.1998, -5.1869],
        [ 3.4768, -1.1999, -5.1872],
        [ 3.4768, -1.1997, -5.1869],
        [ 3.4766, -1.1997, -5.1867],
        [ 3.4768, -1.1998, -5.1871],
        [ 3.4770, -1.2001, -5.1877],
        [ 3.4769, -1.1998, -5.1872]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


3it [00:06,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 3.8845, -1.7154, -1.7949],
        [ 3.8841, -1.7153, -1.7946],
        [ 3.8838, -1.7152, -1.7944],
        [ 3.8840, -1.7153, -1.7945],
        [ 3.8838, -1.7151, -1.7944],
        [ 3.8839, -1.7153, -1.7944],
        [ 3.8837, -1.7152, -1.7943],
        [ 3.8840, -1.7152, -1.7945]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


4it [00:07,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[-1.1956,  0.5113,  1.8031],
        [-1.1950,  0.5111,  1.8027],
        [-1.1952,  0.5111,  1.8028],
        [-1.1957,  0.5114,  1.8033],
        [-1.1953,  0.5113,  1.8030],
        [-1.1957,  0.5114,  1.8034],
        [-1.1955,  0.5113,  1.8032],
        [-1.1956,  0.5114,  1.8033]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


5it [00:09,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 1.4402,  0.7155, -0.7561],
        [ 1.4399,  0.7156, -0.7559],
        [ 1.4401,  0.7153, -0.7563],
        [ 1.4400,  0.7156, -0.7559],
        [ 1.4401,  0.7153, -0.7564],
        [ 1.4399,  0.7155, -0.7560],
        [ 1.4400,  0.7155, -0.7560],
        [ 1.4401,  0.7155, -0.7561]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


6it [00:12,  2.07s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[-1.7185,  2.2898,  1.3356],
        [-1.7184,  2.2897,  1.3356],
        [-1.7184,  2.2897,  1.3357],
        [-1.7185,  2.2897,  1.3357],
        [-1.7184,  2.2896,  1.3356],
        [-1.7184,  2.2898,  1.3356],
        [-1.7184,  2.2898,  1.3356],
        [-1.7185,  2.2897,  1.3356]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


7it [00:14,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 1.5138,  1.2904, -1.0896],
        [ 1.5139,  1.2904, -1.0897],
        [ 1.5139,  1.2904, -1.0898],
        [ 1.5139,  1.2904, -1.0897],
        [ 1.5137,  1.2903, -1.0897],
        [ 1.5139,  1.2904, -1.0896],
        [ 1.5138,  1.2903, -1.0896],
        [ 1.5139,  1.2904, -1.0897]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


8it [00:15,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 3.6284,  1.6337, -2.6436],
        [ 3.6284,  1.6337, -2.6437],
        [ 3.6284,  1.6337, -2.6437],
        [ 3.6283,  1.6336, -2.6436],
        [ 3.6284,  1.6337, -2.6438],
        [ 3.6284,  1.6337, -2.6438],
        [ 3.6284,  1.6337, -2.6438],
        [ 3.6284,  1.6337, -2.6437]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


9it [00:17,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 4.8340,  1.3244, -1.9982],
        [ 4.8339,  1.3243, -1.9980],
        [ 4.8341,  1.3244, -1.9983],
        [ 4.8340,  1.3244, -1.9982],
        [ 4.8341,  1.3244, -1.9983],
        [ 4.8340,  1.3244, -1.9983],
        [ 4.8340,  1.3244, -1.9983],
        [ 4.8340,  1.3244, -1.9982]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


10it [00:19,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 4.0562,  1.0708, -0.1114],
        [ 4.0562,  1.0708, -0.1113],
        [ 4.0561,  1.0708, -0.1113],
        [ 4.0561,  1.0707, -0.1113],
        [ 4.0562,  1.0709, -0.1114],
        [ 4.0562,  1.0709, -0.1114],
        [ 4.0561,  1.0707, -0.1113],
        [ 4.0561,  1.0708, -0.1113]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


11it [00:21,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[2.2909, 0.8884, 1.4914],
        [2.2909, 0.8885, 1.4913],
        [2.2910, 0.8885, 1.4913],
        [2.2909, 0.8885, 1.4913],
        [2.2909, 0.8885, 1.4913],
        [2.2909, 0.8885, 1.4913],
        [2.2909, 0.8884, 1.4913],
        [2.2909, 0.8885, 1.4914]], device='cuda:0', grad_fn=<AddmmBackward0>)


12it [00:23,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.5959, 0.9468, 1.4014],
        [1.5959, 0.9467, 1.4014],
        [1.5959, 0.9467, 1.4014],
        [1.5959, 0.9468, 1.4013],
        [1.5959, 0.9468, 1.4014],
        [1.5959, 0.9468, 1.4014],
        [1.5959, 0.9468, 1.4014],
        [1.5959, 0.9468, 1.4014]], device='cuda:0', grad_fn=<AddmmBackward0>)


13it [00:26,  2.09s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.1257, 0.7217, 1.1601],
        [1.1257, 0.7217, 1.1601],
        [1.1257, 0.7217, 1.1601],
        [1.1257, 0.7217, 1.1601],
        [1.1257, 0.7217, 1.1601],
        [1.1257, 0.7217, 1.1601],
        [1.1257, 0.7217, 1.1601],
        [1.1257, 0.7217, 1.1601]], device='cuda:0', grad_fn=<AddmmBackward0>)


14it [00:28,  2.03s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.9859, 0.7508, 0.5140],
        [0.9859, 0.7508, 0.5140],
        [0.9859, 0.7508, 0.5140],
        [0.9859, 0.7508, 0.5140],
        [0.9859, 0.7508, 0.5140],
        [0.9859, 0.7508, 0.5140],
        [0.9859, 0.7508, 0.5140],
        [0.9859, 0.7508, 0.5140]], device='cuda:0', grad_fn=<AddmmBackward0>)


15it [00:29,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.8466, 0.6302, 0.1241],
        [0.8466, 0.6302, 0.1241],
        [0.8466, 0.6302, 0.1241],
        [0.8466, 0.6302, 0.1241],
        [0.8466, 0.6302, 0.1241],
        [0.8466, 0.6302, 0.1241],
        [0.8466, 0.6302, 0.1241],
        [0.8467, 0.6302, 0.1241]], device='cuda:0', grad_fn=<AddmmBackward0>)


16it [00:31,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.7731,  0.2869, -0.4626],
        [ 0.7731,  0.2869, -0.4626],
        [ 0.7731,  0.2868, -0.4626],
        [ 0.7731,  0.2869, -0.4626],
        [ 0.7731,  0.2868, -0.4627],
        [ 0.7731,  0.2869, -0.4627],
        [ 0.7731,  0.2869, -0.4626],
        [ 0.7731,  0.2869, -0.4626]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


17it [00:33,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.8188, -1.0256, -1.2428],
        [ 0.8188, -1.0256, -1.2427],
        [ 0.8188, -1.0256, -1.2427],
        [ 0.8188, -1.0256, -1.2427],
        [ 0.8188, -1.0256, -1.2427],
        [ 0.8188, -1.0256, -1.2427],
        [ 0.8188, -1.0256, -1.2427],
        [ 0.8188, -1.0256, -1.2427]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


18it [00:35,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.7147, -0.4064, -1.2696],
        [ 0.7148, -0.4063, -1.2696],
        [ 0.7147, -0.4063, -1.2696],
        [ 0.7147, -0.4063, -1.2696],
        [ 0.7148, -0.4063, -1.2696],
        [ 0.7148, -0.4063, -1.2697],
        [ 0.7147, -0.4063, -1.2696],
        [ 0.7148, -0.4063, -1.2697]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


19it [00:37,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.7189, -0.4096, -0.5353],
        [ 0.7189, -0.4096, -0.5353],
        [ 0.7188, -0.4095, -0.5352],
        [ 0.7188, -0.4095, -0.5352],
        [ 0.7189, -0.4095, -0.5353],
        [ 0.7188, -0.4096, -0.5353],
        [ 0.7189, -0.4095, -0.5353],
        [ 0.7189, -0.4096, -0.5353]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


20it [00:39,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5404, -0.2596,  0.3696],
        [ 0.5404, -0.2596,  0.3696],
        [ 0.5404, -0.2596,  0.3695],
        [ 0.5404, -0.2596,  0.3696],
        [ 0.5404, -0.2596,  0.3695],
        [ 0.5404, -0.2596,  0.3695],
        [ 0.5404, -0.2596,  0.3695],
        [ 0.5404, -0.2596,  0.3695]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


21it [00:42,  2.10s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.6428, -0.4828,  0.6069],
        [ 0.6428, -0.4829,  0.6069],
        [ 0.6428, -0.4828,  0.6069],
        [ 0.6428, -0.4829,  0.6069],
        [ 0.6428, -0.4828,  0.6069],
        [ 0.6428, -0.4828,  0.6069],
        [ 0.6428, -0.4828,  0.6069],
        [ 0.6428, -0.4829,  0.6069]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


22it [00:43,  2.03s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5171, -0.2001,  0.5896],
        [ 0.5171, -0.2001,  0.5896],
        [ 0.5171, -0.2001,  0.5896],
        [ 0.5171, -0.2000,  0.5896],
        [ 0.5171, -0.2001,  0.5896],
        [ 0.5171, -0.2000,  0.5896],
        [ 0.5171, -0.2001,  0.5896],
        [ 0.5171, -0.2000,  0.5896]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


23it [00:45,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4469, -0.0028,  0.5109],
        [ 0.4469, -0.0028,  0.5109],
        [ 0.4469, -0.0029,  0.5109],
        [ 0.4469, -0.0028,  0.5109],
        [ 0.4469, -0.0029,  0.5109],
        [ 0.4469, -0.0029,  0.5109],
        [ 0.4469, -0.0029,  0.5109],
        [ 0.4469, -0.0029,  0.5109]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


24it [00:47,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.4344, 0.0094, 0.5277],
        [0.4344, 0.0095, 0.5277],
        [0.4344, 0.0094, 0.5277],
        [0.4344, 0.0094, 0.5277],
        [0.4344, 0.0094, 0.5277],
        [0.4344, 0.0095, 0.5277],
        [0.4344, 0.0094, 0.5277],
        [0.4344, 0.0095, 0.5277]], device='cuda:0', grad_fn=<AddmmBackward0>)


25it [00:49,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3958, -0.1334,  0.6564],
        [ 0.3958, -0.1334,  0.6564],
        [ 0.3958, -0.1334,  0.6564],
        [ 0.3958, -0.1334,  0.6564],
        [ 0.3958, -0.1334,  0.6564],
        [ 0.3958, -0.1334,  0.6564],
        [ 0.3958, -0.1334,  0.6564],
        [ 0.3958, -0.1334,  0.6565]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


26it [00:51,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4494, -0.2855,  0.7184],
        [ 0.4494, -0.2855,  0.7184],
        [ 0.4494, -0.2855,  0.7184],
        [ 0.4494, -0.2855,  0.7184],
        [ 0.4494, -0.2855,  0.7184],
        [ 0.4494, -0.2855,  0.7184],
        [ 0.4494, -0.2855,  0.7184],
        [ 0.4494, -0.2855,  0.7184]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


27it [00:53,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5604, -0.2298,  0.6092],
        [ 0.5604, -0.2298,  0.6091],
        [ 0.5604, -0.2298,  0.6092],
        [ 0.5604, -0.2298,  0.6092],
        [ 0.5604, -0.2298,  0.6092],
        [ 0.5604, -0.2298,  0.6092],
        [ 0.5604, -0.2298,  0.6091],
        [ 0.5604, -0.2298,  0.6092]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


28it [00:55,  2.10s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.6982, -0.4026,  0.6831],
        [ 0.6983, -0.4026,  0.6831],
        [ 0.6983, -0.4026,  0.6831],
        [ 0.6982, -0.4026,  0.6831],
        [ 0.6983, -0.4026,  0.6831],
        [ 0.6983, -0.4026,  0.6831],
        [ 0.6983, -0.4026,  0.6831],
        [ 0.6983, -0.4026,  0.6831]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


29it [00:57,  2.03s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 1.0073, -0.4722,  0.7399],
        [ 1.0073, -0.4721,  0.7398],
        [ 1.0073, -0.4721,  0.7399],
        [ 1.0073, -0.4721,  0.7398],
        [ 1.0073, -0.4721,  0.7399],
        [ 1.0073, -0.4722,  0.7398],
        [ 1.0073, -0.4722,  0.7398],
        [ 1.0073, -0.4722,  0.7399]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


30it [00:59,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 1.4437, -0.4867,  0.7554],
        [ 1.4437, -0.4867,  0.7554],
        [ 1.4437, -0.4867,  0.7554],
        [ 1.4438, -0.4867,  0.7554],
        [ 1.4437, -0.4867,  0.7554],
        [ 1.4437, -0.4867,  0.7554],
        [ 1.4437, -0.4867,  0.7554],
        [ 1.4437, -0.4867,  0.7554]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


31it [01:01,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 1.4878, -0.5428,  0.7054],
        [ 1.4878, -0.5428,  0.7053],
        [ 1.4878, -0.5428,  0.7054],
        [ 1.4878, -0.5428,  0.7054],
        [ 1.4878, -0.5428,  0.7054],
        [ 1.4878, -0.5428,  0.7054],
        [ 1.4878, -0.5428,  0.7054],
        [ 1.4878, -0.5428,  0.7054]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


32it [01:03,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 1.3763, -0.7646,  0.6884],
        [ 1.3763, -0.7647,  0.6884],
        [ 1.3763, -0.7646,  0.6884],
        [ 1.3763, -0.7647,  0.6884],
        [ 1.3763, -0.7647,  0.6884],
        [ 1.3763, -0.7647,  0.6884],
        [ 1.3763, -0.7646,  0.6884],
        [ 1.3763, -0.7646,  0.6884]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


33it [01:05,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.9601, -0.5790,  0.5170],
        [ 0.9601, -0.5790,  0.5170],
        [ 0.9601, -0.5790,  0.5170],
        [ 0.9600, -0.5790,  0.5170],
        [ 0.9600, -0.5790,  0.5170],
        [ 0.9600, -0.5790,  0.5170],
        [ 0.9600, -0.5790,  0.5170],
        [ 0.9601, -0.5790,  0.5170]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


34it [01:09,  2.64s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.7893, -0.3736,  0.4033],
        [ 0.7893, -0.3737,  0.4033],
        [ 0.7893, -0.3736,  0.4033],
        [ 0.7893, -0.3736,  0.4033],
        [ 0.7892, -0.3736,  0.4033],
        [ 0.7892, -0.3736,  0.4033],
        [ 0.7893, -0.3736,  0.4033],
        [ 0.7893, -0.3736,  0.4033]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


35it [01:11,  2.53s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.4819, 0.0524, 0.4053],
        [0.4819, 0.0524, 0.4053],
        [0.4819, 0.0524, 0.4053],
        [0.4819, 0.0525, 0.4054],
        [0.4819, 0.0524, 0.4053],
        [0.4819, 0.0524, 0.4053],
        [0.4819, 0.0524, 0.4053],
        [0.4819, 0.0524, 0.4053]], device='cuda:0', grad_fn=<AddmmBackward0>)


36it [01:13,  2.33s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.3992, 0.5446, 0.5412],
        [0.3992, 0.5446, 0.5412],
        [0.3992, 0.5446, 0.5412],
        [0.3992, 0.5446, 0.5412],
        [0.3992, 0.5446, 0.5412],
        [0.3992, 0.5446, 0.5412],
        [0.3992, 0.5446, 0.5412],
        [0.3992, 0.5446, 0.5412]], device='cuda:0', grad_fn=<AddmmBackward0>)


37it [01:15,  2.19s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.5430, 0.7146, 0.4689],
        [0.5430, 0.7147, 0.4689],
        [0.5430, 0.7146, 0.4689],
        [0.5430, 0.7146, 0.4689],
        [0.5430, 0.7146, 0.4688],
        [0.5430, 0.7146, 0.4689],
        [0.5430, 0.7147, 0.4689],
        [0.5430, 0.7146, 0.4689]], device='cuda:0', grad_fn=<AddmmBackward0>)


38it [01:17,  2.09s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.6250, 0.8685, 0.4336],
        [0.6250, 0.8685, 0.4336],
        [0.6250, 0.8685, 0.4336],
        [0.6250, 0.8685, 0.4336],
        [0.6250, 0.8685, 0.4336],
        [0.6250, 0.8685, 0.4336],
        [0.6250, 0.8685, 0.4336],
        [0.6250, 0.8685, 0.4336]], device='cuda:0', grad_fn=<AddmmBackward0>)


39it [01:19,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.5997, 0.6962, 0.3377],
        [0.5997, 0.6962, 0.3377],
        [0.5997, 0.6962, 0.3377],
        [0.5997, 0.6962, 0.3377],
        [0.5997, 0.6962, 0.3377],
        [0.5997, 0.6962, 0.3377],
        [0.5997, 0.6962, 0.3377],
        [0.5997, 0.6962, 0.3377]], device='cuda:0', grad_fn=<AddmmBackward0>)


40it [01:21,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.7893, 0.8632, 0.3595],
        [0.7892, 0.8632, 0.3595],
        [0.7893, 0.8632, 0.3595],
        [0.7893, 0.8632, 0.3595],
        [0.7893, 0.8632, 0.3595],
        [0.7893, 0.8632, 0.3595],
        [0.7893, 0.8632, 0.3595],
        [0.7893, 0.8632, 0.3595]], device='cuda:0', grad_fn=<AddmmBackward0>)


41it [01:23,  2.02s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.6643, 0.6430, 0.2646],
        [0.6643, 0.6430, 0.2646],
        [0.6643, 0.6430, 0.2646],
        [0.6643, 0.6430, 0.2646],
        [0.6643, 0.6430, 0.2646],
        [0.6643, 0.6430, 0.2646],
        [0.6643, 0.6430, 0.2646],
        [0.6643, 0.6430, 0.2646]], device='cuda:0', grad_fn=<AddmmBackward0>)


42it [01:25,  2.09s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.3819, 0.2463, 0.1017],
        [0.3819, 0.2463, 0.1017],
        [0.3819, 0.2463, 0.1017],
        [0.3819, 0.2463, 0.1017],
        [0.3819, 0.2463, 0.1017],
        [0.3819, 0.2463, 0.1017],
        [0.3819, 0.2463, 0.1017],
        [0.3819, 0.2463, 0.1017]], device='cuda:0', grad_fn=<AddmmBackward0>)


43it [01:27,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.3031, 0.0959, 0.0218],
        [0.3031, 0.0959, 0.0218],
        [0.3031, 0.0959, 0.0218],
        [0.3031, 0.0959, 0.0218],
        [0.3031, 0.0959, 0.0218],
        [0.3031, 0.0959, 0.0218],
        [0.3031, 0.0959, 0.0218],
        [0.3031, 0.0959, 0.0218]], device='cuda:0', grad_fn=<AddmmBackward0>)


44it [01:29,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.5475, 0.3321, 0.1013],
        [0.5475, 0.3321, 0.1013],
        [0.5475, 0.3321, 0.1013],
        [0.5475, 0.3321, 0.1013],
        [0.5475, 0.3321, 0.1013],
        [0.5475, 0.3321, 0.1013],
        [0.5474, 0.3321, 0.1013],
        [0.5474, 0.3321, 0.1013]], device='cuda:0', grad_fn=<AddmmBackward0>)


45it [01:31,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.9288, 0.5886, 0.1586],
        [0.9288, 0.5886, 0.1586],
        [0.9288, 0.5886, 0.1586],
        [0.9288, 0.5886, 0.1586],
        [0.9288, 0.5886, 0.1586],
        [0.9288, 0.5886, 0.1586],
        [0.9288, 0.5886, 0.1586],
        [0.9287, 0.5886, 0.1586]], device='cuda:0', grad_fn=<AddmmBackward0>)


46it [01:32,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.3582, 0.8261, 0.2040],
        [1.3582, 0.8260, 0.2040],
        [1.3582, 0.8260, 0.2040],
        [1.3582, 0.8260, 0.2040],
        [1.3582, 0.8260, 0.2040],
        [1.3582, 0.8260, 0.2040],
        [1.3582, 0.8260, 0.2040],
        [1.3582, 0.8260, 0.2040]], device='cuda:0', grad_fn=<AddmmBackward0>)


47it [01:34,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.7148, 0.9164, 0.1908],
        [1.7148, 0.9164, 0.1908],
        [1.7148, 0.9164, 0.1908],
        [1.7148, 0.9164, 0.1908],
        [1.7148, 0.9164, 0.1908],
        [1.7148, 0.9164, 0.1908],
        [1.7148, 0.9164, 0.1908],
        [1.7148, 0.9164, 0.1908]], device='cuda:0', grad_fn=<AddmmBackward0>)


48it [01:36,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.6055, 0.8406, 0.1296],
        [1.6055, 0.8406, 0.1296],
        [1.6055, 0.8406, 0.1296],
        [1.6055, 0.8406, 0.1296],
        [1.6055, 0.8406, 0.1296],
        [1.6055, 0.8406, 0.1296],
        [1.6055, 0.8406, 0.1296],
        [1.6055, 0.8406, 0.1296]], device='cuda:0', grad_fn=<AddmmBackward0>)


49it [01:38,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.2963, 0.6593, 0.1437],
        [1.2964, 0.6593, 0.1437],
        [1.2964, 0.6594, 0.1437],
        [1.2964, 0.6593, 0.1437],
        [1.2964, 0.6594, 0.1437],
        [1.2964, 0.6594, 0.1437],
        [1.2964, 0.6594, 0.1437],
        [1.2964, 0.6594, 0.1437]], device='cuda:0', grad_fn=<AddmmBackward0>)


50it [01:41,  2.05s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.0520, 0.5260, 0.1525],
        [1.0520, 0.5260, 0.1525],
        [1.0519, 0.5259, 0.1525],
        [1.0520, 0.5260, 0.1525],
        [1.0520, 0.5260, 0.1525],
        [1.0520, 0.5260, 0.1525],
        [1.0520, 0.5260, 0.1525],
        [1.0520, 0.5260, 0.1525]], device='cuda:0', grad_fn=<AddmmBackward0>)


51it [01:42,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.7427, 0.3602, 0.1396],
        [0.7427, 0.3602, 0.1396],
        [0.7427, 0.3602, 0.1396],
        [0.7427, 0.3602, 0.1396],
        [0.7427, 0.3602, 0.1396],
        [0.7427, 0.3602, 0.1396],
        [0.7427, 0.3602, 0.1396],
        [0.7427, 0.3602, 0.1396]], device='cuda:0', grad_fn=<AddmmBackward0>)


52it [01:44,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.5545, 0.2476, 0.1253],
        [0.5545, 0.2476, 0.1253],
        [0.5545, 0.2476, 0.1253],
        [0.5545, 0.2476, 0.1253],
        [0.5545, 0.2476, 0.1253],
        [0.5545, 0.2476, 0.1253],
        [0.5545, 0.2476, 0.1253],
        [0.5545, 0.2476, 0.1253]], device='cuda:0', grad_fn=<AddmmBackward0>)


53it [01:46,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.4453, 0.1793, 0.1150],
        [0.4453, 0.1793, 0.1150],
        [0.4453, 0.1793, 0.1150],
        [0.4453, 0.1793, 0.1150],
        [0.4453, 0.1793, 0.1150],
        [0.4453, 0.1793, 0.1150],
        [0.4453, 0.1793, 0.1150],
        [0.4453, 0.1793, 0.1150]], device='cuda:0', grad_fn=<AddmmBackward0>)


54it [01:48,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.3717, 0.1259, 0.1069],
        [0.3717, 0.1259, 0.1068],
        [0.3717, 0.1259, 0.1068],
        [0.3717, 0.1259, 0.1069],
        [0.3717, 0.1259, 0.1069],
        [0.3717, 0.1259, 0.1068],
        [0.3717, 0.1259, 0.1069],
        [0.3717, 0.1259, 0.1068]], device='cuda:0', grad_fn=<AddmmBackward0>)


55it [01:50,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.2748, 0.0611, 0.0864],
        [0.2748, 0.0611, 0.0864],
        [0.2748, 0.0611, 0.0864],
        [0.2748, 0.0611, 0.0864],
        [0.2748, 0.0611, 0.0864],
        [0.2748, 0.0611, 0.0864],
        [0.2748, 0.0611, 0.0864],
        [0.2748, 0.0611, 0.0864]], device='cuda:0', grad_fn=<AddmmBackward0>)


56it [01:52,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.2325, 0.0292, 0.0768],
        [0.2325, 0.0292, 0.0768],
        [0.2325, 0.0292, 0.0768],
        [0.2325, 0.0292, 0.0768],
        [0.2325, 0.0292, 0.0768],
        [0.2325, 0.0292, 0.0768],
        [0.2325, 0.0292, 0.0768],
        [0.2325, 0.0292, 0.0768]], device='cuda:0', grad_fn=<AddmmBackward0>)


57it [01:54,  2.04s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.2342, 0.0228, 0.0807],
        [0.2342, 0.0228, 0.0807],
        [0.2342, 0.0228, 0.0807],
        [0.2341, 0.0228, 0.0807],
        [0.2341, 0.0228, 0.0807],
        [0.2341, 0.0228, 0.0807],
        [0.2341, 0.0228, 0.0807],
        [0.2342, 0.0228, 0.0807]], device='cuda:0', grad_fn=<AddmmBackward0>)


58it [01:56,  2.02s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.2717, 0.0354, 0.0983],
        [0.2717, 0.0354, 0.0983],
        [0.2717, 0.0354, 0.0983],
        [0.2717, 0.0354, 0.0983],
        [0.2717, 0.0354, 0.0983],
        [0.2717, 0.0354, 0.0983],
        [0.2717, 0.0354, 0.0983],
        [0.2717, 0.0354, 0.0983]], device='cuda:0', grad_fn=<AddmmBackward0>)


59it [01:58,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.3245, 0.0562, 0.1219],
        [0.3245, 0.0562, 0.1219],
        [0.3245, 0.0562, 0.1219],
        [0.3245, 0.0562, 0.1219],
        [0.3245, 0.0562, 0.1219],
        [0.3245, 0.0562, 0.1219],
        [0.3245, 0.0562, 0.1219],
        [0.3245, 0.0562, 0.1219]], device='cuda:0', grad_fn=<AddmmBackward0>)


60it [02:00,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.4020, 0.0874, 0.1552],
        [0.4020, 0.0874, 0.1552],
        [0.4020, 0.0874, 0.1552],
        [0.4020, 0.0874, 0.1552],
        [0.4020, 0.0874, 0.1552],
        [0.4020, 0.0874, 0.1552],
        [0.4020, 0.0874, 0.1552],
        [0.4020, 0.0874, 0.1552]], device='cuda:0', grad_fn=<AddmmBackward0>)


61it [02:02,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.4909, 0.1239, 0.1913],
        [0.4909, 0.1239, 0.1913],
        [0.4909, 0.1239, 0.1913],
        [0.4909, 0.1239, 0.1913],
        [0.4909, 0.1239, 0.1913],
        [0.4909, 0.1239, 0.1913],
        [0.4909, 0.1239, 0.1913],
        [0.4909, 0.1239, 0.1913]], device='cuda:0', grad_fn=<AddmmBackward0>)


62it [02:04,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.6072, 0.1662, 0.2373],
        [0.6072, 0.1662, 0.2373],
        [0.6072, 0.1662, 0.2373],
        [0.6072, 0.1662, 0.2373],
        [0.6072, 0.1662, 0.2373],
        [0.6072, 0.1662, 0.2373],
        [0.6072, 0.1662, 0.2373],
        [0.6072, 0.1662, 0.2373]], device='cuda:0', grad_fn=<AddmmBackward0>)


63it [02:05,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.7256, 0.1997, 0.2858],
        [0.7256, 0.1997, 0.2858],
        [0.7256, 0.1997, 0.2858],
        [0.7256, 0.1997, 0.2858],
        [0.7256, 0.1997, 0.2858],
        [0.7256, 0.1997, 0.2858],
        [0.7256, 0.1997, 0.2858],
        [0.7256, 0.1997, 0.2858]], device='cuda:0', grad_fn=<AddmmBackward0>)


64it [02:08,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.7947, 0.2111, 0.3248],
        [0.7947, 0.2111, 0.3248],
        [0.7947, 0.2111, 0.3248],
        [0.7947, 0.2111, 0.3248],
        [0.7947, 0.2111, 0.3248],
        [0.7947, 0.2111, 0.3248],
        [0.7947, 0.2111, 0.3248],
        [0.7947, 0.2111, 0.3248]], device='cuda:0', grad_fn=<AddmmBackward0>)


65it [02:10,  2.06s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.8665, 0.2216, 0.3632],
        [0.8665, 0.2216, 0.3632],
        [0.8665, 0.2216, 0.3632],
        [0.8665, 0.2216, 0.3632],
        [0.8665, 0.2216, 0.3632],
        [0.8665, 0.2216, 0.3632],
        [0.8665, 0.2216, 0.3632],
        [0.8665, 0.2216, 0.3632]], device='cuda:0', grad_fn=<AddmmBackward0>)


66it [02:12,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.9172, 0.2091, 0.4147],
        [0.9172, 0.2090, 0.4147],
        [0.9172, 0.2091, 0.4147],
        [0.9173, 0.2091, 0.4147],
        [0.9173, 0.2091, 0.4147],
        [0.9173, 0.2091, 0.4147],
        [0.9173, 0.2091, 0.4147],
        [0.9173, 0.2091, 0.4147]], device='cuda:0', grad_fn=<AddmmBackward0>)


67it [02:14,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.9890, 0.2115, 0.4537],
        [0.9890, 0.2114, 0.4537],
        [0.9890, 0.2115, 0.4537],
        [0.9890, 0.2115, 0.4537],
        [0.9890, 0.2114, 0.4537],
        [0.9890, 0.2115, 0.4537],
        [0.9890, 0.2115, 0.4537],
        [0.9890, 0.2115, 0.4537]], device='cuda:0', grad_fn=<AddmmBackward0>)


68it [02:15,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.0868, 0.2212, 0.4895],
        [1.0868, 0.2212, 0.4895],
        [1.0868, 0.2212, 0.4895],
        [1.0868, 0.2212, 0.4895],
        [1.0868, 0.2212, 0.4895],
        [1.0868, 0.2212, 0.4895],
        [1.0868, 0.2212, 0.4895],
        [1.0868, 0.2212, 0.4895]], device='cuda:0', grad_fn=<AddmmBackward0>)


69it [02:17,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.2091, 0.2385, 0.5187],
        [1.2091, 0.2385, 0.5187],
        [1.2090, 0.2385, 0.5187],
        [1.2090, 0.2385, 0.5187],
        [1.2090, 0.2385, 0.5187],
        [1.2090, 0.2385, 0.5187],
        [1.2090, 0.2385, 0.5187],
        [1.2090, 0.2385, 0.5187]], device='cuda:0', grad_fn=<AddmmBackward0>)


70it [02:19,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.4048, 0.2605, 0.5551],
        [1.4048, 0.2605, 0.5551],
        [1.4048, 0.2605, 0.5551],
        [1.4048, 0.2605, 0.5551],
        [1.4048, 0.2605, 0.5551],
        [1.4048, 0.2605, 0.5551],
        [1.4048, 0.2605, 0.5551],
        [1.4048, 0.2605, 0.5551]], device='cuda:0', grad_fn=<AddmmBackward0>)


71it [02:21,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.5033, 0.2696, 0.5726],
        [1.5033, 0.2696, 0.5726],
        [1.5033, 0.2696, 0.5726],
        [1.5033, 0.2696, 0.5726],
        [1.5033, 0.2696, 0.5726],
        [1.5033, 0.2696, 0.5726],
        [1.5033, 0.2696, 0.5725],
        [1.5033, 0.2696, 0.5726]], device='cuda:0', grad_fn=<AddmmBackward0>)


72it [02:23,  2.03s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.7412, 0.2755, 0.6160],
        [1.7412, 0.2755, 0.6160],
        [1.7412, 0.2755, 0.6160],
        [1.7412, 0.2755, 0.6160],
        [1.7412, 0.2755, 0.6160],
        [1.7412, 0.2755, 0.6160],
        [1.7412, 0.2755, 0.6160],
        [1.7412, 0.2755, 0.6160]], device='cuda:0', grad_fn=<AddmmBackward0>)


73it [02:25,  2.05s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.8912, 0.2565, 0.6612],
        [1.8912, 0.2565, 0.6612],
        [1.8912, 0.2565, 0.6612],
        [1.8912, 0.2565, 0.6612],
        [1.8912, 0.2565, 0.6612],
        [1.8912, 0.2565, 0.6612],
        [1.8912, 0.2565, 0.6612],
        [1.8912, 0.2565, 0.6612]], device='cuda:0', grad_fn=<AddmmBackward0>)


74it [02:27,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.7725, 0.1970, 0.6967],
        [1.7725, 0.1970, 0.6967],
        [1.7726, 0.1970, 0.6967],
        [1.7725, 0.1970, 0.6967],
        [1.7726, 0.1970, 0.6967],
        [1.7725, 0.1970, 0.6967],
        [1.7725, 0.1970, 0.6967],
        [1.7725, 0.1970, 0.6967]], device='cuda:0', grad_fn=<AddmmBackward0>)


75it [02:29,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.4654, 0.1622, 0.6641],
        [1.4654, 0.1622, 0.6641],
        [1.4654, 0.1622, 0.6641],
        [1.4654, 0.1622, 0.6641],
        [1.4654, 0.1622, 0.6641],
        [1.4654, 0.1622, 0.6641],
        [1.4654, 0.1622, 0.6641],
        [1.4654, 0.1622, 0.6641]], device='cuda:0', grad_fn=<AddmmBackward0>)


76it [02:31,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.2041, 0.1240, 0.6266],
        [1.2041, 0.1240, 0.6266],
        [1.2041, 0.1240, 0.6266],
        [1.2041, 0.1240, 0.6266],
        [1.2041, 0.1240, 0.6266],
        [1.2041, 0.1240, 0.6266],
        [1.2041, 0.1240, 0.6266],
        [1.2041, 0.1240, 0.6266]], device='cuda:0', grad_fn=<AddmmBackward0>)


77it [02:33,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.9948, 0.0836, 0.5877],
        [0.9948, 0.0836, 0.5877],
        [0.9948, 0.0836, 0.5878],
        [0.9948, 0.0836, 0.5878],
        [0.9948, 0.0836, 0.5878],
        [0.9948, 0.0836, 0.5878],
        [0.9948, 0.0836, 0.5878],
        [0.9948, 0.0836, 0.5877]], device='cuda:0', grad_fn=<AddmmBackward0>)


78it [02:35,  1.99s/it]


Train Loss at epoch 0: 1.133993148803711



1it [00:00,  1.15it/s]

torch.Size([8, 20, 1024])


2it [00:02,  1.14s/it]

torch.Size([8, 20, 1024])


3it [00:03,  1.12s/it]

torch.Size([8, 20, 1024])


4it [00:04,  1.03s/it]

torch.Size([8, 20, 1024])


5it [00:04,  1.09it/s]

torch.Size([8, 20, 1024])


6it [00:05,  1.20it/s]

torch.Size([8, 20, 1024])


7it [00:06,  1.26it/s]

torch.Size([8, 20, 1024])


8it [00:06,  1.16it/s]


torch.Size([1, 20, 1024])
test Loss at epoch 0: 1.1873648166656494



0it [00:00, ?it/s]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.8053, 0.0585, 0.4996],
        [0.8053, 0.0585, 0.4996],
        [0.8053, 0.0585, 0.4996],
        [0.8053, 0.0585, 0.4996],
        [0.8053, 0.0585, 0.4996],
        [0.8053, 0.0585, 0.4996],
        [0.8053, 0.0585, 0.4996],
        [0.8053, 0.0585, 0.4996]], device='cuda:0', grad_fn=<AddmmBackward0>)


1it [00:01,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.6702, 0.0264, 0.4427],
        [0.6702, 0.0264, 0.4427],
        [0.6702, 0.0264, 0.4426],
        [0.6702, 0.0264, 0.4426],
        [0.6702, 0.0264, 0.4427],
        [0.6702, 0.0264, 0.4427],
        [0.6702, 0.0264, 0.4427],
        [0.6702, 0.0264, 0.4427]], device='cuda:0', grad_fn=<AddmmBackward0>)


2it [00:03,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.5860, 0.0160, 0.3829],
        [0.5860, 0.0160, 0.3829],
        [0.5860, 0.0160, 0.3829],
        [0.5859, 0.0160, 0.3828],
        [0.5860, 0.0160, 0.3829],
        [0.5860, 0.0160, 0.3829],
        [0.5860, 0.0160, 0.3829],
        [0.5860, 0.0160, 0.3829]], device='cuda:0', grad_fn=<AddmmBackward0>)


3it [00:05,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.5479, 0.0062, 0.3609],
        [0.5479, 0.0062, 0.3609],
        [0.5479, 0.0062, 0.3609],
        [0.5479, 0.0062, 0.3609],
        [0.5479, 0.0062, 0.3609],
        [0.5479, 0.0062, 0.3609],
        [0.5479, 0.0062, 0.3609],
        [0.5479, 0.0062, 0.3609]], device='cuda:0', grad_fn=<AddmmBackward0>)


4it [00:07,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.5521, 0.0012, 0.3769],
        [0.5521, 0.0012, 0.3769],
        [0.5521, 0.0012, 0.3769],
        [0.5521, 0.0012, 0.3769],
        [0.5521, 0.0012, 0.3769],
        [0.5521, 0.0012, 0.3769],
        [0.5521, 0.0012, 0.3769],
        [0.5521, 0.0012, 0.3769]], device='cuda:0', grad_fn=<AddmmBackward0>)


5it [00:09,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5595, -0.0032,  0.3863],
        [ 0.5595, -0.0032,  0.3863],
        [ 0.5595, -0.0032,  0.3863],
        [ 0.5595, -0.0032,  0.3863],
        [ 0.5595, -0.0032,  0.3863],
        [ 0.5595, -0.0032,  0.3863],
        [ 0.5595, -0.0032,  0.3863],
        [ 0.5595, -0.0032,  0.3863]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


6it [00:11,  2.06s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5690, -0.0073,  0.3886],
        [ 0.5690, -0.0073,  0.3886],
        [ 0.5690, -0.0073,  0.3886],
        [ 0.5690, -0.0073,  0.3886],
        [ 0.5690, -0.0073,  0.3886],
        [ 0.5690, -0.0073,  0.3886],
        [ 0.5690, -0.0073,  0.3886],
        [ 0.5690, -0.0073,  0.3886]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


7it [00:13,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5566, -0.0121,  0.3694],
        [ 0.5566, -0.0121,  0.3694],
        [ 0.5566, -0.0121,  0.3694],
        [ 0.5566, -0.0121,  0.3694],
        [ 0.5566, -0.0121,  0.3694],
        [ 0.5566, -0.0121,  0.3694],
        [ 0.5566, -0.0121,  0.3694],
        [ 0.5566, -0.0121,  0.3694]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


8it [00:15,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5652, -0.0154,  0.3648],
        [ 0.5652, -0.0154,  0.3648],
        [ 0.5652, -0.0154,  0.3648],
        [ 0.5652, -0.0154,  0.3648],
        [ 0.5652, -0.0154,  0.3648],
        [ 0.5652, -0.0154,  0.3648],
        [ 0.5652, -0.0154,  0.3648],
        [ 0.5652, -0.0154,  0.3648]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


9it [00:17,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5936, -0.0176,  0.3736],
        [ 0.5936, -0.0176,  0.3736],
        [ 0.5936, -0.0176,  0.3736],
        [ 0.5936, -0.0176,  0.3736],
        [ 0.5936, -0.0176,  0.3736],
        [ 0.5936, -0.0176,  0.3736],
        [ 0.5936, -0.0176,  0.3736],
        [ 0.5936, -0.0176,  0.3736]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


10it [00:19,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.6314, -0.0204,  0.3963],
        [ 0.6314, -0.0204,  0.3963],
        [ 0.6314, -0.0204,  0.3963],
        [ 0.6314, -0.0204,  0.3963],
        [ 0.6314, -0.0204,  0.3963],
        [ 0.6314, -0.0204,  0.3963],
        [ 0.6314, -0.0204,  0.3963],
        [ 0.6314, -0.0204,  0.3963]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


11it [00:21,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.6766, -0.0014,  0.4332],
        [ 0.6766, -0.0014,  0.4331],
        [ 0.6766, -0.0014,  0.4331],
        [ 0.6766, -0.0014,  0.4331],
        [ 0.6766, -0.0014,  0.4331],
        [ 0.6766, -0.0014,  0.4331],
        [ 0.6766, -0.0014,  0.4331],
        [ 0.6766, -0.0014,  0.4331]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


12it [00:22,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.7117, -0.0234,  0.4283],
        [ 0.7117, -0.0234,  0.4283],
        [ 0.7117, -0.0234,  0.4283],
        [ 0.7117, -0.0234,  0.4283],
        [ 0.7117, -0.0234,  0.4283],
        [ 0.7117, -0.0234,  0.4283],
        [ 0.7117, -0.0234,  0.4283],
        [ 0.7117, -0.0234,  0.4283]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


13it [00:25,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.7553, -0.0225,  0.4386],
        [ 0.7553, -0.0225,  0.4386],
        [ 0.7553, -0.0225,  0.4386],
        [ 0.7553, -0.0225,  0.4386],
        [ 0.7553, -0.0225,  0.4386],
        [ 0.7553, -0.0225,  0.4386],
        [ 0.7553, -0.0225,  0.4386],
        [ 0.7553, -0.0225,  0.4386]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


14it [00:27,  2.04s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.7375, -0.0172,  0.3988],
        [ 0.7375, -0.0172,  0.3988],
        [ 0.7375, -0.0172,  0.3988],
        [ 0.7375, -0.0172,  0.3988],
        [ 0.7375, -0.0172,  0.3988],
        [ 0.7375, -0.0172,  0.3988],
        [ 0.7375, -0.0172,  0.3988],
        [ 0.7375, -0.0172,  0.3988]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


15it [00:29,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.7732, 0.0416, 0.4520],
        [0.7732, 0.0416, 0.4520],
        [0.7732, 0.0416, 0.4520],
        [0.7732, 0.0416, 0.4520],
        [0.7732, 0.0416, 0.4520],
        [0.7732, 0.0416, 0.4520],
        [0.7732, 0.0416, 0.4520],
        [0.7732, 0.0416, 0.4520]], device='cuda:0', grad_fn=<AddmmBackward0>)


16it [00:31,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.7202, 0.0559, 0.4050],
        [0.7202, 0.0559, 0.4050],
        [0.7202, 0.0559, 0.4050],
        [0.7202, 0.0559, 0.4050],
        [0.7201, 0.0559, 0.4050],
        [0.7202, 0.0559, 0.4050],
        [0.7202, 0.0559, 0.4050],
        [0.7202, 0.0559, 0.4050]], device='cuda:0', grad_fn=<AddmmBackward0>)


17it [00:33,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.6368, 0.0034, 0.2835],
        [0.6368, 0.0034, 0.2835],
        [0.6368, 0.0034, 0.2835],
        [0.6368, 0.0034, 0.2835],
        [0.6368, 0.0034, 0.2835],
        [0.6368, 0.0034, 0.2835],
        [0.6368, 0.0034, 0.2835],
        [0.6368, 0.0034, 0.2835]], device='cuda:0', grad_fn=<AddmmBackward0>)


18it [00:34,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5837, -0.0212,  0.2092],
        [ 0.5837, -0.0212,  0.2092],
        [ 0.5837, -0.0212,  0.2092],
        [ 0.5837, -0.0212,  0.2092],
        [ 0.5837, -0.0212,  0.2092],
        [ 0.5837, -0.0212,  0.2092],
        [ 0.5837, -0.0212,  0.2092],
        [ 0.5837, -0.0212,  0.2092]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


19it [00:36,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5139, -0.0279,  0.1460],
        [ 0.5139, -0.0279,  0.1460],
        [ 0.5139, -0.0279,  0.1460],
        [ 0.5139, -0.0279,  0.1460],
        [ 0.5139, -0.0279,  0.1460],
        [ 0.5139, -0.0279,  0.1460],
        [ 0.5139, -0.0279,  0.1460],
        [ 0.5139, -0.0279,  0.1460]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


20it [00:38,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4588, -0.0350,  0.0969],
        [ 0.4588, -0.0350,  0.0969],
        [ 0.4588, -0.0350,  0.0969],
        [ 0.4588, -0.0350,  0.0969],
        [ 0.4588, -0.0350,  0.0969],
        [ 0.4588, -0.0350,  0.0969],
        [ 0.4588, -0.0350,  0.0969],
        [ 0.4588, -0.0350,  0.0969]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


21it [00:41,  2.04s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4290, -0.0389,  0.0675],
        [ 0.4290, -0.0389,  0.0675],
        [ 0.4290, -0.0389,  0.0675],
        [ 0.4290, -0.0389,  0.0675],
        [ 0.4290, -0.0389,  0.0675],
        [ 0.4290, -0.0389,  0.0675],
        [ 0.4290, -0.0389,  0.0675],
        [ 0.4290, -0.0389,  0.0675]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


22it [00:43,  2.02s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4401, -0.0335,  0.0609],
        [ 0.4401, -0.0335,  0.0609],
        [ 0.4401, -0.0335,  0.0609],
        [ 0.4401, -0.0335,  0.0609],
        [ 0.4401, -0.0335,  0.0609],
        [ 0.4401, -0.0335,  0.0609],
        [ 0.4401, -0.0335,  0.0609],
        [ 0.4401, -0.0335,  0.0609]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


23it [00:44,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4979, -0.0133,  0.0739],
        [ 0.4979, -0.0133,  0.0739],
        [ 0.4979, -0.0133,  0.0739],
        [ 0.4979, -0.0133,  0.0739],
        [ 0.4979, -0.0133,  0.0739],
        [ 0.4979, -0.0133,  0.0739],
        [ 0.4979, -0.0133,  0.0739],
        [ 0.4979, -0.0133,  0.0739]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


24it [00:46,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.5820, 0.0153, 0.0943],
        [0.5820, 0.0153, 0.0943],
        [0.5820, 0.0153, 0.0943],
        [0.5820, 0.0153, 0.0943],
        [0.5820, 0.0153, 0.0943],
        [0.5820, 0.0153, 0.0943],
        [0.5820, 0.0153, 0.0943],
        [0.5820, 0.0153, 0.0943]], device='cuda:0', grad_fn=<AddmmBackward0>)


25it [00:48,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.6201, 0.0295, 0.0974],
        [0.6201, 0.0295, 0.0974],
        [0.6201, 0.0295, 0.0974],
        [0.6201, 0.0295, 0.0974],
        [0.6201, 0.0295, 0.0974],
        [0.6201, 0.0295, 0.0974],
        [0.6201, 0.0295, 0.0974],
        [0.6201, 0.0295, 0.0974]], device='cuda:0', grad_fn=<AddmmBackward0>)


26it [00:50,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.6874, 0.0528, 0.1087],
        [0.6874, 0.0528, 0.1087],
        [0.6874, 0.0528, 0.1087],
        [0.6874, 0.0528, 0.1087],
        [0.6874, 0.0528, 0.1087],
        [0.6874, 0.0528, 0.1087],
        [0.6874, 0.0528, 0.1087],
        [0.6874, 0.0529, 0.1087]], device='cuda:0', grad_fn=<AddmmBackward0>)


27it [00:52,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.7838, 0.0855, 0.1255],
        [0.7837, 0.0855, 0.1255],
        [0.7838, 0.0855, 0.1255],
        [0.7838, 0.0855, 0.1255],
        [0.7838, 0.0855, 0.1255],
        [0.7838, 0.0855, 0.1255],
        [0.7838, 0.0855, 0.1255],
        [0.7838, 0.0855, 0.1255]], device='cuda:0', grad_fn=<AddmmBackward0>)


28it [00:54,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.8772, 0.1125, 0.1425],
        [0.8772, 0.1125, 0.1425],
        [0.8772, 0.1125, 0.1425],
        [0.8772, 0.1125, 0.1425],
        [0.8772, 0.1125, 0.1425],
        [0.8772, 0.1125, 0.1425],
        [0.8772, 0.1125, 0.1425],
        [0.8772, 0.1125, 0.1425]], device='cuda:0', grad_fn=<AddmmBackward0>)


29it [00:56,  2.04s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.0009, 0.1425, 0.1674],
        [1.0009, 0.1425, 0.1674],
        [1.0009, 0.1425, 0.1674],
        [1.0009, 0.1425, 0.1674],
        [1.0009, 0.1425, 0.1674],
        [1.0009, 0.1425, 0.1674],
        [1.0009, 0.1425, 0.1674],
        [1.0009, 0.1425, 0.1674]], device='cuda:0', grad_fn=<AddmmBackward0>)


30it [00:58,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.0574, 0.1530, 0.1770],
        [1.0575, 0.1530, 0.1770],
        [1.0575, 0.1530, 0.1770],
        [1.0575, 0.1530, 0.1770],
        [1.0575, 0.1530, 0.1770],
        [1.0575, 0.1530, 0.1770],
        [1.0575, 0.1530, 0.1770],
        [1.0575, 0.1530, 0.1770]], device='cuda:0', grad_fn=<AddmmBackward0>)


31it [01:00,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.0424, 0.1449, 0.1729],
        [1.0424, 0.1449, 0.1729],
        [1.0424, 0.1449, 0.1729],
        [1.0424, 0.1449, 0.1729],
        [1.0424, 0.1449, 0.1729],
        [1.0424, 0.1449, 0.1729],
        [1.0424, 0.1449, 0.1729],
        [1.0424, 0.1449, 0.1729]], device='cuda:0', grad_fn=<AddmmBackward0>)


32it [01:02,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.0584, 0.1323, 0.1850],
        [1.0584, 0.1323, 0.1850],
        [1.0584, 0.1323, 0.1850],
        [1.0584, 0.1323, 0.1850],
        [1.0584, 0.1323, 0.1850],
        [1.0584, 0.1323, 0.1850],
        [1.0584, 0.1323, 0.1850],
        [1.0584, 0.1323, 0.1850]], device='cuda:0', grad_fn=<AddmmBackward0>)


33it [01:03,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.0508, 0.1246, 0.1840],
        [1.0508, 0.1246, 0.1840],
        [1.0508, 0.1246, 0.1840],
        [1.0508, 0.1246, 0.1840],
        [1.0508, 0.1246, 0.1840],
        [1.0508, 0.1246, 0.1840],
        [1.0508, 0.1246, 0.1840],
        [1.0508, 0.1246, 0.1840]], device='cuda:0', grad_fn=<AddmmBackward0>)


34it [01:05,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.1153, 0.1384, 0.1935],
        [1.1153, 0.1384, 0.1935],
        [1.1153, 0.1384, 0.1935],
        [1.1153, 0.1384, 0.1935],
        [1.1153, 0.1384, 0.1935],
        [1.1153, 0.1384, 0.1935],
        [1.1153, 0.1384, 0.1935],
        [1.1153, 0.1384, 0.1935]], device='cuda:0', grad_fn=<AddmmBackward0>)


35it [01:07,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.4437, 0.2048, 0.2763],
        [1.4437, 0.2048, 0.2763],
        [1.4437, 0.2048, 0.2763],
        [1.4437, 0.2048, 0.2763],
        [1.4437, 0.2048, 0.2763],
        [1.4437, 0.2048, 0.2763],
        [1.4437, 0.2048, 0.2763],
        [1.4437, 0.2048, 0.2763]], device='cuda:0', grad_fn=<AddmmBackward0>)


36it [01:10,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.6193, 0.2183, 0.3263],
        [1.6193, 0.2183, 0.3263],
        [1.6193, 0.2183, 0.3263],
        [1.6193, 0.2183, 0.3263],
        [1.6193, 0.2183, 0.3263],
        [1.6193, 0.2183, 0.3263],
        [1.6193, 0.2183, 0.3263],
        [1.6193, 0.2183, 0.3263]], device='cuda:0', grad_fn=<AddmmBackward0>)


37it [01:12,  2.06s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.8813, 0.2609, 0.3764],
        [1.8813, 0.2609, 0.3764],
        [1.8813, 0.2609, 0.3764],
        [1.8813, 0.2609, 0.3764],
        [1.8813, 0.2609, 0.3764],
        [1.8813, 0.2609, 0.3764],
        [1.8813, 0.2609, 0.3764],
        [1.8813, 0.2609, 0.3764]], device='cuda:0', grad_fn=<AddmmBackward0>)


38it [01:14,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.6300, 0.2525, 0.3041],
        [1.6300, 0.2525, 0.3041],
        [1.6300, 0.2525, 0.3041],
        [1.6300, 0.2525, 0.3041],
        [1.6300, 0.2525, 0.3041],
        [1.6300, 0.2525, 0.3041],
        [1.6299, 0.2525, 0.3041],
        [1.6300, 0.2525, 0.3041]], device='cuda:0', grad_fn=<AddmmBackward0>)


39it [01:15,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.4081, 0.2429, 0.2353],
        [1.4082, 0.2429, 0.2353],
        [1.4081, 0.2429, 0.2353],
        [1.4082, 0.2429, 0.2353],
        [1.4082, 0.2429, 0.2353],
        [1.4082, 0.2429, 0.2353],
        [1.4082, 0.2429, 0.2353],
        [1.4082, 0.2429, 0.2353]], device='cuda:0', grad_fn=<AddmmBackward0>)


40it [01:17,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.2377, 0.2126, 0.1960],
        [1.2377, 0.2126, 0.1960],
        [1.2377, 0.2126, 0.1960],
        [1.2377, 0.2126, 0.1960],
        [1.2377, 0.2126, 0.1960],
        [1.2377, 0.2126, 0.1960],
        [1.2377, 0.2126, 0.1960],
        [1.2377, 0.2126, 0.1960]], device='cuda:0', grad_fn=<AddmmBackward0>)


41it [01:19,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[1.0242, 0.1555, 0.1571],
        [1.0242, 0.1555, 0.1571],
        [1.0242, 0.1555, 0.1571],
        [1.0242, 0.1555, 0.1571],
        [1.0242, 0.1555, 0.1571],
        [1.0242, 0.1555, 0.1571],
        [1.0242, 0.1555, 0.1571],
        [1.0242, 0.1555, 0.1571]], device='cuda:0', grad_fn=<AddmmBackward0>)


42it [01:21,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.8124, 0.0959, 0.1223],
        [0.8124, 0.0959, 0.1223],
        [0.8124, 0.0959, 0.1223],
        [0.8124, 0.0959, 0.1223],
        [0.8124, 0.0959, 0.1223],
        [0.8124, 0.0959, 0.1223],
        [0.8124, 0.0959, 0.1223],
        [0.8124, 0.0959, 0.1223]], device='cuda:0', grad_fn=<AddmmBackward0>)


43it [01:23,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[0.6219, 0.0324, 0.0828],
        [0.6219, 0.0324, 0.0828],
        [0.6219, 0.0324, 0.0828],
        [0.6219, 0.0324, 0.0828],
        [0.6219, 0.0324, 0.0828],
        [0.6219, 0.0324, 0.0828],
        [0.6219, 0.0324, 0.0828],
        [0.6219, 0.0324, 0.0828]], device='cuda:0', grad_fn=<AddmmBackward0>)


44it [01:25,  2.05s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4623, -0.0217,  0.0328],
        [ 0.4623, -0.0217,  0.0327],
        [ 0.4623, -0.0217,  0.0327],
        [ 0.4623, -0.0217,  0.0328],
        [ 0.4623, -0.0217,  0.0328],
        [ 0.4623, -0.0217,  0.0327],
        [ 0.4623, -0.0217,  0.0328],
        [ 0.4623, -0.0217,  0.0327]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


45it [01:27,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3658, -0.0593, -0.0050],
        [ 0.3658, -0.0593, -0.0050],
        [ 0.3658, -0.0593, -0.0050],
        [ 0.3658, -0.0593, -0.0050],
        [ 0.3658, -0.0593, -0.0050],
        [ 0.3658, -0.0593, -0.0050],
        [ 0.3658, -0.0593, -0.0050],
        [ 0.3658, -0.0593, -0.0050]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


46it [01:29,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2889, -0.0913, -0.0416],
        [ 0.2889, -0.0913, -0.0416],
        [ 0.2889, -0.0913, -0.0416],
        [ 0.2889, -0.0913, -0.0416],
        [ 0.2889, -0.0913, -0.0416],
        [ 0.2889, -0.0913, -0.0416],
        [ 0.2889, -0.0913, -0.0416],
        [ 0.2889, -0.0913, -0.0416]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


47it [01:31,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2306, -0.1173, -0.0751],
        [ 0.2306, -0.1173, -0.0751],
        [ 0.2306, -0.1173, -0.0751],
        [ 0.2306, -0.1173, -0.0751],
        [ 0.2306, -0.1173, -0.0751],
        [ 0.2306, -0.1173, -0.0751],
        [ 0.2306, -0.1173, -0.0751],
        [ 0.2306, -0.1173, -0.0751]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


48it [01:33,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2040, -0.1264, -0.0910],
        [ 0.2040, -0.1264, -0.0910],
        [ 0.2040, -0.1264, -0.0910],
        [ 0.2040, -0.1264, -0.0910],
        [ 0.2040, -0.1264, -0.0910],
        [ 0.2040, -0.1264, -0.0910],
        [ 0.2040, -0.1264, -0.0910],
        [ 0.2040, -0.1264, -0.0910]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


49it [01:34,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2019, -0.1248, -0.0900],
        [ 0.2019, -0.1248, -0.0900],
        [ 0.2019, -0.1248, -0.0900],
        [ 0.2019, -0.1248, -0.0900],
        [ 0.2019, -0.1248, -0.0900],
        [ 0.2019, -0.1248, -0.0900],
        [ 0.2019, -0.1248, -0.0900],
        [ 0.2019, -0.1248, -0.0900]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


50it [01:36,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2016, -0.1244, -0.0900],
        [ 0.2016, -0.1244, -0.0900],
        [ 0.2016, -0.1244, -0.0900],
        [ 0.2016, -0.1244, -0.0900],
        [ 0.2016, -0.1244, -0.0900],
        [ 0.2016, -0.1244, -0.0900],
        [ 0.2016, -0.1244, -0.0900],
        [ 0.2016, -0.1244, -0.0900]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


51it [01:38,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2019, -0.1253, -0.0897],
        [ 0.2019, -0.1253, -0.0897],
        [ 0.2019, -0.1253, -0.0897],
        [ 0.2019, -0.1253, -0.0897],
        [ 0.2019, -0.1253, -0.0897],
        [ 0.2019, -0.1253, -0.0897],
        [ 0.2019, -0.1253, -0.0897],
        [ 0.2019, -0.1253, -0.0897]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


52it [01:40,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2026, -0.1279, -0.0883],
        [ 0.2026, -0.1279, -0.0883],
        [ 0.2026, -0.1279, -0.0883],
        [ 0.2026, -0.1279, -0.0883],
        [ 0.2026, -0.1279, -0.0883],
        [ 0.2026, -0.1279, -0.0883],
        [ 0.2026, -0.1279, -0.0883],
        [ 0.2026, -0.1279, -0.0883]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


53it [01:42,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2031, -0.1306, -0.0866],
        [ 0.2031, -0.1306, -0.0866],
        [ 0.2031, -0.1306, -0.0866],
        [ 0.2031, -0.1306, -0.0866],
        [ 0.2031, -0.1306, -0.0866],
        [ 0.2031, -0.1306, -0.0866],
        [ 0.2031, -0.1306, -0.0866],
        [ 0.2031, -0.1306, -0.0866]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


54it [01:44,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2034, -0.1325, -0.0854],
        [ 0.2034, -0.1325, -0.0854],
        [ 0.2034, -0.1325, -0.0854],
        [ 0.2034, -0.1325, -0.0854],
        [ 0.2034, -0.1325, -0.0854],
        [ 0.2034, -0.1325, -0.0854],
        [ 0.2034, -0.1325, -0.0854],
        [ 0.2034, -0.1325, -0.0854]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


55it [01:46,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2035, -0.1336, -0.0846],
        [ 0.2035, -0.1336, -0.0846],
        [ 0.2035, -0.1336, -0.0846],
        [ 0.2035, -0.1336, -0.0846],
        [ 0.2035, -0.1336, -0.0846],
        [ 0.2035, -0.1336, -0.0846],
        [ 0.2035, -0.1336, -0.0846],
        [ 0.2035, -0.1336, -0.0846]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


56it [01:48,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2033, -0.1350, -0.0844],
        [ 0.2033, -0.1350, -0.0844],
        [ 0.2033, -0.1350, -0.0844],
        [ 0.2033, -0.1350, -0.0844],
        [ 0.2033, -0.1350, -0.0844],
        [ 0.2033, -0.1350, -0.0844],
        [ 0.2033, -0.1350, -0.0844],
        [ 0.2033, -0.1350, -0.0844]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


57it [01:49,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.0845, -0.1616, -0.1235],
        [ 0.0845, -0.1616, -0.1235],
        [ 0.0845, -0.1616, -0.1235],
        [ 0.0845, -0.1616, -0.1235],
        [ 0.0845, -0.1616, -0.1235],
        [ 0.0845, -0.1616, -0.1235],
        [ 0.0845, -0.1616, -0.1235],
        [ 0.0845, -0.1616, -0.1235]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


58it [01:51,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.1221, -0.1574, -0.1130],
        [ 0.1221, -0.1574, -0.1130],
        [ 0.1221, -0.1574, -0.1130],
        [ 0.1221, -0.1574, -0.1130],
        [ 0.1221, -0.1574, -0.1130],
        [ 0.1221, -0.1574, -0.1130],
        [ 0.1221, -0.1574, -0.1130],
        [ 0.1221, -0.1574, -0.1130]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


59it [01:54,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2062, -0.1398, -0.0825],
        [ 0.2062, -0.1398, -0.0825],
        [ 0.2062, -0.1398, -0.0825],
        [ 0.2062, -0.1398, -0.0825],
        [ 0.2062, -0.1398, -0.0825],
        [ 0.2062, -0.1398, -0.0825],
        [ 0.2062, -0.1398, -0.0825],
        [ 0.2062, -0.1398, -0.0825]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


60it [01:56,  2.02s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2091, -0.1434, -0.0829],
        [ 0.2091, -0.1434, -0.0829],
        [ 0.2091, -0.1434, -0.0829],
        [ 0.2091, -0.1434, -0.0829],
        [ 0.2091, -0.1434, -0.0829],
        [ 0.2091, -0.1434, -0.0829],
        [ 0.2091, -0.1434, -0.0829],
        [ 0.2091, -0.1434, -0.0829]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


61it [01:58,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2133, -0.1476, -0.0843],
        [ 0.2133, -0.1476, -0.0843],
        [ 0.2133, -0.1476, -0.0843],
        [ 0.2133, -0.1476, -0.0843],
        [ 0.2133, -0.1476, -0.0843],
        [ 0.2133, -0.1476, -0.0843],
        [ 0.2133, -0.1476, -0.0843],
        [ 0.2133, -0.1476, -0.0843]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


62it [01:59,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2182, -0.1517, -0.0865],
        [ 0.2182, -0.1517, -0.0865],
        [ 0.2182, -0.1517, -0.0865],
        [ 0.2182, -0.1517, -0.0865],
        [ 0.2182, -0.1517, -0.0865],
        [ 0.2182, -0.1517, -0.0865],
        [ 0.2182, -0.1517, -0.0865],
        [ 0.2182, -0.1517, -0.0865]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


63it [02:01,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2218, -0.1539, -0.0888],
        [ 0.2218, -0.1539, -0.0888],
        [ 0.2218, -0.1539, -0.0888],
        [ 0.2218, -0.1539, -0.0888],
        [ 0.2218, -0.1539, -0.0888],
        [ 0.2218, -0.1539, -0.0888],
        [ 0.2218, -0.1539, -0.0888],
        [ 0.2218, -0.1539, -0.0888]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


64it [02:03,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2258, -0.1568, -0.0922],
        [ 0.2258, -0.1568, -0.0922],
        [ 0.2258, -0.1568, -0.0922],
        [ 0.2258, -0.1568, -0.0922],
        [ 0.2258, -0.1568, -0.0922],
        [ 0.2258, -0.1568, -0.0922],
        [ 0.2258, -0.1568, -0.0922],
        [ 0.2258, -0.1568, -0.0922]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


65it [02:05,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2315, -0.1529, -0.0822],
        [ 0.2315, -0.1529, -0.0822],
        [ 0.2315, -0.1529, -0.0822],
        [ 0.2315, -0.1529, -0.0822],
        [ 0.2315, -0.1529, -0.0822],
        [ 0.2315, -0.1529, -0.0822],
        [ 0.2315, -0.1529, -0.0822],
        [ 0.2315, -0.1529, -0.0822]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


66it [02:07,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2527, -0.1420, -0.0453],
        [ 0.2527, -0.1420, -0.0453],
        [ 0.2527, -0.1420, -0.0453],
        [ 0.2527, -0.1420, -0.0453],
        [ 0.2527, -0.1420, -0.0453],
        [ 0.2527, -0.1420, -0.0453],
        [ 0.2527, -0.1420, -0.0453],
        [ 0.2527, -0.1420, -0.0453]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


67it [02:09,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2784, -0.1434, -0.0067],
        [ 0.2784, -0.1434, -0.0068],
        [ 0.2784, -0.1434, -0.0068],
        [ 0.2785, -0.1434, -0.0067],
        [ 0.2784, -0.1434, -0.0067],
        [ 0.2785, -0.1434, -0.0067],
        [ 0.2784, -0.1434, -0.0067],
        [ 0.2784, -0.1434, -0.0067]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


68it [02:11,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2957, -0.0628,  0.0131],
        [ 0.2957, -0.0628,  0.0131],
        [ 0.2957, -0.0628,  0.0131],
        [ 0.2957, -0.0627,  0.0131],
        [ 0.2957, -0.0628,  0.0131],
        [ 0.2957, -0.0627,  0.0131],
        [ 0.2957, -0.0628,  0.0131],
        [ 0.2957, -0.0628,  0.0131]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


69it [02:13,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3352, -0.1146,  0.0154],
        [ 0.3352, -0.1146,  0.0154],
        [ 0.3352, -0.1146,  0.0154],
        [ 0.3352, -0.1146,  0.0154],
        [ 0.3352, -0.1146,  0.0154],
        [ 0.3352, -0.1146,  0.0154],
        [ 0.3352, -0.1146,  0.0154],
        [ 0.3352, -0.1146,  0.0154]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


70it [02:15,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3941, -0.2318,  0.1075],
        [ 0.3941, -0.2318,  0.1075],
        [ 0.3941, -0.2318,  0.1075],
        [ 0.3941, -0.2318,  0.1075],
        [ 0.3941, -0.2318,  0.1075],
        [ 0.3941, -0.2318,  0.1075],
        [ 0.3941, -0.2318,  0.1075],
        [ 0.3941, -0.2318,  0.1075]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


71it [02:16,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4752, -0.2585,  0.1105],
        [ 0.4752, -0.2585,  0.1105],
        [ 0.4752, -0.2585,  0.1105],
        [ 0.4752, -0.2585,  0.1105],
        [ 0.4752, -0.2585,  0.1106],
        [ 0.4752, -0.2585,  0.1105],
        [ 0.4752, -0.2585,  0.1105],
        [ 0.4752, -0.2585,  0.1105]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


72it [02:18,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4551, -0.1916,  0.0124],
        [ 0.4551, -0.1916,  0.0125],
        [ 0.4551, -0.1916,  0.0125],
        [ 0.4551, -0.1916,  0.0125],
        [ 0.4551, -0.1916,  0.0124],
        [ 0.4551, -0.1916,  0.0124],
        [ 0.4551, -0.1916,  0.0125],
        [ 0.4551, -0.1916,  0.0125]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


73it [02:20,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4851, -0.1512, -0.0202],
        [ 0.4851, -0.1512, -0.0202],
        [ 0.4851, -0.1513, -0.0202],
        [ 0.4850, -0.1512, -0.0202],
        [ 0.4851, -0.1512, -0.0202],
        [ 0.4850, -0.1512, -0.0202],
        [ 0.4851, -0.1512, -0.0202],
        [ 0.4850, -0.1512, -0.0202]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


74it [02:22,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.6352, -0.1454,  0.0079],
        [ 0.6352, -0.1454,  0.0079],
        [ 0.6352, -0.1454,  0.0079],
        [ 0.6352, -0.1454,  0.0079],
        [ 0.6352, -0.1454,  0.0079],
        [ 0.6352, -0.1454,  0.0079],
        [ 0.6352, -0.1454,  0.0079],
        [ 0.6352, -0.1454,  0.0079]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


75it [02:25,  2.04s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.8584, -0.1356,  0.0311],
        [ 0.8584, -0.1356,  0.0311],
        [ 0.8584, -0.1356,  0.0311],
        [ 0.8584, -0.1356,  0.0311],
        [ 0.8584, -0.1356,  0.0311],
        [ 0.8584, -0.1356,  0.0311],
        [ 0.8584, -0.1356,  0.0311],
        [ 0.8584, -0.1356,  0.0311]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


76it [02:26,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 1.1482, -0.1136,  0.0309],
        [ 1.1482, -0.1136,  0.0309],
        [ 1.1482, -0.1136,  0.0309],
        [ 1.1482, -0.1136,  0.0309],
        [ 1.1482, -0.1136,  0.0309],
        [ 1.1482, -0.1136,  0.0309],
        [ 1.1482, -0.1136,  0.0309],
        [ 1.1482, -0.1136,  0.0309]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


77it [02:28,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 1.6432, -0.1433,  0.0372],
        [ 1.6433, -0.1433,  0.0372],
        [ 1.6433, -0.1433,  0.0372],
        [ 1.6432, -0.1433,  0.0372],
        [ 1.6433, -0.1433,  0.0372],
        [ 1.6433, -0.1433,  0.0372],
        [ 1.6433, -0.1433,  0.0372],
        [ 1.6432, -0.1433,  0.0372]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


78it [02:30,  1.93s/it]


Train Loss at epoch 1: 1.045013427734375



1it [00:00,  1.65it/s]

torch.Size([8, 20, 1024])


2it [00:01,  1.57it/s]

torch.Size([8, 20, 1024])


3it [00:01,  1.50it/s]

torch.Size([8, 20, 1024])


4it [00:02,  1.52it/s]

torch.Size([8, 20, 1024])


5it [00:03,  1.35it/s]

torch.Size([8, 20, 1024])


6it [00:04,  1.31it/s]

torch.Size([8, 20, 1024])


7it [00:05,  1.21it/s]

torch.Size([8, 20, 1024])


8it [00:06,  1.32it/s]


torch.Size([1, 20, 1024])
test Loss at epoch 1: 1.495032548904419



0it [00:00, ?it/s]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 1.6404, -0.1203,  0.0222],
        [ 1.6403, -0.1203,  0.0222],
        [ 1.6404, -0.1203,  0.0222],
        [ 1.6404, -0.1203,  0.0222],
        [ 1.6404, -0.1203,  0.0222],
        [ 1.6404, -0.1203,  0.0222],
        [ 1.6403, -0.1203,  0.0222],
        [ 1.6404, -0.1203,  0.0222]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


1it [00:02,  2.33s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 1.4817, -0.1507,  0.0543],
        [ 1.4817, -0.1507,  0.0543],
        [ 1.4817, -0.1507,  0.0543],
        [ 1.4817, -0.1507,  0.0543],
        [ 1.4817, -0.1507,  0.0543],
        [ 1.4817, -0.1507,  0.0543],
        [ 1.4817, -0.1507,  0.0543],
        [ 1.4817, -0.1507,  0.0543]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


2it [00:04,  2.10s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.9874, -0.1073,  0.0095],
        [ 0.9874, -0.1073,  0.0095],
        [ 0.9874, -0.1073,  0.0095],
        [ 0.9874, -0.1073,  0.0095],
        [ 0.9874, -0.1073,  0.0095],
        [ 0.9874, -0.1073,  0.0095],
        [ 0.9874, -0.1073,  0.0095],
        [ 0.9874, -0.1073,  0.0095]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


3it [00:06,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5444, -0.1032, -0.0539],
        [ 0.5444, -0.1032, -0.0539],
        [ 0.5444, -0.1032, -0.0539],
        [ 0.5444, -0.1032, -0.0539],
        [ 0.5444, -0.1032, -0.0539],
        [ 0.5444, -0.1032, -0.0539],
        [ 0.5444, -0.1032, -0.0539],
        [ 0.5444, -0.1032, -0.0539]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


4it [00:07,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2963, -0.1550, -0.1088],
        [ 0.2963, -0.1550, -0.1088],
        [ 0.2963, -0.1550, -0.1088],
        [ 0.2963, -0.1550, -0.1088],
        [ 0.2963, -0.1550, -0.1088],
        [ 0.2963, -0.1550, -0.1088],
        [ 0.2963, -0.1550, -0.1088],
        [ 0.2963, -0.1550, -0.1088]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


5it [00:09,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2894, -0.1676, -0.1208],
        [ 0.2894, -0.1676, -0.1208],
        [ 0.2894, -0.1676, -0.1208],
        [ 0.2894, -0.1676, -0.1208],
        [ 0.2894, -0.1676, -0.1208],
        [ 0.2894, -0.1676, -0.1208],
        [ 0.2894, -0.1676, -0.1208],
        [ 0.2894, -0.1676, -0.1208]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


6it [00:11,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2896, -0.1801, -0.1301],
        [ 0.2896, -0.1801, -0.1301],
        [ 0.2896, -0.1801, -0.1301],
        [ 0.2896, -0.1801, -0.1301],
        [ 0.2896, -0.1801, -0.1301],
        [ 0.2896, -0.1801, -0.1301],
        [ 0.2896, -0.1801, -0.1301],
        [ 0.2896, -0.1801, -0.1301]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


7it [00:13,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2942, -0.1878, -0.1381],
        [ 0.2942, -0.1878, -0.1381],
        [ 0.2942, -0.1878, -0.1381],
        [ 0.2942, -0.1878, -0.1381],
        [ 0.2942, -0.1878, -0.1381],
        [ 0.2942, -0.1878, -0.1381],
        [ 0.2942, -0.1878, -0.1381],
        [ 0.2942, -0.1878, -0.1381]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


8it [00:15,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3046, -0.1607, -0.1508],
        [ 0.3046, -0.1607, -0.1508],
        [ 0.3046, -0.1607, -0.1508],
        [ 0.3045, -0.1607, -0.1507],
        [ 0.3046, -0.1608, -0.1508],
        [ 0.3046, -0.1608, -0.1508],
        [ 0.3046, -0.1608, -0.1508],
        [ 0.3046, -0.1607, -0.1508]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


9it [00:17,  2.04s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[-0.1047,  0.8162,  0.7863],
        [-0.1047,  0.8162,  0.7863],
        [-0.1047,  0.8161,  0.7862],
        [-0.1047,  0.8162,  0.7862],
        [-0.1048,  0.8163,  0.7864],
        [-0.1048,  0.8163,  0.7864],
        [-0.1047,  0.8162,  0.7863],
        [-0.1046,  0.8162,  0.7862]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


10it [00:19,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3065, -0.1881, -0.1918],
        [ 0.3065, -0.1881, -0.1918],
        [ 0.3065, -0.1881, -0.1918],
        [ 0.3065, -0.1881, -0.1918],
        [ 0.3065, -0.1881, -0.1918],
        [ 0.3065, -0.1881, -0.1918],
        [ 0.3065, -0.1881, -0.1918],
        [ 0.3065, -0.1881, -0.1918]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


11it [00:21,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3533, -0.0328, -0.6726],
        [ 0.3533, -0.0328, -0.6726],
        [ 0.3532, -0.0329, -0.6725],
        [ 0.3533, -0.0328, -0.6726],
        [ 0.3532, -0.0328, -0.6725],
        [ 0.3533, -0.0328, -0.6726],
        [ 0.3533, -0.0328, -0.6725],
        [ 0.3533, -0.0328, -0.6726]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


12it [00:23,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2944, -0.2040, -0.2046],
        [ 0.2944, -0.2040, -0.2046],
        [ 0.2944, -0.2040, -0.2046],
        [ 0.2944, -0.2040, -0.2046],
        [ 0.2944, -0.2040, -0.2046],
        [ 0.2944, -0.2040, -0.2046],
        [ 0.2944, -0.2040, -0.2046],
        [ 0.2944, -0.2040, -0.2046]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


13it [00:25,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.2800, -0.2291, -0.2973],
        [ 0.2800, -0.2291, -0.2973],
        [ 0.2800, -0.2291, -0.2973],
        [ 0.2800, -0.2291, -0.2973],
        [ 0.2800, -0.2291, -0.2973],
        [ 0.2800, -0.2291, -0.2973],
        [ 0.2800, -0.2291, -0.2973],
        [ 0.2800, -0.2291, -0.2973]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


14it [00:27,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3322, -0.2128, -0.1667],
        [ 0.3322, -0.2128, -0.1667],
        [ 0.3322, -0.2128, -0.1667],
        [ 0.3322, -0.2128, -0.1667],
        [ 0.3322, -0.2128, -0.1667],
        [ 0.3322, -0.2128, -0.1667],
        [ 0.3322, -0.2128, -0.1667],
        [ 0.3322, -0.2128, -0.1667]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


15it [00:28,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3375, -0.2189, -0.1674],
        [ 0.3375, -0.2189, -0.1674],
        [ 0.3375, -0.2189, -0.1674],
        [ 0.3375, -0.2189, -0.1674],
        [ 0.3375, -0.2189, -0.1674],
        [ 0.3375, -0.2189, -0.1674],
        [ 0.3375, -0.2189, -0.1674],
        [ 0.3375, -0.2189, -0.1674]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


16it [00:31,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3437, -0.2261, -0.1682],
        [ 0.3437, -0.2261, -0.1682],
        [ 0.3437, -0.2261, -0.1682],
        [ 0.3437, -0.2261, -0.1682],
        [ 0.3437, -0.2261, -0.1682],
        [ 0.3437, -0.2261, -0.1682],
        [ 0.3437, -0.2261, -0.1682],
        [ 0.3437, -0.2261, -0.1682]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


17it [00:33,  2.02s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3500, -0.2326, -0.1699],
        [ 0.3500, -0.2326, -0.1699],
        [ 0.3500, -0.2326, -0.1699],
        [ 0.3500, -0.2326, -0.1699],
        [ 0.3500, -0.2326, -0.1699],
        [ 0.3500, -0.2326, -0.1699],
        [ 0.3500, -0.2326, -0.1699],
        [ 0.3500, -0.2326, -0.1699]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


18it [00:35,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3553, -0.2361, -0.1729],
        [ 0.3553, -0.2361, -0.1729],
        [ 0.3553, -0.2361, -0.1729],
        [ 0.3553, -0.2361, -0.1729],
        [ 0.3553, -0.2361, -0.1729],
        [ 0.3553, -0.2361, -0.1729],
        [ 0.3553, -0.2361, -0.1729],
        [ 0.3553, -0.2361, -0.1729]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


19it [00:36,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3590, -0.2393, -0.1743],
        [ 0.3590, -0.2393, -0.1743],
        [ 0.3590, -0.2393, -0.1743],
        [ 0.3590, -0.2393, -0.1743],
        [ 0.3590, -0.2393, -0.1743],
        [ 0.3590, -0.2393, -0.1743],
        [ 0.3590, -0.2393, -0.1743],
        [ 0.3590, -0.2393, -0.1743]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


20it [00:38,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3630, -0.2431, -0.1758],
        [ 0.3630, -0.2431, -0.1758],
        [ 0.3630, -0.2431, -0.1758],
        [ 0.3630, -0.2431, -0.1758],
        [ 0.3630, -0.2431, -0.1758],
        [ 0.3630, -0.2431, -0.1758],
        [ 0.3630, -0.2431, -0.1758],
        [ 0.3630, -0.2431, -0.1758]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


21it [00:40,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3663, -0.2465, -0.1765],
        [ 0.3663, -0.2465, -0.1765],
        [ 0.3663, -0.2465, -0.1765],
        [ 0.3663, -0.2465, -0.1765],
        [ 0.3663, -0.2465, -0.1765],
        [ 0.3663, -0.2465, -0.1765],
        [ 0.3663, -0.2465, -0.1765],
        [ 0.3663, -0.2465, -0.1765]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


22it [00:42,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3687, -0.2488, -0.1773],
        [ 0.3687, -0.2488, -0.1773],
        [ 0.3687, -0.2488, -0.1773],
        [ 0.3687, -0.2488, -0.1773],
        [ 0.3687, -0.2488, -0.1773],
        [ 0.3687, -0.2488, -0.1773],
        [ 0.3687, -0.2488, -0.1773],
        [ 0.3687, -0.2488, -0.1773]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


23it [00:44,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3711, -0.2517, -0.1774],
        [ 0.3711, -0.2517, -0.1774],
        [ 0.3711, -0.2517, -0.1774],
        [ 0.3711, -0.2517, -0.1774],
        [ 0.3711, -0.2517, -0.1774],
        [ 0.3711, -0.2517, -0.1774],
        [ 0.3711, -0.2517, -0.1774],
        [ 0.3711, -0.2517, -0.1774]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


24it [00:46,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3745, -0.2551, -0.1784],
        [ 0.3745, -0.2551, -0.1784],
        [ 0.3745, -0.2551, -0.1784],
        [ 0.3745, -0.2551, -0.1784],
        [ 0.3745, -0.2551, -0.1784],
        [ 0.3745, -0.2551, -0.1784],
        [ 0.3745, -0.2551, -0.1784],
        [ 0.3745, -0.2551, -0.1784]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


25it [00:48,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3778, -0.2574, -0.1801],
        [ 0.3778, -0.2574, -0.1801],
        [ 0.3778, -0.2574, -0.1801],
        [ 0.3778, -0.2574, -0.1801],
        [ 0.3778, -0.2574, -0.1801],
        [ 0.3778, -0.2574, -0.1801],
        [ 0.3778, -0.2574, -0.1801],
        [ 0.3778, -0.2574, -0.1801]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


26it [00:50,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3808, -0.2595, -0.1817],
        [ 0.3808, -0.2595, -0.1817],
        [ 0.3808, -0.2595, -0.1817],
        [ 0.3808, -0.2595, -0.1817],
        [ 0.3808, -0.2595, -0.1817],
        [ 0.3808, -0.2595, -0.1817],
        [ 0.3808, -0.2595, -0.1817],
        [ 0.3808, -0.2595, -0.1817]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


27it [00:51,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3825, -0.2598, -0.1834],
        [ 0.3825, -0.2598, -0.1834],
        [ 0.3825, -0.2598, -0.1834],
        [ 0.3825, -0.2598, -0.1834],
        [ 0.3825, -0.2598, -0.1834],
        [ 0.3825, -0.2598, -0.1834],
        [ 0.3825, -0.2598, -0.1834],
        [ 0.3825, -0.2598, -0.1834]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


28it [00:53,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3840, -0.2593, -0.1857],
        [ 0.3840, -0.2593, -0.1857],
        [ 0.3840, -0.2593, -0.1857],
        [ 0.3840, -0.2593, -0.1857],
        [ 0.3840, -0.2593, -0.1857],
        [ 0.3840, -0.2593, -0.1857],
        [ 0.3840, -0.2593, -0.1857],
        [ 0.3840, -0.2593, -0.1857]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


29it [00:55,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3850, -0.2581, -0.1879],
        [ 0.3850, -0.2581, -0.1879],
        [ 0.3850, -0.2581, -0.1879],
        [ 0.3850, -0.2581, -0.1879],
        [ 0.3850, -0.2581, -0.1879],
        [ 0.3850, -0.2581, -0.1879],
        [ 0.3850, -0.2581, -0.1879],
        [ 0.3850, -0.2581, -0.1879]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


30it [00:59,  2.31s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3847, -0.2570, -0.1886],
        [ 0.3847, -0.2570, -0.1886],
        [ 0.3847, -0.2570, -0.1886],
        [ 0.3847, -0.2570, -0.1886],
        [ 0.3847, -0.2570, -0.1886],
        [ 0.3847, -0.2570, -0.1886],
        [ 0.3847, -0.2570, -0.1886],
        [ 0.3847, -0.2570, -0.1886]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


31it [01:02,  2.56s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3851, -0.2552, -0.1907],
        [ 0.3851, -0.2552, -0.1907],
        [ 0.3851, -0.2552, -0.1907],
        [ 0.3851, -0.2552, -0.1907],
        [ 0.3851, -0.2552, -0.1907],
        [ 0.3851, -0.2552, -0.1907],
        [ 0.3851, -0.2552, -0.1907],
        [ 0.3851, -0.2552, -0.1907]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


32it [01:03,  2.33s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3856, -0.2544, -0.1920],
        [ 0.3856, -0.2544, -0.1920],
        [ 0.3856, -0.2544, -0.1920],
        [ 0.3856, -0.2544, -0.1920],
        [ 0.3856, -0.2544, -0.1920],
        [ 0.3856, -0.2544, -0.1920],
        [ 0.3856, -0.2544, -0.1920],
        [ 0.3856, -0.2544, -0.1920]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


33it [01:05,  2.16s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3868, -0.2545, -0.1933],
        [ 0.3868, -0.2545, -0.1933],
        [ 0.3868, -0.2545, -0.1933],
        [ 0.3868, -0.2545, -0.1933],
        [ 0.3868, -0.2545, -0.1933],
        [ 0.3868, -0.2545, -0.1933],
        [ 0.3868, -0.2545, -0.1933],
        [ 0.3868, -0.2545, -0.1933]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


34it [01:07,  2.04s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3886, -0.2554, -0.1946],
        [ 0.3886, -0.2554, -0.1946],
        [ 0.3886, -0.2554, -0.1946],
        [ 0.3886, -0.2554, -0.1946],
        [ 0.3886, -0.2554, -0.1946],
        [ 0.3886, -0.2554, -0.1946],
        [ 0.3886, -0.2554, -0.1946],
        [ 0.3886, -0.2554, -0.1946]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


35it [01:09,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3897, -0.2554, -0.1958],
        [ 0.3897, -0.2554, -0.1958],
        [ 0.3897, -0.2554, -0.1958],
        [ 0.3897, -0.2554, -0.1958],
        [ 0.3897, -0.2554, -0.1958],
        [ 0.3897, -0.2554, -0.1958],
        [ 0.3897, -0.2554, -0.1958],
        [ 0.3897, -0.2554, -0.1958]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


36it [01:11,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3902, -0.2555, -0.1963],
        [ 0.3902, -0.2555, -0.1963],
        [ 0.3902, -0.2555, -0.1963],
        [ 0.3902, -0.2555, -0.1963],
        [ 0.3902, -0.2555, -0.1963],
        [ 0.3902, -0.2555, -0.1963],
        [ 0.3902, -0.2555, -0.1963],
        [ 0.3902, -0.2555, -0.1963]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


37it [01:12,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3901, -0.2555, -0.1962],
        [ 0.3901, -0.2555, -0.1962],
        [ 0.3901, -0.2555, -0.1962],
        [ 0.3901, -0.2555, -0.1962],
        [ 0.3901, -0.2555, -0.1962],
        [ 0.3901, -0.2555, -0.1962],
        [ 0.3901, -0.2555, -0.1962],
        [ 0.3901, -0.2555, -0.1962]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


38it [01:14,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3907, -0.2556, -0.1969],
        [ 0.3907, -0.2556, -0.1969],
        [ 0.3907, -0.2556, -0.1969],
        [ 0.3907, -0.2556, -0.1969],
        [ 0.3907, -0.2556, -0.1969],
        [ 0.3907, -0.2556, -0.1969],
        [ 0.3907, -0.2556, -0.1969],
        [ 0.3907, -0.2556, -0.1969]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


39it [01:17,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3920, -0.2565, -0.1976],
        [ 0.3920, -0.2565, -0.1976],
        [ 0.3920, -0.2565, -0.1976],
        [ 0.3920, -0.2565, -0.1976],
        [ 0.3920, -0.2565, -0.1976],
        [ 0.3920, -0.2565, -0.1976],
        [ 0.3920, -0.2565, -0.1976],
        [ 0.3920, -0.2565, -0.1976]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


40it [01:18,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3926, -0.2573, -0.1976],
        [ 0.3926, -0.2573, -0.1976],
        [ 0.3926, -0.2573, -0.1976],
        [ 0.3926, -0.2573, -0.1976],
        [ 0.3926, -0.2573, -0.1976],
        [ 0.3926, -0.2573, -0.1976],
        [ 0.3926, -0.2573, -0.1976],
        [ 0.3926, -0.2573, -0.1976]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


41it [01:20,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3914, -0.2564, -0.1970],
        [ 0.3914, -0.2564, -0.1970],
        [ 0.3914, -0.2564, -0.1970],
        [ 0.3914, -0.2564, -0.1970],
        [ 0.3914, -0.2564, -0.1970],
        [ 0.3914, -0.2564, -0.1970],
        [ 0.3914, -0.2564, -0.1970],
        [ 0.3914, -0.2564, -0.1970]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


42it [01:22,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3904, -0.2556, -0.1965],
        [ 0.3904, -0.2556, -0.1965],
        [ 0.3904, -0.2556, -0.1965],
        [ 0.3904, -0.2556, -0.1965],
        [ 0.3904, -0.2556, -0.1965],
        [ 0.3904, -0.2556, -0.1965],
        [ 0.3904, -0.2556, -0.1965],
        [ 0.3904, -0.2556, -0.1965]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


43it [01:24,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3896, -0.2549, -0.1962],
        [ 0.3896, -0.2549, -0.1962],
        [ 0.3896, -0.2549, -0.1962],
        [ 0.3896, -0.2549, -0.1962],
        [ 0.3896, -0.2549, -0.1962],
        [ 0.3896, -0.2549, -0.1962],
        [ 0.3896, -0.2549, -0.1962],
        [ 0.3896, -0.2549, -0.1962]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


44it [01:25,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3883, -0.2543, -0.1953],
        [ 0.3883, -0.2543, -0.1953],
        [ 0.3883, -0.2543, -0.1953],
        [ 0.3883, -0.2543, -0.1953],
        [ 0.3883, -0.2543, -0.1953],
        [ 0.3883, -0.2543, -0.1953],
        [ 0.3883, -0.2543, -0.1953],
        [ 0.3883, -0.2543, -0.1953]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


45it [01:27,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3867, -0.2529, -0.1945],
        [ 0.3867, -0.2529, -0.1945],
        [ 0.3867, -0.2529, -0.1945],
        [ 0.3867, -0.2529, -0.1945],
        [ 0.3867, -0.2529, -0.1945],
        [ 0.3867, -0.2529, -0.1945],
        [ 0.3867, -0.2529, -0.1945],
        [ 0.3867, -0.2529, -0.1945]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


46it [01:29,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3859, -0.2533, -0.1933],
        [ 0.3859, -0.2533, -0.1933],
        [ 0.3859, -0.2533, -0.1933],
        [ 0.3859, -0.2533, -0.1933],
        [ 0.3859, -0.2533, -0.1933],
        [ 0.3859, -0.2533, -0.1933],
        [ 0.3859, -0.2533, -0.1933],
        [ 0.3859, -0.2533, -0.1933]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


47it [01:31,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3866, -0.2553, -0.1922],
        [ 0.3866, -0.2553, -0.1922],
        [ 0.3866, -0.2553, -0.1922],
        [ 0.3866, -0.2553, -0.1922],
        [ 0.3866, -0.2553, -0.1922],
        [ 0.3866, -0.2553, -0.1922],
        [ 0.3866, -0.2553, -0.1922],
        [ 0.3866, -0.2553, -0.1922]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


48it [01:33,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3879, -0.2563, -0.1928],
        [ 0.3879, -0.2563, -0.1928],
        [ 0.3879, -0.2563, -0.1928],
        [ 0.3879, -0.2563, -0.1928],
        [ 0.3879, -0.2563, -0.1928],
        [ 0.3879, -0.2563, -0.1928],
        [ 0.3879, -0.2563, -0.1928],
        [ 0.3879, -0.2563, -0.1928]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


49it [01:35,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3898, -0.2589, -0.1928],
        [ 0.3898, -0.2589, -0.1928],
        [ 0.3898, -0.2589, -0.1928],
        [ 0.3898, -0.2589, -0.1928],
        [ 0.3898, -0.2589, -0.1928],
        [ 0.3898, -0.2589, -0.1928],
        [ 0.3898, -0.2589, -0.1928],
        [ 0.3898, -0.2589, -0.1928]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


50it [01:37,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3917, -0.2620, -0.1921],
        [ 0.3917, -0.2620, -0.1921],
        [ 0.3917, -0.2620, -0.1921],
        [ 0.3917, -0.2620, -0.1921],
        [ 0.3917, -0.2620, -0.1921],
        [ 0.3917, -0.2620, -0.1921],
        [ 0.3917, -0.2620, -0.1921],
        [ 0.3917, -0.2620, -0.1921]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


51it [01:39,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3941, -0.2655, -0.1916],
        [ 0.3941, -0.2655, -0.1916],
        [ 0.3941, -0.2655, -0.1916],
        [ 0.3941, -0.2655, -0.1916],
        [ 0.3941, -0.2655, -0.1916],
        [ 0.3941, -0.2655, -0.1916],
        [ 0.3941, -0.2655, -0.1916],
        [ 0.3941, -0.2655, -0.1916]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


52it [01:40,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3957, -0.2688, -0.1905],
        [ 0.3957, -0.2688, -0.1905],
        [ 0.3957, -0.2688, -0.1905],
        [ 0.3957, -0.2688, -0.1905],
        [ 0.3957, -0.2688, -0.1905],
        [ 0.3957, -0.2688, -0.1905],
        [ 0.3957, -0.2688, -0.1905],
        [ 0.3957, -0.2688, -0.1905]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


53it [01:42,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3953, -0.2709, -0.1882],
        [ 0.3953, -0.2709, -0.1882],
        [ 0.3953, -0.2709, -0.1882],
        [ 0.3953, -0.2709, -0.1882],
        [ 0.3953, -0.2709, -0.1882],
        [ 0.3953, -0.2709, -0.1882],
        [ 0.3953, -0.2709, -0.1882],
        [ 0.3953, -0.2709, -0.1882]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


54it [01:44,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3951, -0.2719, -0.1869],
        [ 0.3951, -0.2719, -0.1869],
        [ 0.3951, -0.2719, -0.1869],
        [ 0.3951, -0.2719, -0.1869],
        [ 0.3951, -0.2719, -0.1869],
        [ 0.3951, -0.2719, -0.1869],
        [ 0.3951, -0.2719, -0.1869],
        [ 0.3951, -0.2719, -0.1869]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


55it [01:46,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3956, -0.2737, -0.1859],
        [ 0.3956, -0.2737, -0.1859],
        [ 0.3956, -0.2737, -0.1859],
        [ 0.3956, -0.2737, -0.1859],
        [ 0.3956, -0.2737, -0.1859],
        [ 0.3956, -0.2737, -0.1859],
        [ 0.3956, -0.2737, -0.1859],
        [ 0.3956, -0.2737, -0.1859]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


56it [01:48,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3949, -0.2745, -0.1844],
        [ 0.3949, -0.2745, -0.1844],
        [ 0.3949, -0.2745, -0.1844],
        [ 0.3949, -0.2745, -0.1844],
        [ 0.3949, -0.2745, -0.1844],
        [ 0.3949, -0.2745, -0.1844],
        [ 0.3949, -0.2745, -0.1844],
        [ 0.3949, -0.2745, -0.1844]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


57it [01:50,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3937, -0.2743, -0.1831],
        [ 0.3937, -0.2743, -0.1831],
        [ 0.3937, -0.2743, -0.1831],
        [ 0.3937, -0.2743, -0.1831],
        [ 0.3937, -0.2743, -0.1831],
        [ 0.3937, -0.2743, -0.1831],
        [ 0.3937, -0.2743, -0.1831],
        [ 0.3937, -0.2743, -0.1831]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


58it [01:52,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3915, -0.2718, -0.1828],
        [ 0.3915, -0.2718, -0.1828],
        [ 0.3915, -0.2718, -0.1828],
        [ 0.3915, -0.2718, -0.1828],
        [ 0.3915, -0.2718, -0.1828],
        [ 0.3915, -0.2718, -0.1828],
        [ 0.3915, -0.2718, -0.1828],
        [ 0.3915, -0.2718, -0.1828]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


59it [01:54,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3902, -0.2710, -0.1819],
        [ 0.3902, -0.2710, -0.1819],
        [ 0.3902, -0.2710, -0.1819],
        [ 0.3902, -0.2710, -0.1819],
        [ 0.3902, -0.2710, -0.1819],
        [ 0.3902, -0.2710, -0.1819],
        [ 0.3902, -0.2710, -0.1819],
        [ 0.3902, -0.2710, -0.1819]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


60it [01:55,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3891, -0.2720, -0.1798],
        [ 0.3891, -0.2720, -0.1798],
        [ 0.3891, -0.2720, -0.1798],
        [ 0.3891, -0.2720, -0.1798],
        [ 0.3891, -0.2720, -0.1798],
        [ 0.3891, -0.2720, -0.1798],
        [ 0.3891, -0.2720, -0.1798],
        [ 0.3891, -0.2720, -0.1798]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


61it [01:57,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3889, -0.2720, -0.1795],
        [ 0.3889, -0.2720, -0.1795],
        [ 0.3889, -0.2720, -0.1795],
        [ 0.3889, -0.2720, -0.1795],
        [ 0.3889, -0.2720, -0.1795],
        [ 0.3889, -0.2720, -0.1795],
        [ 0.3889, -0.2720, -0.1795],
        [ 0.3889, -0.2720, -0.1795]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


62it [01:59,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3907, -0.2736, -0.1801],
        [ 0.3907, -0.2736, -0.1801],
        [ 0.3907, -0.2736, -0.1801],
        [ 0.3907, -0.2736, -0.1801],
        [ 0.3907, -0.2736, -0.1801],
        [ 0.3907, -0.2736, -0.1801],
        [ 0.3907, -0.2736, -0.1801],
        [ 0.3907, -0.2736, -0.1801]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


63it [02:01,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3924, -0.2759, -0.1800],
        [ 0.3924, -0.2759, -0.1800],
        [ 0.3924, -0.2759, -0.1800],
        [ 0.3924, -0.2759, -0.1800],
        [ 0.3924, -0.2759, -0.1800],
        [ 0.3924, -0.2759, -0.1800],
        [ 0.3924, -0.2759, -0.1800],
        [ 0.3924, -0.2759, -0.1800]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


64it [02:03,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3940, -0.2786, -0.1793],
        [ 0.3940, -0.2786, -0.1793],
        [ 0.3940, -0.2786, -0.1793],
        [ 0.3940, -0.2786, -0.1793],
        [ 0.3940, -0.2786, -0.1793],
        [ 0.3940, -0.2786, -0.1793],
        [ 0.3940, -0.2786, -0.1793],
        [ 0.3940, -0.2786, -0.1793]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


65it [02:05,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3955, -0.2803, -0.1796],
        [ 0.3955, -0.2803, -0.1796],
        [ 0.3955, -0.2803, -0.1796],
        [ 0.3955, -0.2803, -0.1796],
        [ 0.3955, -0.2803, -0.1796],
        [ 0.3955, -0.2803, -0.1796],
        [ 0.3955, -0.2803, -0.1796],
        [ 0.3955, -0.2803, -0.1796]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


66it [02:07,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3977, -0.2810, -0.1814],
        [ 0.3977, -0.2810, -0.1814],
        [ 0.3977, -0.2810, -0.1814],
        [ 0.3977, -0.2810, -0.1814],
        [ 0.3977, -0.2810, -0.1814],
        [ 0.3977, -0.2810, -0.1814],
        [ 0.3977, -0.2810, -0.1814],
        [ 0.3977, -0.2810, -0.1814]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


67it [02:08,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.3984, -0.2824, -0.1810],
        [ 0.3984, -0.2824, -0.1810],
        [ 0.3984, -0.2824, -0.1810],
        [ 0.3984, -0.2824, -0.1810],
        [ 0.3984, -0.2824, -0.1810],
        [ 0.3984, -0.2824, -0.1810],
        [ 0.3984, -0.2824, -0.1810],
        [ 0.3984, -0.2824, -0.1810]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


68it [02:10,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4004, -0.2836, -0.1822],
        [ 0.4004, -0.2836, -0.1822],
        [ 0.4004, -0.2836, -0.1822],
        [ 0.4004, -0.2836, -0.1822],
        [ 0.4004, -0.2836, -0.1822],
        [ 0.4004, -0.2836, -0.1822],
        [ 0.4004, -0.2836, -0.1822],
        [ 0.4004, -0.2836, -0.1822]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


69it [02:12,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4029, -0.2847, -0.1841],
        [ 0.4029, -0.2847, -0.1841],
        [ 0.4029, -0.2847, -0.1841],
        [ 0.4029, -0.2847, -0.1841],
        [ 0.4029, -0.2847, -0.1841],
        [ 0.4029, -0.2847, -0.1841],
        [ 0.4029, -0.2847, -0.1841],
        [ 0.4029, -0.2847, -0.1841]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


70it [02:14,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4059, -0.2872, -0.1853],
        [ 0.4059, -0.2872, -0.1853],
        [ 0.4059, -0.2872, -0.1853],
        [ 0.4059, -0.2872, -0.1853],
        [ 0.4059, -0.2872, -0.1853],
        [ 0.4059, -0.2872, -0.1853],
        [ 0.4059, -0.2872, -0.1853],
        [ 0.4059, -0.2872, -0.1853]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


71it [02:17,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4093, -0.2902, -0.1864],
        [ 0.4093, -0.2902, -0.1864],
        [ 0.4093, -0.2902, -0.1864],
        [ 0.4093, -0.2902, -0.1864],
        [ 0.4093, -0.2902, -0.1864],
        [ 0.4093, -0.2902, -0.1864],
        [ 0.4093, -0.2902, -0.1864],
        [ 0.4093, -0.2902, -0.1864]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


72it [02:18,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4136, -0.2937, -0.1883],
        [ 0.4136, -0.2937, -0.1883],
        [ 0.4136, -0.2937, -0.1883],
        [ 0.4136, -0.2937, -0.1883],
        [ 0.4136, -0.2937, -0.1883],
        [ 0.4136, -0.2937, -0.1883],
        [ 0.4136, -0.2937, -0.1883],
        [ 0.4136, -0.2937, -0.1883]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


73it [02:20,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4182, -0.2968, -0.1908],
        [ 0.4182, -0.2968, -0.1908],
        [ 0.4182, -0.2968, -0.1908],
        [ 0.4182, -0.2968, -0.1908],
        [ 0.4182, -0.2968, -0.1908],
        [ 0.4182, -0.2968, -0.1908],
        [ 0.4182, -0.2968, -0.1908],
        [ 0.4182, -0.2968, -0.1908]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


74it [02:22,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4249, -0.3011, -0.1947],
        [ 0.4249, -0.3011, -0.1947],
        [ 0.4249, -0.3011, -0.1947],
        [ 0.4249, -0.3011, -0.1947],
        [ 0.4249, -0.3011, -0.1947],
        [ 0.4249, -0.3011, -0.1947],
        [ 0.4249, -0.3011, -0.1947],
        [ 0.4249, -0.3011, -0.1947]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


75it [02:24,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4303, -0.3024, -0.1997],
        [ 0.4303, -0.3024, -0.1997],
        [ 0.4303, -0.3024, -0.1997],
        [ 0.4303, -0.3024, -0.1997],
        [ 0.4303, -0.3024, -0.1997],
        [ 0.4303, -0.3024, -0.1997],
        [ 0.4303, -0.3024, -0.1997],
        [ 0.4303, -0.3024, -0.1997]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


76it [02:26,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4351, -0.3036, -0.2043],
        [ 0.4351, -0.3036, -0.2043],
        [ 0.4351, -0.3036, -0.2043],
        [ 0.4351, -0.3036, -0.2043],
        [ 0.4351, -0.3036, -0.2043],
        [ 0.4351, -0.3036, -0.2043],
        [ 0.4351, -0.3036, -0.2043],
        [ 0.4351, -0.3036, -0.2043]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


77it [02:28,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4389, -0.3038, -0.2086],
        [ 0.4389, -0.3038, -0.2086],
        [ 0.4389, -0.3038, -0.2086],
        [ 0.4389, -0.3038, -0.2086],
        [ 0.4389, -0.3038, -0.2086],
        [ 0.4389, -0.3038, -0.2086],
        [ 0.4389, -0.3038, -0.2086],
        [ 0.4389, -0.3038, -0.2086]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


78it [02:30,  1.93s/it]


Train Loss at epoch 2: 1.0447039604187012



1it [00:00,  1.64it/s]

torch.Size([8, 20, 1024])


2it [00:01,  1.52it/s]

torch.Size([8, 20, 1024])


3it [00:02,  1.45it/s]

torch.Size([8, 20, 1024])


4it [00:02,  1.47it/s]

torch.Size([8, 20, 1024])


5it [00:03,  1.46it/s]

torch.Size([8, 20, 1024])


6it [00:04,  1.46it/s]

torch.Size([8, 20, 1024])


7it [00:04,  1.47it/s]

torch.Size([8, 20, 1024])


8it [00:05,  1.50it/s]


torch.Size([1, 20, 1024])
test Loss at epoch 2: 1.1774767637252808



0it [00:00, ?it/s]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4423, -0.3047, -0.2117],
        [ 0.4423, -0.3047, -0.2117],
        [ 0.4423, -0.3047, -0.2117],
        [ 0.4423, -0.3047, -0.2117],
        [ 0.4423, -0.3047, -0.2117],
        [ 0.4423, -0.3047, -0.2117],
        [ 0.4423, -0.3047, -0.2117],
        [ 0.4423, -0.3047, -0.2117]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


1it [00:01,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4441, -0.3047, -0.2139],
        [ 0.4441, -0.3047, -0.2139],
        [ 0.4441, -0.3047, -0.2139],
        [ 0.4441, -0.3047, -0.2139],
        [ 0.4441, -0.3047, -0.2139],
        [ 0.4441, -0.3047, -0.2139],
        [ 0.4441, -0.3047, -0.2139],
        [ 0.4441, -0.3047, -0.2139]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


2it [00:03,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4457, -0.3037, -0.2166],
        [ 0.4457, -0.3037, -0.2166],
        [ 0.4457, -0.3037, -0.2166],
        [ 0.4457, -0.3037, -0.2166],
        [ 0.4457, -0.3037, -0.2166],
        [ 0.4457, -0.3037, -0.2166],
        [ 0.4457, -0.3037, -0.2166],
        [ 0.4457, -0.3037, -0.2166]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


3it [00:05,  1.80s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4484, -0.3036, -0.2199],
        [ 0.4484, -0.3036, -0.2199],
        [ 0.4484, -0.3036, -0.2199],
        [ 0.4484, -0.3036, -0.2199],
        [ 0.4484, -0.3036, -0.2199],
        [ 0.4484, -0.3036, -0.2199],
        [ 0.4484, -0.3036, -0.2199],
        [ 0.4484, -0.3036, -0.2199]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


4it [00:07,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4514, -0.3034, -0.2237],
        [ 0.4514, -0.3034, -0.2237],
        [ 0.4514, -0.3034, -0.2237],
        [ 0.4514, -0.3034, -0.2237],
        [ 0.4514, -0.3034, -0.2237],
        [ 0.4514, -0.3034, -0.2237],
        [ 0.4514, -0.3034, -0.2237],
        [ 0.4514, -0.3034, -0.2237]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


5it [00:09,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4541, -0.3032, -0.2271],
        [ 0.4541, -0.3032, -0.2271],
        [ 0.4541, -0.3032, -0.2271],
        [ 0.4541, -0.3032, -0.2271],
        [ 0.4541, -0.3032, -0.2271],
        [ 0.4541, -0.3032, -0.2271],
        [ 0.4541, -0.3032, -0.2271],
        [ 0.4541, -0.3032, -0.2271]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


6it [00:11,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4565, -0.3037, -0.2295],
        [ 0.4565, -0.3037, -0.2295],
        [ 0.4565, -0.3037, -0.2295],
        [ 0.4565, -0.3037, -0.2295],
        [ 0.4565, -0.3037, -0.2295],
        [ 0.4565, -0.3037, -0.2295],
        [ 0.4565, -0.3037, -0.2295],
        [ 0.4565, -0.3037, -0.2295]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


7it [00:13,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4606, -0.3048, -0.2331],
        [ 0.4606, -0.3048, -0.2331],
        [ 0.4606, -0.3048, -0.2331],
        [ 0.4606, -0.3048, -0.2331],
        [ 0.4606, -0.3048, -0.2331],
        [ 0.4606, -0.3048, -0.2331],
        [ 0.4606, -0.3048, -0.2331],
        [ 0.4606, -0.3048, -0.2331]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


8it [00:15,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4635, -0.3058, -0.2357],
        [ 0.4635, -0.3058, -0.2357],
        [ 0.4635, -0.3058, -0.2357],
        [ 0.4635, -0.3058, -0.2357],
        [ 0.4635, -0.3058, -0.2357],
        [ 0.4635, -0.3058, -0.2357],
        [ 0.4635, -0.3058, -0.2357],
        [ 0.4635, -0.3058, -0.2357]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


9it [00:16,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4655, -0.3058, -0.2380],
        [ 0.4655, -0.3058, -0.2380],
        [ 0.4655, -0.3058, -0.2380],
        [ 0.4655, -0.3058, -0.2380],
        [ 0.4655, -0.3058, -0.2380],
        [ 0.4655, -0.3058, -0.2380],
        [ 0.4655, -0.3058, -0.2380],
        [ 0.4655, -0.3058, -0.2380]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


10it [00:18,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4672, -0.3057, -0.2401],
        [ 0.4672, -0.3057, -0.2401],
        [ 0.4672, -0.3057, -0.2401],
        [ 0.4672, -0.3057, -0.2401],
        [ 0.4672, -0.3057, -0.2401],
        [ 0.4672, -0.3057, -0.2401],
        [ 0.4672, -0.3057, -0.2401],
        [ 0.4672, -0.3057, -0.2401]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


11it [00:20,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4693, -0.3063, -0.2421],
        [ 0.4693, -0.3063, -0.2421],
        [ 0.4693, -0.3063, -0.2421],
        [ 0.4693, -0.3063, -0.2421],
        [ 0.4693, -0.3063, -0.2421],
        [ 0.4693, -0.3063, -0.2421],
        [ 0.4693, -0.3063, -0.2421],
        [ 0.4693, -0.3063, -0.2421]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


12it [00:22,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4718, -0.3075, -0.2438],
        [ 0.4718, -0.3075, -0.2438],
        [ 0.4718, -0.3075, -0.2438],
        [ 0.4718, -0.3075, -0.2438],
        [ 0.4718, -0.3075, -0.2438],
        [ 0.4718, -0.3075, -0.2438],
        [ 0.4718, -0.3075, -0.2438],
        [ 0.4718, -0.3075, -0.2438]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


13it [00:24,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4752, -0.3102, -0.2454],
        [ 0.4752, -0.3102, -0.2454],
        [ 0.4752, -0.3102, -0.2454],
        [ 0.4752, -0.3102, -0.2454],
        [ 0.4752, -0.3102, -0.2454],
        [ 0.4752, -0.3102, -0.2454],
        [ 0.4752, -0.3102, -0.2454],
        [ 0.4752, -0.3102, -0.2454]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


14it [00:26,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4789, -0.3117, -0.2483],
        [ 0.4789, -0.3117, -0.2483],
        [ 0.4789, -0.3117, -0.2483],
        [ 0.4789, -0.3117, -0.2483],
        [ 0.4789, -0.3117, -0.2483],
        [ 0.4789, -0.3117, -0.2483],
        [ 0.4789, -0.3117, -0.2483],
        [ 0.4789, -0.3117, -0.2483]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


15it [00:28,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4821, -0.3121, -0.2517],
        [ 0.4821, -0.3121, -0.2517],
        [ 0.4821, -0.3121, -0.2517],
        [ 0.4821, -0.3121, -0.2517],
        [ 0.4821, -0.3121, -0.2517],
        [ 0.4821, -0.3121, -0.2517],
        [ 0.4821, -0.3121, -0.2517],
        [ 0.4821, -0.3121, -0.2517]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


16it [00:30,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4855, -0.3124, -0.2555],
        [ 0.4855, -0.3124, -0.2555],
        [ 0.4855, -0.3124, -0.2555],
        [ 0.4855, -0.3124, -0.2555],
        [ 0.4855, -0.3124, -0.2555],
        [ 0.4855, -0.3124, -0.2555],
        [ 0.4855, -0.3124, -0.2555],
        [ 0.4855, -0.3124, -0.2555]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


17it [00:31,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4892, -0.3126, -0.2597],
        [ 0.4892, -0.3126, -0.2597],
        [ 0.4892, -0.3126, -0.2597],
        [ 0.4892, -0.3126, -0.2597],
        [ 0.4892, -0.3126, -0.2597],
        [ 0.4892, -0.3126, -0.2597],
        [ 0.4892, -0.3126, -0.2597],
        [ 0.4892, -0.3126, -0.2597]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


18it [00:33,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4930, -0.3118, -0.2649],
        [ 0.4930, -0.3118, -0.2649],
        [ 0.4930, -0.3118, -0.2649],
        [ 0.4930, -0.3118, -0.2649],
        [ 0.4930, -0.3118, -0.2649],
        [ 0.4930, -0.3118, -0.2649],
        [ 0.4930, -0.3118, -0.2649],
        [ 0.4930, -0.3118, -0.2649]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


19it [00:35,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4951, -0.3086, -0.2703],
        [ 0.4951, -0.3086, -0.2703],
        [ 0.4951, -0.3086, -0.2703],
        [ 0.4951, -0.3086, -0.2703],
        [ 0.4951, -0.3086, -0.2703],
        [ 0.4951, -0.3086, -0.2703],
        [ 0.4951, -0.3086, -0.2703],
        [ 0.4951, -0.3086, -0.2703]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


20it [00:37,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4974, -0.3056, -0.2759],
        [ 0.4974, -0.3056, -0.2759],
        [ 0.4974, -0.3056, -0.2759],
        [ 0.4974, -0.3056, -0.2759],
        [ 0.4974, -0.3056, -0.2759],
        [ 0.4974, -0.3056, -0.2759],
        [ 0.4974, -0.3056, -0.2759],
        [ 0.4974, -0.3056, -0.2759]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


21it [00:39,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4975, -0.3020, -0.2793],
        [ 0.4975, -0.3020, -0.2793],
        [ 0.4975, -0.3020, -0.2793],
        [ 0.4975, -0.3020, -0.2793],
        [ 0.4975, -0.3020, -0.2793],
        [ 0.4975, -0.3020, -0.2793],
        [ 0.4975, -0.3020, -0.2793],
        [ 0.4975, -0.3020, -0.2793]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


22it [00:41,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4975, -0.2987, -0.2824],
        [ 0.4975, -0.2987, -0.2824],
        [ 0.4975, -0.2987, -0.2824],
        [ 0.4975, -0.2987, -0.2824],
        [ 0.4975, -0.2987, -0.2824],
        [ 0.4975, -0.2987, -0.2824],
        [ 0.4975, -0.2987, -0.2824],
        [ 0.4975, -0.2987, -0.2824]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


23it [00:43,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4967, -0.2956, -0.2844],
        [ 0.4967, -0.2956, -0.2844],
        [ 0.4967, -0.2956, -0.2844],
        [ 0.4967, -0.2956, -0.2844],
        [ 0.4967, -0.2956, -0.2844],
        [ 0.4967, -0.2956, -0.2844],
        [ 0.4967, -0.2956, -0.2844],
        [ 0.4967, -0.2956, -0.2844]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


24it [00:45,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4959, -0.2928, -0.2861],
        [ 0.4959, -0.2928, -0.2861],
        [ 0.4959, -0.2928, -0.2861],
        [ 0.4959, -0.2928, -0.2861],
        [ 0.4959, -0.2928, -0.2861],
        [ 0.4959, -0.2928, -0.2861],
        [ 0.4959, -0.2928, -0.2861],
        [ 0.4959, -0.2928, -0.2861]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


25it [00:47,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4950, -0.2893, -0.2883],
        [ 0.4950, -0.2893, -0.2883],
        [ 0.4950, -0.2893, -0.2883],
        [ 0.4950, -0.2893, -0.2883],
        [ 0.4950, -0.2893, -0.2883],
        [ 0.4950, -0.2893, -0.2883],
        [ 0.4950, -0.2893, -0.2883],
        [ 0.4950, -0.2893, -0.2883]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


26it [00:48,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4960, -0.2877, -0.2910],
        [ 0.4960, -0.2877, -0.2910],
        [ 0.4960, -0.2877, -0.2910],
        [ 0.4960, -0.2877, -0.2910],
        [ 0.4960, -0.2877, -0.2910],
        [ 0.4960, -0.2877, -0.2910],
        [ 0.4960, -0.2877, -0.2910],
        [ 0.4960, -0.2877, -0.2910]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


27it [00:50,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4974, -0.2871, -0.2933],
        [ 0.4974, -0.2871, -0.2933],
        [ 0.4974, -0.2871, -0.2933],
        [ 0.4974, -0.2871, -0.2933],
        [ 0.4974, -0.2871, -0.2933],
        [ 0.4974, -0.2871, -0.2933],
        [ 0.4974, -0.2871, -0.2933],
        [ 0.4974, -0.2871, -0.2933]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


28it [00:53,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4986, -0.2864, -0.2954],
        [ 0.4986, -0.2864, -0.2954],
        [ 0.4986, -0.2864, -0.2954],
        [ 0.4986, -0.2864, -0.2954],
        [ 0.4986, -0.2864, -0.2954],
        [ 0.4986, -0.2864, -0.2954],
        [ 0.4986, -0.2864, -0.2954],
        [ 0.4986, -0.2864, -0.2954]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


29it [00:55,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4989, -0.2849, -0.2971],
        [ 0.4989, -0.2849, -0.2971],
        [ 0.4989, -0.2849, -0.2971],
        [ 0.4989, -0.2849, -0.2971],
        [ 0.4989, -0.2849, -0.2971],
        [ 0.4989, -0.2849, -0.2971],
        [ 0.4989, -0.2849, -0.2971],
        [ 0.4989, -0.2849, -0.2971]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


30it [00:56,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4997, -0.2851, -0.2979],
        [ 0.4997, -0.2851, -0.2979],
        [ 0.4997, -0.2851, -0.2979],
        [ 0.4997, -0.2851, -0.2979],
        [ 0.4997, -0.2851, -0.2979],
        [ 0.4997, -0.2851, -0.2979],
        [ 0.4997, -0.2851, -0.2979],
        [ 0.4997, -0.2851, -0.2979]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


31it [00:58,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4990, -0.2852, -0.2969],
        [ 0.4990, -0.2852, -0.2969],
        [ 0.4990, -0.2852, -0.2969],
        [ 0.4990, -0.2852, -0.2969],
        [ 0.4990, -0.2852, -0.2969],
        [ 0.4990, -0.2852, -0.2969],
        [ 0.4990, -0.2852, -0.2969],
        [ 0.4990, -0.2852, -0.2969]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


32it [01:00,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4982, -0.2852, -0.2960],
        [ 0.4982, -0.2852, -0.2960],
        [ 0.4982, -0.2852, -0.2960],
        [ 0.4982, -0.2852, -0.2960],
        [ 0.4982, -0.2852, -0.2960],
        [ 0.4982, -0.2852, -0.2960],
        [ 0.4982, -0.2852, -0.2960],
        [ 0.4982, -0.2852, -0.2960]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


33it [01:02,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4968, -0.2851, -0.2944],
        [ 0.4968, -0.2851, -0.2944],
        [ 0.4968, -0.2851, -0.2944],
        [ 0.4968, -0.2851, -0.2944],
        [ 0.4968, -0.2851, -0.2944],
        [ 0.4968, -0.2851, -0.2944],
        [ 0.4968, -0.2851, -0.2944],
        [ 0.4968, -0.2851, -0.2944]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


34it [01:03,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4954, -0.2866, -0.2913],
        [ 0.4954, -0.2866, -0.2913],
        [ 0.4954, -0.2866, -0.2913],
        [ 0.4954, -0.2866, -0.2913],
        [ 0.4954, -0.2866, -0.2913],
        [ 0.4954, -0.2866, -0.2913],
        [ 0.4954, -0.2866, -0.2913],
        [ 0.4954, -0.2866, -0.2913]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


35it [01:06,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4946, -0.2879, -0.2892],
        [ 0.4946, -0.2879, -0.2892],
        [ 0.4946, -0.2879, -0.2892],
        [ 0.4946, -0.2879, -0.2892],
        [ 0.4946, -0.2879, -0.2892],
        [ 0.4946, -0.2879, -0.2892],
        [ 0.4946, -0.2879, -0.2892],
        [ 0.4946, -0.2879, -0.2892]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


36it [01:08,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4944, -0.2898, -0.2872],
        [ 0.4944, -0.2898, -0.2872],
        [ 0.4944, -0.2898, -0.2872],
        [ 0.4944, -0.2898, -0.2872],
        [ 0.4944, -0.2898, -0.2872],
        [ 0.4944, -0.2898, -0.2872],
        [ 0.4944, -0.2898, -0.2872],
        [ 0.4944, -0.2898, -0.2872]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


37it [01:10,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4948, -0.2915, -0.2861],
        [ 0.4948, -0.2915, -0.2861],
        [ 0.4948, -0.2915, -0.2861],
        [ 0.4948, -0.2915, -0.2861],
        [ 0.4948, -0.2915, -0.2861],
        [ 0.4948, -0.2915, -0.2861],
        [ 0.4948, -0.2915, -0.2861],
        [ 0.4948, -0.2915, -0.2861]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


38it [01:11,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4951, -0.2920, -0.2858],
        [ 0.4951, -0.2920, -0.2858],
        [ 0.4951, -0.2920, -0.2858],
        [ 0.4951, -0.2920, -0.2858],
        [ 0.4951, -0.2920, -0.2858],
        [ 0.4951, -0.2920, -0.2858],
        [ 0.4951, -0.2920, -0.2858],
        [ 0.4951, -0.2920, -0.2858]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


39it [01:13,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4952, -0.2925, -0.2855],
        [ 0.4952, -0.2925, -0.2855],
        [ 0.4952, -0.2925, -0.2855],
        [ 0.4952, -0.2925, -0.2855],
        [ 0.4952, -0.2925, -0.2855],
        [ 0.4952, -0.2925, -0.2855],
        [ 0.4952, -0.2925, -0.2855],
        [ 0.4952, -0.2925, -0.2855]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


40it [01:15,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4951, -0.2912, -0.2867],
        [ 0.4951, -0.2912, -0.2867],
        [ 0.4951, -0.2912, -0.2867],
        [ 0.4951, -0.2912, -0.2867],
        [ 0.4951, -0.2912, -0.2867],
        [ 0.4951, -0.2912, -0.2867],
        [ 0.4951, -0.2912, -0.2867],
        [ 0.4951, -0.2912, -0.2867]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


41it [01:17,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4963, -0.2908, -0.2885],
        [ 0.4963, -0.2908, -0.2885],
        [ 0.4963, -0.2908, -0.2885],
        [ 0.4963, -0.2908, -0.2885],
        [ 0.4963, -0.2908, -0.2885],
        [ 0.4963, -0.2908, -0.2885],
        [ 0.4963, -0.2908, -0.2885],
        [ 0.4963, -0.2908, -0.2885]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


42it [01:18,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4965, -0.2903, -0.2892],
        [ 0.4965, -0.2903, -0.2892],
        [ 0.4965, -0.2903, -0.2892],
        [ 0.4965, -0.2903, -0.2892],
        [ 0.4965, -0.2903, -0.2892],
        [ 0.4965, -0.2903, -0.2892],
        [ 0.4965, -0.2903, -0.2892],
        [ 0.4965, -0.2903, -0.2892]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


43it [01:21,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4960, -0.2890, -0.2898],
        [ 0.4960, -0.2890, -0.2898],
        [ 0.4960, -0.2890, -0.2898],
        [ 0.4960, -0.2890, -0.2898],
        [ 0.4960, -0.2890, -0.2898],
        [ 0.4960, -0.2890, -0.2898],
        [ 0.4960, -0.2890, -0.2898],
        [ 0.4960, -0.2890, -0.2898]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


44it [01:23,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4967, -0.2894, -0.2903],
        [ 0.4967, -0.2894, -0.2903],
        [ 0.4967, -0.2894, -0.2903],
        [ 0.4967, -0.2894, -0.2903],
        [ 0.4967, -0.2894, -0.2903],
        [ 0.4967, -0.2894, -0.2903],
        [ 0.4967, -0.2894, -0.2903],
        [ 0.4967, -0.2894, -0.2903]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


45it [01:24,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4979, -0.2912, -0.2899],
        [ 0.4979, -0.2912, -0.2899],
        [ 0.4979, -0.2912, -0.2899],
        [ 0.4979, -0.2912, -0.2899],
        [ 0.4979, -0.2912, -0.2899],
        [ 0.4979, -0.2912, -0.2899],
        [ 0.4979, -0.2912, -0.2899],
        [ 0.4979, -0.2912, -0.2899]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


46it [01:26,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4995, -0.2944, -0.2887],
        [ 0.4995, -0.2944, -0.2887],
        [ 0.4995, -0.2944, -0.2887],
        [ 0.4995, -0.2944, -0.2887],
        [ 0.4995, -0.2944, -0.2887],
        [ 0.4995, -0.2944, -0.2887],
        [ 0.4995, -0.2944, -0.2887],
        [ 0.4995, -0.2944, -0.2887]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


47it [01:28,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5027, -0.2988, -0.2883],
        [ 0.5027, -0.2988, -0.2883],
        [ 0.5027, -0.2988, -0.2883],
        [ 0.5027, -0.2988, -0.2883],
        [ 0.5027, -0.2988, -0.2883],
        [ 0.5027, -0.2988, -0.2883],
        [ 0.5027, -0.2988, -0.2883],
        [ 0.5027, -0.2988, -0.2883]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


48it [01:30,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5055, -0.3019, -0.2887],
        [ 0.5055, -0.3019, -0.2887],
        [ 0.5055, -0.3019, -0.2887],
        [ 0.5055, -0.3019, -0.2887],
        [ 0.5055, -0.3019, -0.2887],
        [ 0.5055, -0.3019, -0.2887],
        [ 0.5055, -0.3019, -0.2887],
        [ 0.5055, -0.3019, -0.2887]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


49it [01:32,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5085, -0.3046, -0.2897],
        [ 0.5085, -0.3046, -0.2897],
        [ 0.5085, -0.3046, -0.2897],
        [ 0.5085, -0.3046, -0.2897],
        [ 0.5085, -0.3046, -0.2897],
        [ 0.5085, -0.3046, -0.2897],
        [ 0.5085, -0.3046, -0.2897],
        [ 0.5085, -0.3046, -0.2897]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


50it [01:33,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5111, -0.3085, -0.2890],
        [ 0.5111, -0.3085, -0.2890],
        [ 0.5111, -0.3085, -0.2890],
        [ 0.5111, -0.3085, -0.2890],
        [ 0.5111, -0.3085, -0.2890],
        [ 0.5111, -0.3085, -0.2890],
        [ 0.5111, -0.3085, -0.2890],
        [ 0.5111, -0.3085, -0.2890]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


51it [01:36,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5132, -0.3135, -0.2867],
        [ 0.5132, -0.3135, -0.2867],
        [ 0.5132, -0.3135, -0.2867],
        [ 0.5132, -0.3135, -0.2867],
        [ 0.5132, -0.3135, -0.2867],
        [ 0.5132, -0.3135, -0.2867],
        [ 0.5132, -0.3135, -0.2867],
        [ 0.5132, -0.3135, -0.2867]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


52it [01:38,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5150, -0.3187, -0.2839],
        [ 0.5150, -0.3187, -0.2839],
        [ 0.5150, -0.3187, -0.2839],
        [ 0.5150, -0.3187, -0.2839],
        [ 0.5150, -0.3187, -0.2839],
        [ 0.5150, -0.3187, -0.2839],
        [ 0.5150, -0.3187, -0.2839],
        [ 0.5150, -0.3187, -0.2839]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


53it [01:40,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5178, -0.3248, -0.2812],
        [ 0.5178, -0.3248, -0.2812],
        [ 0.5178, -0.3248, -0.2812],
        [ 0.5178, -0.3248, -0.2812],
        [ 0.5178, -0.3248, -0.2812],
        [ 0.5178, -0.3248, -0.2812],
        [ 0.5178, -0.3248, -0.2812],
        [ 0.5178, -0.3248, -0.2812]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


54it [01:41,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5227, -0.3318, -0.2803],
        [ 0.5227, -0.3318, -0.2803],
        [ 0.5227, -0.3318, -0.2803],
        [ 0.5227, -0.3318, -0.2803],
        [ 0.5227, -0.3318, -0.2803],
        [ 0.5227, -0.3318, -0.2803],
        [ 0.5227, -0.3318, -0.2803],
        [ 0.5227, -0.3318, -0.2803]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


55it [01:43,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5269, -0.3379, -0.2794],
        [ 0.5269, -0.3379, -0.2794],
        [ 0.5269, -0.3379, -0.2794],
        [ 0.5269, -0.3379, -0.2794],
        [ 0.5269, -0.3379, -0.2794],
        [ 0.5269, -0.3379, -0.2794],
        [ 0.5269, -0.3379, -0.2794],
        [ 0.5269, -0.3379, -0.2794]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


56it [01:45,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5299, -0.3416, -0.2793],
        [ 0.5299, -0.3416, -0.2793],
        [ 0.5299, -0.3416, -0.2793],
        [ 0.5299, -0.3416, -0.2793],
        [ 0.5299, -0.3416, -0.2793],
        [ 0.5299, -0.3416, -0.2793],
        [ 0.5299, -0.3416, -0.2793],
        [ 0.5299, -0.3416, -0.2793]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


57it [01:47,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5317, -0.3449, -0.2784],
        [ 0.5317, -0.3449, -0.2784],
        [ 0.5317, -0.3449, -0.2784],
        [ 0.5317, -0.3449, -0.2784],
        [ 0.5317, -0.3449, -0.2784],
        [ 0.5317, -0.3449, -0.2784],
        [ 0.5317, -0.3449, -0.2784],
        [ 0.5317, -0.3449, -0.2784]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


58it [01:49,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5313, -0.3460, -0.2768],
        [ 0.5313, -0.3460, -0.2768],
        [ 0.5313, -0.3460, -0.2768],
        [ 0.5313, -0.3460, -0.2768],
        [ 0.5313, -0.3460, -0.2768],
        [ 0.5313, -0.3460, -0.2768],
        [ 0.5313, -0.3460, -0.2768],
        [ 0.5313, -0.3460, -0.2768]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


59it [01:51,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5306, -0.3468, -0.2752],
        [ 0.5306, -0.3468, -0.2752],
        [ 0.5306, -0.3468, -0.2752],
        [ 0.5306, -0.3468, -0.2752],
        [ 0.5306, -0.3468, -0.2752],
        [ 0.5306, -0.3468, -0.2752],
        [ 0.5306, -0.3468, -0.2752],
        [ 0.5306, -0.3468, -0.2752]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


60it [01:53,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5318, -0.3482, -0.2753],
        [ 0.5318, -0.3482, -0.2753],
        [ 0.5318, -0.3482, -0.2753],
        [ 0.5318, -0.3482, -0.2753],
        [ 0.5318, -0.3482, -0.2753],
        [ 0.5318, -0.3482, -0.2753],
        [ 0.5318, -0.3482, -0.2753],
        [ 0.5318, -0.3482, -0.2753]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


61it [01:55,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5333, -0.3501, -0.2752],
        [ 0.5333, -0.3501, -0.2752],
        [ 0.5333, -0.3501, -0.2752],
        [ 0.5333, -0.3501, -0.2752],
        [ 0.5333, -0.3501, -0.2752],
        [ 0.5333, -0.3501, -0.2752],
        [ 0.5333, -0.3501, -0.2752],
        [ 0.5333, -0.3501, -0.2752]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


62it [01:56,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5326, -0.3516, -0.2729],
        [ 0.5326, -0.3516, -0.2729],
        [ 0.5326, -0.3516, -0.2729],
        [ 0.5326, -0.3516, -0.2729],
        [ 0.5326, -0.3516, -0.2729],
        [ 0.5326, -0.3516, -0.2729],
        [ 0.5326, -0.3516, -0.2729],
        [ 0.5326, -0.3516, -0.2729]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


63it [01:58,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5304, -0.3512, -0.2707],
        [ 0.5304, -0.3512, -0.2707],
        [ 0.5304, -0.3512, -0.2707],
        [ 0.5304, -0.3512, -0.2707],
        [ 0.5304, -0.3512, -0.2707],
        [ 0.5304, -0.3512, -0.2707],
        [ 0.5304, -0.3512, -0.2707],
        [ 0.5304, -0.3512, -0.2707]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


64it [02:00,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5276, -0.3507, -0.2679],
        [ 0.5276, -0.3507, -0.2679],
        [ 0.5276, -0.3507, -0.2679],
        [ 0.5276, -0.3507, -0.2679],
        [ 0.5276, -0.3507, -0.2679],
        [ 0.5276, -0.3507, -0.2679],
        [ 0.5276, -0.3507, -0.2679],
        [ 0.5276, -0.3507, -0.2679]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


65it [02:02,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5263, -0.3516, -0.2654],
        [ 0.5263, -0.3516, -0.2654],
        [ 0.5263, -0.3516, -0.2654],
        [ 0.5263, -0.3516, -0.2654],
        [ 0.5263, -0.3516, -0.2654],
        [ 0.5263, -0.3516, -0.2654],
        [ 0.5263, -0.3516, -0.2654],
        [ 0.5263, -0.3516, -0.2654]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


66it [02:04,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5255, -0.3531, -0.2630],
        [ 0.5255, -0.3531, -0.2630],
        [ 0.5255, -0.3531, -0.2630],
        [ 0.5255, -0.3531, -0.2630],
        [ 0.5255, -0.3531, -0.2630],
        [ 0.5255, -0.3531, -0.2630],
        [ 0.5255, -0.3531, -0.2630],
        [ 0.5255, -0.3531, -0.2630]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


67it [02:06,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5246, -0.3542, -0.2608],
        [ 0.5246, -0.3542, -0.2608],
        [ 0.5246, -0.3542, -0.2608],
        [ 0.5246, -0.3542, -0.2608],
        [ 0.5246, -0.3542, -0.2608],
        [ 0.5246, -0.3542, -0.2608],
        [ 0.5246, -0.3542, -0.2608],
        [ 0.5246, -0.3542, -0.2608]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


68it [02:08,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5236, -0.3550, -0.2588],
        [ 0.5236, -0.3550, -0.2588],
        [ 0.5236, -0.3550, -0.2588],
        [ 0.5236, -0.3550, -0.2588],
        [ 0.5236, -0.3550, -0.2588],
        [ 0.5236, -0.3550, -0.2588],
        [ 0.5236, -0.3550, -0.2588],
        [ 0.5236, -0.3550, -0.2588]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


69it [02:10,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5219, -0.3548, -0.2570],
        [ 0.5219, -0.3548, -0.2570],
        [ 0.5219, -0.3548, -0.2570],
        [ 0.5219, -0.3548, -0.2570],
        [ 0.5219, -0.3548, -0.2570],
        [ 0.5219, -0.3548, -0.2570],
        [ 0.5219, -0.3548, -0.2570],
        [ 0.5219, -0.3548, -0.2570]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


70it [02:12,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5202, -0.3544, -0.2553],
        [ 0.5202, -0.3544, -0.2553],
        [ 0.5202, -0.3544, -0.2553],
        [ 0.5202, -0.3544, -0.2553],
        [ 0.5202, -0.3544, -0.2553],
        [ 0.5202, -0.3544, -0.2553],
        [ 0.5202, -0.3544, -0.2553],
        [ 0.5202, -0.3544, -0.2553]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


71it [02:13,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5191, -0.3539, -0.2545],
        [ 0.5191, -0.3539, -0.2545],
        [ 0.5191, -0.3539, -0.2545],
        [ 0.5191, -0.3539, -0.2545],
        [ 0.5191, -0.3539, -0.2545],
        [ 0.5191, -0.3539, -0.2545],
        [ 0.5191, -0.3539, -0.2545],
        [ 0.5191, -0.3539, -0.2545]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


72it [02:15,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5167, -0.3533, -0.2523],
        [ 0.5167, -0.3533, -0.2523],
        [ 0.5167, -0.3533, -0.2523],
        [ 0.5167, -0.3533, -0.2523],
        [ 0.5167, -0.3533, -0.2523],
        [ 0.5167, -0.3533, -0.2523],
        [ 0.5167, -0.3533, -0.2523],
        [ 0.5167, -0.3533, -0.2523]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


73it [02:17,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5137, -0.3501, -0.2518],
        [ 0.5137, -0.3501, -0.2518],
        [ 0.5137, -0.3501, -0.2518],
        [ 0.5137, -0.3501, -0.2518],
        [ 0.5137, -0.3501, -0.2518],
        [ 0.5137, -0.3501, -0.2518],
        [ 0.5137, -0.3501, -0.2518],
        [ 0.5137, -0.3501, -0.2518]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


74it [02:19,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5115, -0.3479, -0.2513],
        [ 0.5115, -0.3479, -0.2513],
        [ 0.5115, -0.3479, -0.2513],
        [ 0.5115, -0.3479, -0.2513],
        [ 0.5115, -0.3479, -0.2513],
        [ 0.5115, -0.3479, -0.2513],
        [ 0.5115, -0.3479, -0.2513],
        [ 0.5115, -0.3479, -0.2513]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


75it [02:21,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5088, -0.3466, -0.2493],
        [ 0.5088, -0.3466, -0.2493],
        [ 0.5088, -0.3466, -0.2493],
        [ 0.5088, -0.3466, -0.2493],
        [ 0.5088, -0.3466, -0.2493],
        [ 0.5088, -0.3466, -0.2493],
        [ 0.5088, -0.3466, -0.2493],
        [ 0.5088, -0.3466, -0.2493]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


76it [02:23,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5048, -0.3452, -0.2459],
        [ 0.5048, -0.3452, -0.2459],
        [ 0.5048, -0.3452, -0.2459],
        [ 0.5048, -0.3452, -0.2459],
        [ 0.5048, -0.3452, -0.2459],
        [ 0.5048, -0.3452, -0.2459],
        [ 0.5048, -0.3452, -0.2459],
        [ 0.5048, -0.3452, -0.2459]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


77it [02:25,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5012, -0.3446, -0.2421],
        [ 0.5012, -0.3446, -0.2421],
        [ 0.5012, -0.3446, -0.2421],
        [ 0.5012, -0.3446, -0.2421],
        [ 0.5012, -0.3446, -0.2421],
        [ 0.5012, -0.3446, -0.2421],
        [ 0.5012, -0.3446, -0.2421],
        [ 0.5012, -0.3446, -0.2421]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


78it [02:27,  1.89s/it]


Train Loss at epoch 3: 1.0272481441497803



1it [00:00,  1.60it/s]

torch.Size([8, 20, 1024])


2it [00:01,  1.53it/s]

torch.Size([8, 20, 1024])


3it [00:01,  1.50it/s]

torch.Size([8, 20, 1024])


4it [00:02,  1.49it/s]

torch.Size([8, 20, 1024])


5it [00:03,  1.48it/s]

torch.Size([8, 20, 1024])


6it [00:04,  1.49it/s]

torch.Size([8, 20, 1024])


7it [00:04,  1.37it/s]

torch.Size([8, 20, 1024])


8it [00:05,  1.42it/s]


torch.Size([1, 20, 1024])
test Loss at epoch 3: 1.115257740020752



0it [00:00, ?it/s]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4964, -0.3431, -0.2380],
        [ 0.4964, -0.3431, -0.2380],
        [ 0.4964, -0.3431, -0.2380],
        [ 0.4964, -0.3431, -0.2380],
        [ 0.4964, -0.3431, -0.2380],
        [ 0.4964, -0.3431, -0.2380],
        [ 0.4964, -0.3431, -0.2380],
        [ 0.4964, -0.3431, -0.2380]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


1it [00:02,  2.11s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4914, -0.3416, -0.2334],
        [ 0.4914, -0.3416, -0.2334],
        [ 0.4914, -0.3416, -0.2334],
        [ 0.4914, -0.3416, -0.2334],
        [ 0.4914, -0.3416, -0.2334],
        [ 0.4914, -0.3416, -0.2334],
        [ 0.4914, -0.3416, -0.2334],
        [ 0.4914, -0.3416, -0.2334]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


2it [00:04,  2.19s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4868, -0.3417, -0.2278],
        [ 0.4868, -0.3417, -0.2278],
        [ 0.4868, -0.3417, -0.2278],
        [ 0.4868, -0.3417, -0.2278],
        [ 0.4868, -0.3417, -0.2278],
        [ 0.4868, -0.3417, -0.2278],
        [ 0.4868, -0.3417, -0.2278],
        [ 0.4868, -0.3417, -0.2278]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


3it [00:06,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4832, -0.3424, -0.2228],
        [ 0.4832, -0.3424, -0.2228],
        [ 0.4832, -0.3424, -0.2228],
        [ 0.4832, -0.3424, -0.2228],
        [ 0.4832, -0.3424, -0.2228],
        [ 0.4832, -0.3424, -0.2228],
        [ 0.4832, -0.3424, -0.2228],
        [ 0.4832, -0.3424, -0.2228]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


4it [00:07,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4805, -0.3445, -0.2176],
        [ 0.4805, -0.3445, -0.2176],
        [ 0.4805, -0.3445, -0.2176],
        [ 0.4805, -0.3445, -0.2176],
        [ 0.4805, -0.3445, -0.2176],
        [ 0.4805, -0.3445, -0.2176],
        [ 0.4805, -0.3445, -0.2176],
        [ 0.4805, -0.3445, -0.2176]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


5it [00:09,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4779, -0.3471, -0.2121],
        [ 0.4779, -0.3471, -0.2121],
        [ 0.4779, -0.3471, -0.2121],
        [ 0.4779, -0.3471, -0.2121],
        [ 0.4779, -0.3471, -0.2121],
        [ 0.4779, -0.3471, -0.2121],
        [ 0.4779, -0.3471, -0.2121],
        [ 0.4779, -0.3471, -0.2121]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


6it [00:11,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4743, -0.3484, -0.2065],
        [ 0.4743, -0.3484, -0.2065],
        [ 0.4743, -0.3484, -0.2065],
        [ 0.4743, -0.3484, -0.2065],
        [ 0.4743, -0.3484, -0.2065],
        [ 0.4743, -0.3484, -0.2065],
        [ 0.4743, -0.3484, -0.2065],
        [ 0.4743, -0.3484, -0.2065]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


7it [00:13,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4709, -0.3487, -0.2023],
        [ 0.4709, -0.3487, -0.2023],
        [ 0.4709, -0.3487, -0.2023],
        [ 0.4709, -0.3487, -0.2023],
        [ 0.4709, -0.3487, -0.2023],
        [ 0.4709, -0.3487, -0.2023],
        [ 0.4709, -0.3487, -0.2023],
        [ 0.4709, -0.3487, -0.2023]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


8it [00:15,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4698, -0.3496, -0.2001],
        [ 0.4698, -0.3496, -0.2001],
        [ 0.4698, -0.3496, -0.2001],
        [ 0.4698, -0.3496, -0.2001],
        [ 0.4698, -0.3496, -0.2001],
        [ 0.4698, -0.3496, -0.2001],
        [ 0.4698, -0.3496, -0.2001],
        [ 0.4698, -0.3496, -0.2001]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


9it [00:17,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4713, -0.3518, -0.1997],
        [ 0.4713, -0.3518, -0.1997],
        [ 0.4713, -0.3518, -0.1997],
        [ 0.4713, -0.3518, -0.1997],
        [ 0.4713, -0.3518, -0.1997],
        [ 0.4713, -0.3518, -0.1997],
        [ 0.4713, -0.3518, -0.1997],
        [ 0.4713, -0.3518, -0.1997]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


10it [00:19,  2.03s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4719, -0.3552, -0.1972],
        [ 0.4719, -0.3552, -0.1972],
        [ 0.4719, -0.3552, -0.1972],
        [ 0.4719, -0.3552, -0.1972],
        [ 0.4719, -0.3552, -0.1972],
        [ 0.4719, -0.3552, -0.1972],
        [ 0.4719, -0.3552, -0.1972],
        [ 0.4719, -0.3552, -0.1972]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


11it [00:21,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4718, -0.3574, -0.1950],
        [ 0.4718, -0.3574, -0.1950],
        [ 0.4718, -0.3574, -0.1950],
        [ 0.4718, -0.3574, -0.1950],
        [ 0.4718, -0.3574, -0.1950],
        [ 0.4718, -0.3574, -0.1950],
        [ 0.4718, -0.3574, -0.1950],
        [ 0.4718, -0.3574, -0.1950]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


12it [00:23,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4722, -0.3599, -0.1930],
        [ 0.4722, -0.3599, -0.1930],
        [ 0.4722, -0.3599, -0.1930],
        [ 0.4722, -0.3599, -0.1930],
        [ 0.4722, -0.3599, -0.1930],
        [ 0.4722, -0.3599, -0.1930],
        [ 0.4722, -0.3599, -0.1930],
        [ 0.4722, -0.3599, -0.1930]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


13it [00:24,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4732, -0.3620, -0.1922],
        [ 0.4732, -0.3620, -0.1922],
        [ 0.4732, -0.3620, -0.1922],
        [ 0.4732, -0.3620, -0.1922],
        [ 0.4732, -0.3620, -0.1922],
        [ 0.4732, -0.3620, -0.1922],
        [ 0.4732, -0.3620, -0.1922],
        [ 0.4732, -0.3620, -0.1922]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


14it [00:26,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4734, -0.3630, -0.1915],
        [ 0.4734, -0.3630, -0.1915],
        [ 0.4734, -0.3630, -0.1915],
        [ 0.4734, -0.3630, -0.1915],
        [ 0.4734, -0.3630, -0.1915],
        [ 0.4734, -0.3630, -0.1915],
        [ 0.4734, -0.3630, -0.1915],
        [ 0.4734, -0.3630, -0.1915]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


15it [00:28,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4741, -0.3637, -0.1917],
        [ 0.4741, -0.3637, -0.1917],
        [ 0.4741, -0.3637, -0.1917],
        [ 0.4741, -0.3637, -0.1917],
        [ 0.4741, -0.3637, -0.1917],
        [ 0.4741, -0.3637, -0.1917],
        [ 0.4741, -0.3637, -0.1917],
        [ 0.4741, -0.3637, -0.1917]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


16it [00:30,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4753, -0.3649, -0.1920],
        [ 0.4753, -0.3649, -0.1920],
        [ 0.4753, -0.3649, -0.1920],
        [ 0.4753, -0.3649, -0.1920],
        [ 0.4753, -0.3649, -0.1920],
        [ 0.4753, -0.3649, -0.1920],
        [ 0.4753, -0.3649, -0.1920],
        [ 0.4753, -0.3649, -0.1920]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


17it [00:32,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4751, -0.3626, -0.1939],
        [ 0.4751, -0.3626, -0.1939],
        [ 0.4751, -0.3626, -0.1939],
        [ 0.4751, -0.3626, -0.1939],
        [ 0.4751, -0.3626, -0.1939],
        [ 0.4751, -0.3626, -0.1939],
        [ 0.4751, -0.3626, -0.1939],
        [ 0.4751, -0.3626, -0.1939]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


18it [00:34,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4754, -0.3605, -0.1964],
        [ 0.4754, -0.3605, -0.1964],
        [ 0.4754, -0.3605, -0.1964],
        [ 0.4754, -0.3605, -0.1964],
        [ 0.4754, -0.3605, -0.1964],
        [ 0.4754, -0.3605, -0.1964],
        [ 0.4754, -0.3605, -0.1964],
        [ 0.4754, -0.3605, -0.1964]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


19it [00:36,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4737, -0.3583, -0.1965],
        [ 0.4737, -0.3583, -0.1965],
        [ 0.4737, -0.3583, -0.1965],
        [ 0.4737, -0.3583, -0.1965],
        [ 0.4737, -0.3583, -0.1965],
        [ 0.4737, -0.3583, -0.1965],
        [ 0.4737, -0.3583, -0.1965],
        [ 0.4737, -0.3583, -0.1965]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


20it [00:38,  2.03s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4708, -0.3546, -0.1966],
        [ 0.4708, -0.3546, -0.1966],
        [ 0.4708, -0.3546, -0.1966],
        [ 0.4708, -0.3546, -0.1966],
        [ 0.4708, -0.3546, -0.1966],
        [ 0.4708, -0.3546, -0.1966],
        [ 0.4708, -0.3546, -0.1966],
        [ 0.4708, -0.3546, -0.1966]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


21it [00:40,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4695, -0.3520, -0.1976],
        [ 0.4695, -0.3520, -0.1976],
        [ 0.4695, -0.3520, -0.1976],
        [ 0.4695, -0.3520, -0.1976],
        [ 0.4695, -0.3520, -0.1976],
        [ 0.4695, -0.3520, -0.1976],
        [ 0.4695, -0.3520, -0.1976],
        [ 0.4695, -0.3520, -0.1976]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


22it [00:42,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4688, -0.3494, -0.1993],
        [ 0.4688, -0.3494, -0.1993],
        [ 0.4688, -0.3494, -0.1993],
        [ 0.4688, -0.3494, -0.1993],
        [ 0.4688, -0.3494, -0.1993],
        [ 0.4688, -0.3494, -0.1993],
        [ 0.4688, -0.3494, -0.1993],
        [ 0.4688, -0.3494, -0.1993]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


23it [00:44,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4682, -0.3469, -0.2009],
        [ 0.4682, -0.3469, -0.2009],
        [ 0.4682, -0.3469, -0.2009],
        [ 0.4682, -0.3469, -0.2009],
        [ 0.4682, -0.3469, -0.2009],
        [ 0.4682, -0.3469, -0.2009],
        [ 0.4682, -0.3469, -0.2009],
        [ 0.4682, -0.3469, -0.2009]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


24it [00:46,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4688, -0.3453, -0.2032],
        [ 0.4688, -0.3453, -0.2032],
        [ 0.4688, -0.3453, -0.2032],
        [ 0.4688, -0.3453, -0.2032],
        [ 0.4688, -0.3453, -0.2032],
        [ 0.4688, -0.3453, -0.2032],
        [ 0.4688, -0.3453, -0.2032],
        [ 0.4688, -0.3453, -0.2032]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


25it [00:48,  2.02s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4700, -0.3453, -0.2045],
        [ 0.4700, -0.3453, -0.2045],
        [ 0.4700, -0.3453, -0.2045],
        [ 0.4700, -0.3453, -0.2045],
        [ 0.4700, -0.3453, -0.2045],
        [ 0.4700, -0.3453, -0.2045],
        [ 0.4700, -0.3453, -0.2045],
        [ 0.4700, -0.3453, -0.2045]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


26it [00:50,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4722, -0.3452, -0.2074],
        [ 0.4722, -0.3452, -0.2074],
        [ 0.4722, -0.3452, -0.2074],
        [ 0.4722, -0.3452, -0.2074],
        [ 0.4722, -0.3452, -0.2074],
        [ 0.4722, -0.3452, -0.2074],
        [ 0.4722, -0.3452, -0.2074],
        [ 0.4722, -0.3452, -0.2074]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


27it [00:52,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4742, -0.3449, -0.2100],
        [ 0.4742, -0.3449, -0.2100],
        [ 0.4742, -0.3449, -0.2100],
        [ 0.4742, -0.3449, -0.2100],
        [ 0.4742, -0.3449, -0.2100],
        [ 0.4742, -0.3449, -0.2100],
        [ 0.4742, -0.3449, -0.2100],
        [ 0.4742, -0.3449, -0.2100]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


28it [00:53,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4772, -0.3445, -0.2139],
        [ 0.4772, -0.3445, -0.2139],
        [ 0.4772, -0.3445, -0.2139],
        [ 0.4772, -0.3445, -0.2139],
        [ 0.4772, -0.3445, -0.2139],
        [ 0.4772, -0.3445, -0.2139],
        [ 0.4772, -0.3445, -0.2139],
        [ 0.4772, -0.3445, -0.2139]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


29it [00:55,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4785, -0.3432, -0.2167],
        [ 0.4785, -0.3432, -0.2167],
        [ 0.4785, -0.3432, -0.2167],
        [ 0.4785, -0.3432, -0.2167],
        [ 0.4785, -0.3432, -0.2167],
        [ 0.4785, -0.3432, -0.2167],
        [ 0.4785, -0.3432, -0.2167],
        [ 0.4785, -0.3432, -0.2167]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


30it [00:57,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4783, -0.3388, -0.2208],
        [ 0.4783, -0.3388, -0.2208],
        [ 0.4783, -0.3388, -0.2208],
        [ 0.4783, -0.3388, -0.2208],
        [ 0.4783, -0.3388, -0.2208],
        [ 0.4783, -0.3388, -0.2208],
        [ 0.4783, -0.3388, -0.2208],
        [ 0.4783, -0.3388, -0.2208]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


31it [00:59,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4794, -0.3355, -0.2253],
        [ 0.4794, -0.3355, -0.2253],
        [ 0.4794, -0.3355, -0.2253],
        [ 0.4794, -0.3355, -0.2253],
        [ 0.4794, -0.3355, -0.2253],
        [ 0.4794, -0.3355, -0.2253],
        [ 0.4794, -0.3355, -0.2253],
        [ 0.4794, -0.3355, -0.2253]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


32it [01:01,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4796, -0.3323, -0.2286],
        [ 0.4796, -0.3323, -0.2286],
        [ 0.4796, -0.3323, -0.2286],
        [ 0.4796, -0.3323, -0.2286],
        [ 0.4796, -0.3323, -0.2286],
        [ 0.4796, -0.3323, -0.2286],
        [ 0.4796, -0.3323, -0.2286],
        [ 0.4796, -0.3323, -0.2286]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


33it [01:03,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4791, -0.3271, -0.2331],
        [ 0.4791, -0.3271, -0.2331],
        [ 0.4791, -0.3271, -0.2331],
        [ 0.4791, -0.3271, -0.2331],
        [ 0.4791, -0.3271, -0.2331],
        [ 0.4791, -0.3271, -0.2331],
        [ 0.4791, -0.3271, -0.2331],
        [ 0.4791, -0.3271, -0.2331]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


34it [01:05,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4791, -0.3222, -0.2380],
        [ 0.4791, -0.3222, -0.2380],
        [ 0.4791, -0.3222, -0.2380],
        [ 0.4791, -0.3222, -0.2380],
        [ 0.4791, -0.3222, -0.2380],
        [ 0.4791, -0.3222, -0.2380],
        [ 0.4791, -0.3222, -0.2380],
        [ 0.4791, -0.3222, -0.2380]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


35it [01:07,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4772, -0.3170, -0.2408],
        [ 0.4772, -0.3170, -0.2408],
        [ 0.4772, -0.3170, -0.2408],
        [ 0.4772, -0.3170, -0.2408],
        [ 0.4772, -0.3170, -0.2408],
        [ 0.4772, -0.3170, -0.2408],
        [ 0.4772, -0.3170, -0.2408],
        [ 0.4772, -0.3170, -0.2408]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


36it [01:09,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4748, -0.3121, -0.2426],
        [ 0.4748, -0.3121, -0.2426],
        [ 0.4748, -0.3121, -0.2426],
        [ 0.4748, -0.3121, -0.2426],
        [ 0.4748, -0.3121, -0.2426],
        [ 0.4748, -0.3121, -0.2426],
        [ 0.4748, -0.3121, -0.2426],
        [ 0.4748, -0.3121, -0.2426]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


37it [01:10,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4738, -0.3092, -0.2443],
        [ 0.4738, -0.3092, -0.2443],
        [ 0.4738, -0.3092, -0.2443],
        [ 0.4738, -0.3092, -0.2443],
        [ 0.4738, -0.3092, -0.2443],
        [ 0.4738, -0.3092, -0.2443],
        [ 0.4738, -0.3092, -0.2443],
        [ 0.4738, -0.3092, -0.2443]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


38it [01:12,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4721, -0.3049, -0.2465],
        [ 0.4721, -0.3049, -0.2465],
        [ 0.4721, -0.3049, -0.2465],
        [ 0.4721, -0.3049, -0.2465],
        [ 0.4721, -0.3049, -0.2465],
        [ 0.4721, -0.3049, -0.2465],
        [ 0.4721, -0.3049, -0.2465],
        [ 0.4721, -0.3049, -0.2465]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


39it [01:14,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4700, -0.3010, -0.2478],
        [ 0.4700, -0.3010, -0.2478],
        [ 0.4700, -0.3010, -0.2478],
        [ 0.4700, -0.3010, -0.2478],
        [ 0.4700, -0.3010, -0.2478],
        [ 0.4700, -0.3010, -0.2478],
        [ 0.4700, -0.3010, -0.2478],
        [ 0.4700, -0.3010, -0.2478]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


40it [01:16,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4680, -0.2974, -0.2489],
        [ 0.4680, -0.2974, -0.2489],
        [ 0.4680, -0.2974, -0.2489],
        [ 0.4680, -0.2974, -0.2489],
        [ 0.4680, -0.2974, -0.2489],
        [ 0.4680, -0.2974, -0.2489],
        [ 0.4680, -0.2974, -0.2489],
        [ 0.4680, -0.2974, -0.2489]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


41it [01:18,  2.03s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4642, -0.2904, -0.2515],
        [ 0.4642, -0.2904, -0.2515],
        [ 0.4642, -0.2904, -0.2515],
        [ 0.4642, -0.2904, -0.2515],
        [ 0.4642, -0.2904, -0.2515],
        [ 0.4642, -0.2904, -0.2515],
        [ 0.4642, -0.2904, -0.2515],
        [ 0.4642, -0.2904, -0.2515]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


42it [01:21,  2.09s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4601, -0.2832, -0.2538],
        [ 0.4601, -0.2832, -0.2538],
        [ 0.4601, -0.2832, -0.2538],
        [ 0.4601, -0.2832, -0.2538],
        [ 0.4601, -0.2832, -0.2538],
        [ 0.4601, -0.2832, -0.2538],
        [ 0.4601, -0.2832, -0.2538],
        [ 0.4601, -0.2832, -0.2538]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


43it [01:24,  2.42s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4576, -0.2774, -0.2566],
        [ 0.4576, -0.2774, -0.2566],
        [ 0.4576, -0.2774, -0.2566],
        [ 0.4576, -0.2774, -0.2566],
        [ 0.4576, -0.2774, -0.2566],
        [ 0.4576, -0.2774, -0.2566],
        [ 0.4576, -0.2774, -0.2566],
        [ 0.4576, -0.2774, -0.2566]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


44it [01:27,  2.51s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4548, -0.2730, -0.2576],
        [ 0.4548, -0.2730, -0.2576],
        [ 0.4548, -0.2730, -0.2576],
        [ 0.4548, -0.2730, -0.2576],
        [ 0.4548, -0.2730, -0.2576],
        [ 0.4548, -0.2730, -0.2576],
        [ 0.4548, -0.2730, -0.2576],
        [ 0.4548, -0.2730, -0.2576]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


45it [01:29,  2.59s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4540, -0.2705, -0.2593],
        [ 0.4540, -0.2705, -0.2593],
        [ 0.4540, -0.2705, -0.2593],
        [ 0.4540, -0.2705, -0.2593],
        [ 0.4540, -0.2705, -0.2593],
        [ 0.4540, -0.2705, -0.2593],
        [ 0.4540, -0.2705, -0.2593],
        [ 0.4540, -0.2705, -0.2593]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


46it [01:33,  2.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4521, -0.2697, -0.2577],
        [ 0.4521, -0.2697, -0.2577],
        [ 0.4521, -0.2697, -0.2577],
        [ 0.4521, -0.2697, -0.2577],
        [ 0.4521, -0.2697, -0.2577],
        [ 0.4521, -0.2697, -0.2577],
        [ 0.4521, -0.2697, -0.2577],
        [ 0.4521, -0.2697, -0.2577]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


47it [01:35,  2.73s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4509, -0.2690, -0.2571],
        [ 0.4509, -0.2690, -0.2571],
        [ 0.4509, -0.2690, -0.2571],
        [ 0.4509, -0.2690, -0.2571],
        [ 0.4509, -0.2690, -0.2571],
        [ 0.4509, -0.2690, -0.2571],
        [ 0.4509, -0.2690, -0.2571],
        [ 0.4509, -0.2690, -0.2571]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


48it [01:38,  2.57s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4486, -0.2683, -0.2550],
        [ 0.4486, -0.2683, -0.2550],
        [ 0.4486, -0.2683, -0.2550],
        [ 0.4486, -0.2683, -0.2550],
        [ 0.4486, -0.2683, -0.2550],
        [ 0.4486, -0.2683, -0.2550],
        [ 0.4486, -0.2683, -0.2550],
        [ 0.4486, -0.2683, -0.2550]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


49it [01:39,  2.34s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4471, -0.2677, -0.2538],
        [ 0.4471, -0.2677, -0.2538],
        [ 0.4471, -0.2677, -0.2538],
        [ 0.4471, -0.2677, -0.2538],
        [ 0.4471, -0.2677, -0.2538],
        [ 0.4471, -0.2677, -0.2538],
        [ 0.4471, -0.2677, -0.2538],
        [ 0.4471, -0.2677, -0.2538]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


50it [01:41,  2.16s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4445, -0.2686, -0.2499],
        [ 0.4445, -0.2686, -0.2499],
        [ 0.4445, -0.2686, -0.2499],
        [ 0.4445, -0.2686, -0.2499],
        [ 0.4445, -0.2686, -0.2499],
        [ 0.4445, -0.2686, -0.2499],
        [ 0.4445, -0.2686, -0.2499],
        [ 0.4445, -0.2686, -0.2499]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


51it [01:43,  2.04s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4428, -0.2702, -0.2463],
        [ 0.4428, -0.2702, -0.2463],
        [ 0.4428, -0.2702, -0.2463],
        [ 0.4428, -0.2702, -0.2463],
        [ 0.4428, -0.2702, -0.2463],
        [ 0.4428, -0.2702, -0.2463],
        [ 0.4428, -0.2702, -0.2463],
        [ 0.4428, -0.2702, -0.2463]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


52it [01:45,  2.06s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4425, -0.2731, -0.2431],
        [ 0.4425, -0.2731, -0.2431],
        [ 0.4425, -0.2731, -0.2431],
        [ 0.4425, -0.2731, -0.2431],
        [ 0.4425, -0.2731, -0.2431],
        [ 0.4425, -0.2731, -0.2431],
        [ 0.4425, -0.2731, -0.2431],
        [ 0.4425, -0.2731, -0.2431]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


53it [01:47,  2.12s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4422, -0.2750, -0.2409],
        [ 0.4422, -0.2750, -0.2409],
        [ 0.4422, -0.2750, -0.2409],
        [ 0.4422, -0.2750, -0.2409],
        [ 0.4422, -0.2750, -0.2409],
        [ 0.4422, -0.2750, -0.2409],
        [ 0.4422, -0.2750, -0.2409],
        [ 0.4422, -0.2750, -0.2409]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


54it [01:49,  2.04s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4432, -0.2781, -0.2390],
        [ 0.4432, -0.2781, -0.2390],
        [ 0.4432, -0.2781, -0.2390],
        [ 0.4432, -0.2781, -0.2390],
        [ 0.4432, -0.2781, -0.2390],
        [ 0.4432, -0.2781, -0.2390],
        [ 0.4432, -0.2781, -0.2390],
        [ 0.4432, -0.2781, -0.2390]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


55it [01:51,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4460, -0.2824, -0.2381],
        [ 0.4460, -0.2824, -0.2381],
        [ 0.4460, -0.2824, -0.2381],
        [ 0.4460, -0.2824, -0.2381],
        [ 0.4460, -0.2824, -0.2381],
        [ 0.4460, -0.2824, -0.2381],
        [ 0.4460, -0.2824, -0.2381],
        [ 0.4460, -0.2824, -0.2381]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


56it [01:53,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4478, -0.2855, -0.2373],
        [ 0.4478, -0.2855, -0.2373],
        [ 0.4478, -0.2855, -0.2373],
        [ 0.4478, -0.2855, -0.2373],
        [ 0.4478, -0.2855, -0.2373],
        [ 0.4478, -0.2855, -0.2373],
        [ 0.4478, -0.2855, -0.2373],
        [ 0.4478, -0.2855, -0.2373]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


57it [01:54,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4489, -0.2882, -0.2358],
        [ 0.4489, -0.2882, -0.2358],
        [ 0.4489, -0.2882, -0.2358],
        [ 0.4489, -0.2882, -0.2358],
        [ 0.4489, -0.2882, -0.2358],
        [ 0.4489, -0.2882, -0.2358],
        [ 0.4489, -0.2882, -0.2358],
        [ 0.4489, -0.2882, -0.2358]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


58it [01:56,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4510, -0.2921, -0.2345],
        [ 0.4510, -0.2921, -0.2345],
        [ 0.4510, -0.2921, -0.2345],
        [ 0.4510, -0.2921, -0.2345],
        [ 0.4510, -0.2921, -0.2345],
        [ 0.4510, -0.2921, -0.2345],
        [ 0.4510, -0.2921, -0.2345],
        [ 0.4510, -0.2921, -0.2345]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


59it [01:58,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4523, -0.2940, -0.2341],
        [ 0.4523, -0.2940, -0.2341],
        [ 0.4523, -0.2940, -0.2341],
        [ 0.4523, -0.2940, -0.2341],
        [ 0.4523, -0.2940, -0.2341],
        [ 0.4523, -0.2940, -0.2341],
        [ 0.4523, -0.2940, -0.2341],
        [ 0.4523, -0.2940, -0.2341]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


60it [02:00,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4535, -0.2965, -0.2330],
        [ 0.4535, -0.2965, -0.2330],
        [ 0.4535, -0.2965, -0.2330],
        [ 0.4535, -0.2965, -0.2330],
        [ 0.4535, -0.2965, -0.2330],
        [ 0.4535, -0.2965, -0.2330],
        [ 0.4535, -0.2965, -0.2330],
        [ 0.4535, -0.2965, -0.2330]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


61it [02:02,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4525, -0.2971, -0.2313],
        [ 0.4525, -0.2971, -0.2313],
        [ 0.4525, -0.2971, -0.2313],
        [ 0.4525, -0.2971, -0.2313],
        [ 0.4525, -0.2971, -0.2313],
        [ 0.4525, -0.2971, -0.2313],
        [ 0.4525, -0.2971, -0.2313],
        [ 0.4525, -0.2971, -0.2313]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


62it [02:04,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4523, -0.2991, -0.2291],
        [ 0.4523, -0.2991, -0.2291],
        [ 0.4523, -0.2991, -0.2291],
        [ 0.4523, -0.2991, -0.2291],
        [ 0.4523, -0.2991, -0.2291],
        [ 0.4523, -0.2991, -0.2291],
        [ 0.4523, -0.2991, -0.2291],
        [ 0.4523, -0.2991, -0.2291]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


63it [02:06,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4515, -0.2993, -0.2279],
        [ 0.4515, -0.2993, -0.2279],
        [ 0.4515, -0.2993, -0.2279],
        [ 0.4515, -0.2993, -0.2279],
        [ 0.4515, -0.2993, -0.2279],
        [ 0.4515, -0.2993, -0.2279],
        [ 0.4515, -0.2993, -0.2279],
        [ 0.4515, -0.2993, -0.2279]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


64it [02:08,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4513, -0.3002, -0.2268],
        [ 0.4513, -0.3002, -0.2268],
        [ 0.4513, -0.3002, -0.2268],
        [ 0.4513, -0.3002, -0.2268],
        [ 0.4513, -0.3002, -0.2268],
        [ 0.4513, -0.3002, -0.2268],
        [ 0.4513, -0.3002, -0.2268],
        [ 0.4513, -0.3002, -0.2268]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


65it [02:09,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4524, -0.3016, -0.2266],
        [ 0.4524, -0.3016, -0.2266],
        [ 0.4524, -0.3016, -0.2266],
        [ 0.4524, -0.3016, -0.2266],
        [ 0.4524, -0.3016, -0.2266],
        [ 0.4524, -0.3016, -0.2266],
        [ 0.4524, -0.3016, -0.2266],
        [ 0.4524, -0.3016, -0.2266]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


66it [02:11,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4546, -0.3036, -0.2272],
        [ 0.4546, -0.3036, -0.2272],
        [ 0.4546, -0.3036, -0.2272],
        [ 0.4546, -0.3036, -0.2272],
        [ 0.4546, -0.3036, -0.2272],
        [ 0.4546, -0.3036, -0.2272],
        [ 0.4546, -0.3036, -0.2272],
        [ 0.4546, -0.3036, -0.2272]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


67it [02:13,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4566, -0.3046, -0.2286],
        [ 0.4566, -0.3046, -0.2286],
        [ 0.4566, -0.3046, -0.2286],
        [ 0.4566, -0.3046, -0.2286],
        [ 0.4566, -0.3046, -0.2286],
        [ 0.4566, -0.3046, -0.2286],
        [ 0.4566, -0.3046, -0.2286],
        [ 0.4566, -0.3046, -0.2286]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


68it [02:15,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4589, -0.3062, -0.2299],
        [ 0.4589, -0.3062, -0.2299],
        [ 0.4589, -0.3062, -0.2299],
        [ 0.4589, -0.3062, -0.2299],
        [ 0.4589, -0.3062, -0.2299],
        [ 0.4589, -0.3062, -0.2299],
        [ 0.4589, -0.3062, -0.2299],
        [ 0.4589, -0.3062, -0.2299]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


69it [02:17,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4617, -0.3075, -0.2318],
        [ 0.4617, -0.3075, -0.2318],
        [ 0.4617, -0.3075, -0.2318],
        [ 0.4617, -0.3075, -0.2318],
        [ 0.4617, -0.3075, -0.2318],
        [ 0.4617, -0.3075, -0.2318],
        [ 0.4617, -0.3075, -0.2318],
        [ 0.4617, -0.3075, -0.2318]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


70it [02:19,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4641, -0.3094, -0.2328],
        [ 0.4641, -0.3094, -0.2328],
        [ 0.4641, -0.3094, -0.2328],
        [ 0.4641, -0.3094, -0.2328],
        [ 0.4641, -0.3094, -0.2328],
        [ 0.4641, -0.3094, -0.2328],
        [ 0.4641, -0.3094, -0.2328],
        [ 0.4641, -0.3094, -0.2328]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


71it [02:21,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4681, -0.3117, -0.2352],
        [ 0.4681, -0.3117, -0.2352],
        [ 0.4681, -0.3117, -0.2352],
        [ 0.4681, -0.3117, -0.2352],
        [ 0.4681, -0.3117, -0.2352],
        [ 0.4681, -0.3117, -0.2352],
        [ 0.4681, -0.3117, -0.2352],
        [ 0.4681, -0.3117, -0.2352]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


72it [02:23,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4710, -0.3152, -0.2352],
        [ 0.4710, -0.3152, -0.2352],
        [ 0.4710, -0.3152, -0.2352],
        [ 0.4710, -0.3152, -0.2352],
        [ 0.4710, -0.3152, -0.2352],
        [ 0.4710, -0.3152, -0.2352],
        [ 0.4710, -0.3152, -0.2352],
        [ 0.4710, -0.3152, -0.2352]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


73it [02:25,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4723, -0.3168, -0.2352],
        [ 0.4723, -0.3168, -0.2352],
        [ 0.4723, -0.3168, -0.2352],
        [ 0.4723, -0.3168, -0.2352],
        [ 0.4723, -0.3168, -0.2352],
        [ 0.4723, -0.3168, -0.2352],
        [ 0.4723, -0.3168, -0.2352],
        [ 0.4723, -0.3168, -0.2352]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


74it [02:26,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4740, -0.3188, -0.2352],
        [ 0.4740, -0.3188, -0.2352],
        [ 0.4740, -0.3188, -0.2352],
        [ 0.4740, -0.3188, -0.2352],
        [ 0.4740, -0.3188, -0.2352],
        [ 0.4740, -0.3188, -0.2352],
        [ 0.4740, -0.3188, -0.2352],
        [ 0.4740, -0.3188, -0.2352]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


75it [02:28,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4755, -0.3213, -0.2344],
        [ 0.4755, -0.3213, -0.2344],
        [ 0.4755, -0.3213, -0.2344],
        [ 0.4755, -0.3213, -0.2344],
        [ 0.4755, -0.3213, -0.2344],
        [ 0.4755, -0.3213, -0.2344],
        [ 0.4755, -0.3213, -0.2344],
        [ 0.4755, -0.3213, -0.2344]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


76it [02:30,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4774, -0.3235, -0.2345],
        [ 0.4774, -0.3235, -0.2345],
        [ 0.4774, -0.3235, -0.2345],
        [ 0.4774, -0.3235, -0.2345],
        [ 0.4774, -0.3235, -0.2345],
        [ 0.4774, -0.3235, -0.2345],
        [ 0.4774, -0.3235, -0.2345],
        [ 0.4774, -0.3235, -0.2345]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


77it [02:33,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4797, -0.3253, -0.2354],
        [ 0.4797, -0.3253, -0.2354],
        [ 0.4797, -0.3253, -0.2354],
        [ 0.4797, -0.3253, -0.2354],
        [ 0.4797, -0.3253, -0.2354],
        [ 0.4797, -0.3253, -0.2354],
        [ 0.4797, -0.3253, -0.2354],
        [ 0.4797, -0.3253, -0.2354]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


78it [02:34,  1.99s/it]


Train Loss at epoch 4: 1.0276762247085571



1it [00:00,  1.60it/s]

torch.Size([8, 20, 1024])


2it [00:01,  1.48it/s]

torch.Size([8, 20, 1024])


3it [00:02,  1.47it/s]

torch.Size([8, 20, 1024])


4it [00:02,  1.47it/s]

torch.Size([8, 20, 1024])


5it [00:03,  1.46it/s]

torch.Size([8, 20, 1024])


6it [00:04,  1.46it/s]

torch.Size([8, 20, 1024])


7it [00:04,  1.44it/s]

torch.Size([8, 20, 1024])


8it [00:05,  1.49it/s]


torch.Size([1, 20, 1024])
test Loss at epoch 4: 1.190306544303894



0it [00:00, ?it/s]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4803, -0.3276, -0.2339],
        [ 0.4803, -0.3276, -0.2339],
        [ 0.4803, -0.3276, -0.2339],
        [ 0.4803, -0.3276, -0.2339],
        [ 0.4803, -0.3276, -0.2339],
        [ 0.4803, -0.3276, -0.2339],
        [ 0.4803, -0.3276, -0.2339],
        [ 0.4803, -0.3276, -0.2339]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


1it [00:01,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4821, -0.3295, -0.2341],
        [ 0.4821, -0.3295, -0.2341],
        [ 0.4821, -0.3295, -0.2341],
        [ 0.4821, -0.3295, -0.2341],
        [ 0.4821, -0.3295, -0.2341],
        [ 0.4821, -0.3295, -0.2341],
        [ 0.4821, -0.3295, -0.2341],
        [ 0.4821, -0.3295, -0.2341]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


2it [00:03,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4830, -0.3304, -0.2344],
        [ 0.4830, -0.3304, -0.2344],
        [ 0.4830, -0.3304, -0.2344],
        [ 0.4830, -0.3304, -0.2344],
        [ 0.4830, -0.3304, -0.2344],
        [ 0.4830, -0.3304, -0.2344],
        [ 0.4830, -0.3304, -0.2344],
        [ 0.4830, -0.3304, -0.2344]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


3it [00:06,  2.08s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4831, -0.3296, -0.2353],
        [ 0.4831, -0.3296, -0.2353],
        [ 0.4831, -0.3296, -0.2353],
        [ 0.4831, -0.3296, -0.2353],
        [ 0.4831, -0.3296, -0.2353],
        [ 0.4831, -0.3296, -0.2353],
        [ 0.4831, -0.3296, -0.2353],
        [ 0.4831, -0.3296, -0.2353]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


4it [00:08,  2.06s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4825, -0.3279, -0.2362],
        [ 0.4825, -0.3279, -0.2362],
        [ 0.4825, -0.3279, -0.2362],
        [ 0.4825, -0.3279, -0.2362],
        [ 0.4825, -0.3279, -0.2362],
        [ 0.4825, -0.3279, -0.2362],
        [ 0.4825, -0.3279, -0.2362],
        [ 0.4825, -0.3279, -0.2362]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


5it [00:09,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4824, -0.3263, -0.2377],
        [ 0.4824, -0.3263, -0.2377],
        [ 0.4824, -0.3263, -0.2377],
        [ 0.4824, -0.3263, -0.2377],
        [ 0.4824, -0.3263, -0.2377],
        [ 0.4824, -0.3263, -0.2377],
        [ 0.4824, -0.3263, -0.2377],
        [ 0.4824, -0.3263, -0.2377]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


6it [00:11,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4810, -0.3247, -0.2376],
        [ 0.4810, -0.3247, -0.2376],
        [ 0.4810, -0.3247, -0.2376],
        [ 0.4810, -0.3247, -0.2376],
        [ 0.4810, -0.3247, -0.2376],
        [ 0.4810, -0.3247, -0.2376],
        [ 0.4810, -0.3247, -0.2376],
        [ 0.4810, -0.3247, -0.2376]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


7it [00:13,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4784, -0.3217, -0.2375],
        [ 0.4784, -0.3217, -0.2375],
        [ 0.4784, -0.3217, -0.2375],
        [ 0.4784, -0.3217, -0.2375],
        [ 0.4784, -0.3217, -0.2375],
        [ 0.4784, -0.3217, -0.2375],
        [ 0.4784, -0.3217, -0.2375],
        [ 0.4784, -0.3217, -0.2375]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


8it [00:16,  2.08s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4760, -0.3196, -0.2367],
        [ 0.4760, -0.3196, -0.2367],
        [ 0.4760, -0.3196, -0.2367],
        [ 0.4760, -0.3196, -0.2367],
        [ 0.4760, -0.3196, -0.2367],
        [ 0.4760, -0.3196, -0.2367],
        [ 0.4760, -0.3196, -0.2367],
        [ 0.4760, -0.3196, -0.2367]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


9it [00:18,  2.18s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4737, -0.3191, -0.2345],
        [ 0.4737, -0.3191, -0.2345],
        [ 0.4737, -0.3191, -0.2345],
        [ 0.4737, -0.3191, -0.2345],
        [ 0.4737, -0.3191, -0.2345],
        [ 0.4737, -0.3191, -0.2345],
        [ 0.4737, -0.3191, -0.2345],
        [ 0.4737, -0.3191, -0.2345]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


10it [00:20,  2.17s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4709, -0.3186, -0.2318],
        [ 0.4709, -0.3186, -0.2318],
        [ 0.4709, -0.3186, -0.2318],
        [ 0.4709, -0.3186, -0.2318],
        [ 0.4709, -0.3186, -0.2318],
        [ 0.4709, -0.3186, -0.2318],
        [ 0.4709, -0.3186, -0.2318],
        [ 0.4709, -0.3186, -0.2318]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


11it [00:22,  2.15s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4690, -0.3195, -0.2286],
        [ 0.4690, -0.3195, -0.2286],
        [ 0.4690, -0.3195, -0.2286],
        [ 0.4690, -0.3195, -0.2286],
        [ 0.4690, -0.3195, -0.2286],
        [ 0.4690, -0.3195, -0.2286],
        [ 0.4690, -0.3195, -0.2286],
        [ 0.4690, -0.3195, -0.2286]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


12it [00:24,  2.04s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4679, -0.3210, -0.2258],
        [ 0.4679, -0.3210, -0.2258],
        [ 0.4679, -0.3210, -0.2258],
        [ 0.4679, -0.3210, -0.2258],
        [ 0.4679, -0.3210, -0.2258],
        [ 0.4679, -0.3210, -0.2258],
        [ 0.4679, -0.3210, -0.2258],
        [ 0.4679, -0.3210, -0.2258]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


13it [00:26,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4661, -0.3222, -0.2225],
        [ 0.4661, -0.3222, -0.2225],
        [ 0.4661, -0.3222, -0.2225],
        [ 0.4661, -0.3222, -0.2225],
        [ 0.4661, -0.3222, -0.2225],
        [ 0.4661, -0.3222, -0.2225],
        [ 0.4661, -0.3222, -0.2225],
        [ 0.4661, -0.3222, -0.2225]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


14it [00:28,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4645, -0.3233, -0.2196],
        [ 0.4645, -0.3233, -0.2196],
        [ 0.4645, -0.3233, -0.2196],
        [ 0.4645, -0.3233, -0.2196],
        [ 0.4645, -0.3233, -0.2196],
        [ 0.4645, -0.3233, -0.2196],
        [ 0.4645, -0.3233, -0.2196],
        [ 0.4645, -0.3233, -0.2196]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


15it [00:29,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4649, -0.3248, -0.2185],
        [ 0.4649, -0.3248, -0.2185],
        [ 0.4649, -0.3248, -0.2185],
        [ 0.4649, -0.3248, -0.2185],
        [ 0.4649, -0.3248, -0.2185],
        [ 0.4649, -0.3248, -0.2185],
        [ 0.4649, -0.3248, -0.2185],
        [ 0.4649, -0.3248, -0.2185]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


16it [00:31,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4653, -0.3254, -0.2184],
        [ 0.4653, -0.3254, -0.2184],
        [ 0.4653, -0.3254, -0.2184],
        [ 0.4653, -0.3254, -0.2184],
        [ 0.4653, -0.3254, -0.2184],
        [ 0.4653, -0.3254, -0.2184],
        [ 0.4653, -0.3254, -0.2184],
        [ 0.4653, -0.3254, -0.2184]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


17it [00:33,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4648, -0.3257, -0.2175],
        [ 0.4648, -0.3257, -0.2175],
        [ 0.4648, -0.3257, -0.2175],
        [ 0.4648, -0.3257, -0.2175],
        [ 0.4648, -0.3257, -0.2175],
        [ 0.4648, -0.3257, -0.2175],
        [ 0.4648, -0.3257, -0.2175],
        [ 0.4648, -0.3257, -0.2175]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


18it [00:35,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4644, -0.3260, -0.2168],
        [ 0.4644, -0.3260, -0.2168],
        [ 0.4644, -0.3260, -0.2168],
        [ 0.4644, -0.3260, -0.2168],
        [ 0.4644, -0.3260, -0.2168],
        [ 0.4644, -0.3260, -0.2168],
        [ 0.4644, -0.3260, -0.2168],
        [ 0.4644, -0.3260, -0.2168]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


19it [00:37,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4646, -0.3268, -0.2162],
        [ 0.4646, -0.3268, -0.2162],
        [ 0.4646, -0.3268, -0.2162],
        [ 0.4646, -0.3268, -0.2162],
        [ 0.4646, -0.3268, -0.2162],
        [ 0.4646, -0.3268, -0.2162],
        [ 0.4646, -0.3268, -0.2162],
        [ 0.4646, -0.3268, -0.2162]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


20it [00:39,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4654, -0.3282, -0.2157],
        [ 0.4654, -0.3282, -0.2157],
        [ 0.4654, -0.3282, -0.2157],
        [ 0.4654, -0.3282, -0.2157],
        [ 0.4654, -0.3282, -0.2157],
        [ 0.4654, -0.3282, -0.2157],
        [ 0.4654, -0.3282, -0.2157],
        [ 0.4654, -0.3282, -0.2157]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


21it [00:41,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4680, -0.3301, -0.2169],
        [ 0.4680, -0.3301, -0.2169],
        [ 0.4680, -0.3301, -0.2169],
        [ 0.4680, -0.3301, -0.2169],
        [ 0.4680, -0.3301, -0.2169],
        [ 0.4680, -0.3301, -0.2169],
        [ 0.4680, -0.3301, -0.2169],
        [ 0.4680, -0.3301, -0.2169]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


22it [00:43,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4715, -0.3325, -0.2187],
        [ 0.4715, -0.3325, -0.2187],
        [ 0.4715, -0.3325, -0.2187],
        [ 0.4715, -0.3325, -0.2187],
        [ 0.4715, -0.3325, -0.2187],
        [ 0.4715, -0.3325, -0.2187],
        [ 0.4715, -0.3325, -0.2187],
        [ 0.4715, -0.3325, -0.2187]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


23it [00:44,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4746, -0.3353, -0.2196],
        [ 0.4746, -0.3353, -0.2196],
        [ 0.4746, -0.3353, -0.2196],
        [ 0.4746, -0.3353, -0.2196],
        [ 0.4746, -0.3353, -0.2196],
        [ 0.4746, -0.3353, -0.2196],
        [ 0.4746, -0.3353, -0.2196],
        [ 0.4746, -0.3353, -0.2196]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


24it [00:46,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4780, -0.3385, -0.2205],
        [ 0.4780, -0.3385, -0.2205],
        [ 0.4780, -0.3385, -0.2205],
        [ 0.4780, -0.3385, -0.2205],
        [ 0.4780, -0.3385, -0.2205],
        [ 0.4780, -0.3385, -0.2205],
        [ 0.4780, -0.3385, -0.2205],
        [ 0.4780, -0.3385, -0.2205]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


25it [00:48,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4815, -0.3412, -0.2221],
        [ 0.4815, -0.3412, -0.2221],
        [ 0.4815, -0.3412, -0.2221],
        [ 0.4815, -0.3412, -0.2221],
        [ 0.4815, -0.3412, -0.2221],
        [ 0.4815, -0.3412, -0.2221],
        [ 0.4815, -0.3412, -0.2221],
        [ 0.4815, -0.3412, -0.2221]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


26it [00:50,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4860, -0.3450, -0.2235],
        [ 0.4860, -0.3450, -0.2235],
        [ 0.4860, -0.3450, -0.2235],
        [ 0.4860, -0.3450, -0.2235],
        [ 0.4860, -0.3450, -0.2235],
        [ 0.4860, -0.3450, -0.2235],
        [ 0.4860, -0.3450, -0.2235],
        [ 0.4860, -0.3450, -0.2235]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


27it [00:52,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4892, -0.3483, -0.2241],
        [ 0.4892, -0.3483, -0.2241],
        [ 0.4892, -0.3483, -0.2241],
        [ 0.4892, -0.3483, -0.2241],
        [ 0.4892, -0.3483, -0.2241],
        [ 0.4892, -0.3483, -0.2241],
        [ 0.4892, -0.3483, -0.2241],
        [ 0.4892, -0.3483, -0.2241]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


28it [00:54,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4907, -0.3503, -0.2239],
        [ 0.4907, -0.3503, -0.2239],
        [ 0.4907, -0.3503, -0.2239],
        [ 0.4907, -0.3503, -0.2239],
        [ 0.4907, -0.3503, -0.2239],
        [ 0.4907, -0.3503, -0.2239],
        [ 0.4907, -0.3503, -0.2239],
        [ 0.4907, -0.3503, -0.2239]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


29it [00:56,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4920, -0.3520, -0.2238],
        [ 0.4920, -0.3520, -0.2238],
        [ 0.4920, -0.3520, -0.2238],
        [ 0.4920, -0.3520, -0.2238],
        [ 0.4920, -0.3520, -0.2238],
        [ 0.4920, -0.3520, -0.2238],
        [ 0.4920, -0.3520, -0.2238],
        [ 0.4920, -0.3520, -0.2238]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


30it [00:58,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4931, -0.3534, -0.2237],
        [ 0.4931, -0.3534, -0.2237],
        [ 0.4931, -0.3534, -0.2237],
        [ 0.4931, -0.3534, -0.2237],
        [ 0.4931, -0.3534, -0.2237],
        [ 0.4931, -0.3534, -0.2237],
        [ 0.4931, -0.3534, -0.2237],
        [ 0.4931, -0.3534, -0.2237]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


31it [01:01,  2.14s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4945, -0.3552, -0.2236],
        [ 0.4945, -0.3552, -0.2236],
        [ 0.4945, -0.3552, -0.2236],
        [ 0.4945, -0.3552, -0.2236],
        [ 0.4945, -0.3552, -0.2236],
        [ 0.4945, -0.3552, -0.2236],
        [ 0.4945, -0.3552, -0.2236],
        [ 0.4945, -0.3552, -0.2236]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


32it [01:04,  2.58s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4951, -0.3551, -0.2243],
        [ 0.4951, -0.3551, -0.2243],
        [ 0.4951, -0.3551, -0.2243],
        [ 0.4951, -0.3551, -0.2243],
        [ 0.4951, -0.3551, -0.2243],
        [ 0.4951, -0.3551, -0.2243],
        [ 0.4951, -0.3551, -0.2243],
        [ 0.4951, -0.3551, -0.2243]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


33it [01:06,  2.50s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4961, -0.3557, -0.2250],
        [ 0.4961, -0.3557, -0.2250],
        [ 0.4961, -0.3557, -0.2250],
        [ 0.4961, -0.3557, -0.2250],
        [ 0.4961, -0.3557, -0.2250],
        [ 0.4961, -0.3557, -0.2250],
        [ 0.4961, -0.3557, -0.2250],
        [ 0.4961, -0.3557, -0.2250]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


34it [01:08,  2.29s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4970, -0.3553, -0.2264],
        [ 0.4970, -0.3553, -0.2264],
        [ 0.4970, -0.3553, -0.2264],
        [ 0.4970, -0.3553, -0.2264],
        [ 0.4970, -0.3553, -0.2264],
        [ 0.4970, -0.3553, -0.2264],
        [ 0.4970, -0.3553, -0.2264],
        [ 0.4970, -0.3553, -0.2264]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


35it [01:10,  2.13s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4989, -0.3547, -0.2292],
        [ 0.4989, -0.3547, -0.2292],
        [ 0.4989, -0.3547, -0.2292],
        [ 0.4989, -0.3547, -0.2292],
        [ 0.4989, -0.3547, -0.2292],
        [ 0.4989, -0.3547, -0.2292],
        [ 0.4989, -0.3547, -0.2292],
        [ 0.4989, -0.3547, -0.2292]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


36it [01:12,  2.02s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5018, -0.3548, -0.2326],
        [ 0.5018, -0.3548, -0.2326],
        [ 0.5018, -0.3548, -0.2326],
        [ 0.5018, -0.3548, -0.2326],
        [ 0.5018, -0.3548, -0.2326],
        [ 0.5018, -0.3548, -0.2326],
        [ 0.5018, -0.3548, -0.2326],
        [ 0.5018, -0.3548, -0.2326]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


37it [01:14,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5042, -0.3547, -0.2356],
        [ 0.5042, -0.3547, -0.2356],
        [ 0.5042, -0.3547, -0.2356],
        [ 0.5042, -0.3547, -0.2356],
        [ 0.5042, -0.3547, -0.2356],
        [ 0.5042, -0.3547, -0.2356],
        [ 0.5042, -0.3547, -0.2356],
        [ 0.5042, -0.3547, -0.2356]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


38it [01:15,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5076, -0.3553, -0.2390],
        [ 0.5076, -0.3553, -0.2390],
        [ 0.5076, -0.3553, -0.2390],
        [ 0.5076, -0.3553, -0.2390],
        [ 0.5076, -0.3553, -0.2390],
        [ 0.5076, -0.3553, -0.2390],
        [ 0.5076, -0.3553, -0.2390],
        [ 0.5076, -0.3553, -0.2390]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


39it [01:17,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5124, -0.3571, -0.2429],
        [ 0.5124, -0.3571, -0.2429],
        [ 0.5124, -0.3571, -0.2429],
        [ 0.5124, -0.3571, -0.2429],
        [ 0.5124, -0.3571, -0.2429],
        [ 0.5124, -0.3571, -0.2429],
        [ 0.5124, -0.3571, -0.2429],
        [ 0.5124, -0.3571, -0.2429]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


40it [01:19,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5153, -0.3594, -0.2441],
        [ 0.5153, -0.3594, -0.2441],
        [ 0.5153, -0.3594, -0.2441],
        [ 0.5153, -0.3594, -0.2441],
        [ 0.5153, -0.3594, -0.2441],
        [ 0.5153, -0.3594, -0.2441],
        [ 0.5153, -0.3594, -0.2441],
        [ 0.5153, -0.3594, -0.2441]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


41it [01:22,  2.03s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5184, -0.3613, -0.2459],
        [ 0.5184, -0.3613, -0.2459],
        [ 0.5184, -0.3613, -0.2459],
        [ 0.5184, -0.3613, -0.2459],
        [ 0.5184, -0.3613, -0.2459],
        [ 0.5184, -0.3613, -0.2459],
        [ 0.5184, -0.3613, -0.2459],
        [ 0.5184, -0.3613, -0.2459]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


42it [01:23,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5204, -0.3635, -0.2460],
        [ 0.5204, -0.3635, -0.2460],
        [ 0.5204, -0.3635, -0.2460],
        [ 0.5204, -0.3635, -0.2460],
        [ 0.5204, -0.3635, -0.2460],
        [ 0.5204, -0.3635, -0.2460],
        [ 0.5204, -0.3635, -0.2460],
        [ 0.5204, -0.3635, -0.2460]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


43it [01:25,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5207, -0.3646, -0.2454],
        [ 0.5207, -0.3646, -0.2454],
        [ 0.5207, -0.3646, -0.2454],
        [ 0.5207, -0.3646, -0.2454],
        [ 0.5207, -0.3646, -0.2454],
        [ 0.5207, -0.3646, -0.2454],
        [ 0.5207, -0.3646, -0.2454],
        [ 0.5207, -0.3646, -0.2454]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


44it [01:27,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5215, -0.3662, -0.2447],
        [ 0.5215, -0.3662, -0.2447],
        [ 0.5215, -0.3662, -0.2447],
        [ 0.5215, -0.3662, -0.2447],
        [ 0.5215, -0.3662, -0.2447],
        [ 0.5215, -0.3662, -0.2447],
        [ 0.5215, -0.3662, -0.2447],
        [ 0.5215, -0.3662, -0.2447]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


45it [01:29,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5213, -0.3690, -0.2419],
        [ 0.5213, -0.3690, -0.2419],
        [ 0.5213, -0.3690, -0.2419],
        [ 0.5213, -0.3690, -0.2419],
        [ 0.5213, -0.3690, -0.2419],
        [ 0.5213, -0.3690, -0.2419],
        [ 0.5213, -0.3690, -0.2419],
        [ 0.5213, -0.3690, -0.2419]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


46it [01:31,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5217, -0.3713, -0.2401],
        [ 0.5217, -0.3713, -0.2401],
        [ 0.5217, -0.3713, -0.2401],
        [ 0.5217, -0.3713, -0.2401],
        [ 0.5217, -0.3713, -0.2401],
        [ 0.5217, -0.3713, -0.2401],
        [ 0.5217, -0.3713, -0.2401],
        [ 0.5217, -0.3713, -0.2401]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


47it [01:33,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5219, -0.3732, -0.2384],
        [ 0.5219, -0.3732, -0.2384],
        [ 0.5219, -0.3732, -0.2384],
        [ 0.5219, -0.3732, -0.2384],
        [ 0.5219, -0.3732, -0.2384],
        [ 0.5219, -0.3732, -0.2384],
        [ 0.5219, -0.3732, -0.2384],
        [ 0.5219, -0.3732, -0.2384]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


48it [01:35,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5218, -0.3754, -0.2362],
        [ 0.5218, -0.3754, -0.2362],
        [ 0.5218, -0.3754, -0.2362],
        [ 0.5218, -0.3754, -0.2362],
        [ 0.5218, -0.3754, -0.2362],
        [ 0.5218, -0.3754, -0.2362],
        [ 0.5218, -0.3754, -0.2362],
        [ 0.5218, -0.3754, -0.2362]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


49it [01:37,  1.97s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5229, -0.3773, -0.2357],
        [ 0.5229, -0.3773, -0.2357],
        [ 0.5229, -0.3773, -0.2357],
        [ 0.5229, -0.3773, -0.2357],
        [ 0.5229, -0.3773, -0.2357],
        [ 0.5229, -0.3773, -0.2357],
        [ 0.5229, -0.3773, -0.2357],
        [ 0.5229, -0.3773, -0.2357]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


50it [01:39,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5237, -0.3795, -0.2345],
        [ 0.5237, -0.3795, -0.2345],
        [ 0.5237, -0.3795, -0.2345],
        [ 0.5237, -0.3795, -0.2345],
        [ 0.5237, -0.3795, -0.2345],
        [ 0.5237, -0.3795, -0.2345],
        [ 0.5237, -0.3795, -0.2345],
        [ 0.5237, -0.3795, -0.2345]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


51it [01:40,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5256, -0.3829, -0.2334],
        [ 0.5256, -0.3829, -0.2334],
        [ 0.5256, -0.3829, -0.2334],
        [ 0.5256, -0.3829, -0.2334],
        [ 0.5256, -0.3829, -0.2334],
        [ 0.5256, -0.3829, -0.2334],
        [ 0.5256, -0.3829, -0.2334],
        [ 0.5256, -0.3829, -0.2334]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


52it [01:42,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5271, -0.3841, -0.2339],
        [ 0.5271, -0.3841, -0.2339],
        [ 0.5271, -0.3841, -0.2339],
        [ 0.5271, -0.3841, -0.2339],
        [ 0.5271, -0.3841, -0.2339],
        [ 0.5271, -0.3841, -0.2339],
        [ 0.5271, -0.3841, -0.2339],
        [ 0.5271, -0.3841, -0.2339]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


53it [01:44,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5270, -0.3827, -0.2351],
        [ 0.5270, -0.3827, -0.2351],
        [ 0.5270, -0.3827, -0.2351],
        [ 0.5270, -0.3827, -0.2351],
        [ 0.5270, -0.3827, -0.2351],
        [ 0.5270, -0.3827, -0.2351],
        [ 0.5270, -0.3827, -0.2351],
        [ 0.5270, -0.3827, -0.2351]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


54it [01:46,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5273, -0.3812, -0.2370],
        [ 0.5273, -0.3812, -0.2370],
        [ 0.5273, -0.3812, -0.2370],
        [ 0.5273, -0.3812, -0.2370],
        [ 0.5273, -0.3812, -0.2370],
        [ 0.5273, -0.3812, -0.2370],
        [ 0.5273, -0.3812, -0.2370],
        [ 0.5273, -0.3812, -0.2370]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


55it [01:48,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5261, -0.3796, -0.2372],
        [ 0.5261, -0.3796, -0.2372],
        [ 0.5261, -0.3796, -0.2372],
        [ 0.5261, -0.3796, -0.2372],
        [ 0.5261, -0.3796, -0.2372],
        [ 0.5261, -0.3796, -0.2372],
        [ 0.5261, -0.3796, -0.2372],
        [ 0.5261, -0.3796, -0.2372]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


56it [01:50,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5256, -0.3780, -0.2381],
        [ 0.5256, -0.3780, -0.2381],
        [ 0.5256, -0.3780, -0.2381],
        [ 0.5256, -0.3780, -0.2381],
        [ 0.5256, -0.3780, -0.2381],
        [ 0.5256, -0.3780, -0.2381],
        [ 0.5256, -0.3780, -0.2381],
        [ 0.5256, -0.3780, -0.2381]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


57it [01:52,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5242, -0.3771, -0.2374],
        [ 0.5242, -0.3771, -0.2374],
        [ 0.5242, -0.3771, -0.2374],
        [ 0.5242, -0.3771, -0.2374],
        [ 0.5242, -0.3771, -0.2374],
        [ 0.5242, -0.3771, -0.2374],
        [ 0.5242, -0.3771, -0.2374],
        [ 0.5242, -0.3771, -0.2374]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


58it [01:53,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5222, -0.3754, -0.2367],
        [ 0.5222, -0.3754, -0.2367],
        [ 0.5222, -0.3754, -0.2367],
        [ 0.5222, -0.3754, -0.2367],
        [ 0.5222, -0.3754, -0.2367],
        [ 0.5222, -0.3754, -0.2367],
        [ 0.5222, -0.3754, -0.2367],
        [ 0.5222, -0.3754, -0.2367]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


59it [01:55,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5221, -0.3751, -0.2369],
        [ 0.5221, -0.3751, -0.2369],
        [ 0.5221, -0.3751, -0.2369],
        [ 0.5221, -0.3751, -0.2369],
        [ 0.5221, -0.3751, -0.2369],
        [ 0.5221, -0.3751, -0.2369],
        [ 0.5221, -0.3751, -0.2369],
        [ 0.5221, -0.3751, -0.2369]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


60it [01:57,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5232, -0.3755, -0.2378],
        [ 0.5232, -0.3755, -0.2378],
        [ 0.5232, -0.3755, -0.2378],
        [ 0.5232, -0.3755, -0.2378],
        [ 0.5232, -0.3755, -0.2378],
        [ 0.5232, -0.3755, -0.2378],
        [ 0.5232, -0.3755, -0.2378],
        [ 0.5232, -0.3755, -0.2378]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


61it [01:59,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5240, -0.3756, -0.2386],
        [ 0.5240, -0.3756, -0.2386],
        [ 0.5240, -0.3756, -0.2386],
        [ 0.5240, -0.3756, -0.2386],
        [ 0.5240, -0.3756, -0.2386],
        [ 0.5240, -0.3756, -0.2386],
        [ 0.5240, -0.3756, -0.2386],
        [ 0.5240, -0.3756, -0.2386]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


62it [02:01,  1.80s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5239, -0.3739, -0.2401],
        [ 0.5239, -0.3739, -0.2401],
        [ 0.5239, -0.3739, -0.2401],
        [ 0.5239, -0.3739, -0.2401],
        [ 0.5239, -0.3739, -0.2401],
        [ 0.5239, -0.3739, -0.2401],
        [ 0.5239, -0.3739, -0.2401],
        [ 0.5239, -0.3739, -0.2401]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


63it [02:03,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5236, -0.3722, -0.2415],
        [ 0.5236, -0.3722, -0.2415],
        [ 0.5236, -0.3722, -0.2415],
        [ 0.5236, -0.3722, -0.2415],
        [ 0.5236, -0.3722, -0.2415],
        [ 0.5236, -0.3722, -0.2415],
        [ 0.5236, -0.3722, -0.2415],
        [ 0.5236, -0.3722, -0.2415]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


64it [02:05,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5212, -0.3705, -0.2404],
        [ 0.5212, -0.3705, -0.2404],
        [ 0.5212, -0.3705, -0.2404],
        [ 0.5212, -0.3705, -0.2404],
        [ 0.5212, -0.3705, -0.2404],
        [ 0.5212, -0.3705, -0.2404],
        [ 0.5212, -0.3705, -0.2404],
        [ 0.5212, -0.3705, -0.2404]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


65it [02:07,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5164, -0.3657, -0.2394],
        [ 0.5164, -0.3657, -0.2394],
        [ 0.5164, -0.3657, -0.2394],
        [ 0.5164, -0.3657, -0.2394],
        [ 0.5164, -0.3657, -0.2394],
        [ 0.5164, -0.3657, -0.2394],
        [ 0.5164, -0.3657, -0.2394],
        [ 0.5164, -0.3657, -0.2394]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


66it [02:08,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5119, -0.3604, -0.2393],
        [ 0.5119, -0.3604, -0.2393],
        [ 0.5119, -0.3604, -0.2393],
        [ 0.5119, -0.3604, -0.2393],
        [ 0.5119, -0.3604, -0.2393],
        [ 0.5119, -0.3604, -0.2393],
        [ 0.5119, -0.3604, -0.2393],
        [ 0.5119, -0.3604, -0.2393]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


67it [02:10,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5084, -0.3571, -0.2384],
        [ 0.5084, -0.3571, -0.2384],
        [ 0.5084, -0.3571, -0.2384],
        [ 0.5084, -0.3571, -0.2384],
        [ 0.5084, -0.3571, -0.2384],
        [ 0.5084, -0.3571, -0.2384],
        [ 0.5084, -0.3571, -0.2384],
        [ 0.5084, -0.3571, -0.2384]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


68it [02:12,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5044, -0.3539, -0.2368],
        [ 0.5044, -0.3539, -0.2368],
        [ 0.5044, -0.3539, -0.2368],
        [ 0.5044, -0.3539, -0.2368],
        [ 0.5044, -0.3539, -0.2368],
        [ 0.5044, -0.3539, -0.2368],
        [ 0.5044, -0.3539, -0.2368],
        [ 0.5044, -0.3539, -0.2368]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


69it [02:14,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5013, -0.3500, -0.2370],
        [ 0.5013, -0.3500, -0.2370],
        [ 0.5013, -0.3500, -0.2370],
        [ 0.5013, -0.3500, -0.2370],
        [ 0.5013, -0.3500, -0.2370],
        [ 0.5013, -0.3500, -0.2370],
        [ 0.5013, -0.3500, -0.2370],
        [ 0.5013, -0.3500, -0.2370]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


70it [02:16,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4990, -0.3480, -0.2363],
        [ 0.4990, -0.3480, -0.2363],
        [ 0.4990, -0.3480, -0.2363],
        [ 0.4990, -0.3480, -0.2363],
        [ 0.4990, -0.3480, -0.2363],
        [ 0.4990, -0.3480, -0.2363],
        [ 0.4990, -0.3480, -0.2363],
        [ 0.4990, -0.3480, -0.2363]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


71it [02:18,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4975, -0.3460, -0.2365],
        [ 0.4975, -0.3460, -0.2365],
        [ 0.4975, -0.3460, -0.2365],
        [ 0.4975, -0.3460, -0.2365],
        [ 0.4975, -0.3460, -0.2365],
        [ 0.4975, -0.3460, -0.2365],
        [ 0.4975, -0.3460, -0.2365],
        [ 0.4975, -0.3460, -0.2365]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


72it [02:20,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4954, -0.3432, -0.2367],
        [ 0.4954, -0.3432, -0.2367],
        [ 0.4954, -0.3432, -0.2367],
        [ 0.4954, -0.3432, -0.2367],
        [ 0.4954, -0.3432, -0.2367],
        [ 0.4954, -0.3432, -0.2367],
        [ 0.4954, -0.3432, -0.2367],
        [ 0.4954, -0.3432, -0.2367]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


73it [02:22,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4933, -0.3414, -0.2361],
        [ 0.4933, -0.3414, -0.2361],
        [ 0.4933, -0.3414, -0.2361],
        [ 0.4933, -0.3414, -0.2361],
        [ 0.4933, -0.3414, -0.2361],
        [ 0.4933, -0.3414, -0.2361],
        [ 0.4933, -0.3414, -0.2361],
        [ 0.4933, -0.3414, -0.2361]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


74it [02:23,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4920, -0.3396, -0.2363],
        [ 0.4920, -0.3396, -0.2363],
        [ 0.4920, -0.3396, -0.2363],
        [ 0.4920, -0.3396, -0.2363],
        [ 0.4920, -0.3396, -0.2363],
        [ 0.4920, -0.3396, -0.2363],
        [ 0.4920, -0.3396, -0.2363],
        [ 0.4920, -0.3396, -0.2363]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


75it [02:25,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4907, -0.3386, -0.2358],
        [ 0.4907, -0.3386, -0.2358],
        [ 0.4907, -0.3386, -0.2358],
        [ 0.4907, -0.3386, -0.2358],
        [ 0.4907, -0.3386, -0.2358],
        [ 0.4907, -0.3386, -0.2358],
        [ 0.4907, -0.3386, -0.2358],
        [ 0.4907, -0.3386, -0.2358]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


76it [02:27,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4882, -0.3368, -0.2346],
        [ 0.4882, -0.3368, -0.2346],
        [ 0.4882, -0.3368, -0.2346],
        [ 0.4882, -0.3368, -0.2346],
        [ 0.4882, -0.3368, -0.2346],
        [ 0.4882, -0.3368, -0.2346],
        [ 0.4882, -0.3368, -0.2346],
        [ 0.4882, -0.3368, -0.2346]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


77it [02:29,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4858, -0.3350, -0.2335],
        [ 0.4858, -0.3350, -0.2335],
        [ 0.4858, -0.3350, -0.2335],
        [ 0.4858, -0.3350, -0.2335],
        [ 0.4858, -0.3350, -0.2335],
        [ 0.4858, -0.3350, -0.2335],
        [ 0.4858, -0.3350, -0.2335],
        [ 0.4858, -0.3350, -0.2335]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


78it [02:31,  1.94s/it]


Train Loss at epoch 5: 1.026690125465393



1it [00:00,  1.12it/s]

torch.Size([8, 20, 1024])


2it [00:01,  1.01s/it]

torch.Size([8, 20, 1024])


3it [00:03,  1.05s/it]

torch.Size([8, 20, 1024])


4it [00:03,  1.04it/s]

torch.Size([8, 20, 1024])


5it [00:04,  1.14it/s]

torch.Size([8, 20, 1024])


6it [00:05,  1.23it/s]

torch.Size([8, 20, 1024])


7it [00:06,  1.29it/s]

torch.Size([8, 20, 1024])


8it [00:06,  1.21it/s]


torch.Size([1, 20, 1024])
test Loss at epoch 5: 1.2024669647216797



0it [00:00, ?it/s]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4841, -0.3341, -0.2325],
        [ 0.4841, -0.3341, -0.2325],
        [ 0.4841, -0.3341, -0.2325],
        [ 0.4841, -0.3341, -0.2325],
        [ 0.4841, -0.3341, -0.2325],
        [ 0.4841, -0.3341, -0.2325],
        [ 0.4841, -0.3341, -0.2325],
        [ 0.4841, -0.3341, -0.2325]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


1it [00:01,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4845, -0.3346, -0.2324],
        [ 0.4845, -0.3346, -0.2324],
        [ 0.4845, -0.3346, -0.2324],
        [ 0.4845, -0.3346, -0.2324],
        [ 0.4845, -0.3346, -0.2324],
        [ 0.4845, -0.3346, -0.2324],
        [ 0.4845, -0.3346, -0.2324],
        [ 0.4845, -0.3346, -0.2324]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


2it [00:03,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4848, -0.3342, -0.2331],
        [ 0.4848, -0.3342, -0.2331],
        [ 0.4848, -0.3342, -0.2331],
        [ 0.4848, -0.3342, -0.2331],
        [ 0.4848, -0.3342, -0.2331],
        [ 0.4848, -0.3342, -0.2331],
        [ 0.4848, -0.3342, -0.2331],
        [ 0.4848, -0.3342, -0.2331]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


3it [00:05,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4855, -0.3352, -0.2330],
        [ 0.4855, -0.3352, -0.2330],
        [ 0.4855, -0.3352, -0.2330],
        [ 0.4855, -0.3352, -0.2330],
        [ 0.4855, -0.3352, -0.2330],
        [ 0.4855, -0.3352, -0.2330],
        [ 0.4855, -0.3352, -0.2330],
        [ 0.4855, -0.3352, -0.2330]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


4it [00:07,  1.80s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4855, -0.3368, -0.2314],
        [ 0.4855, -0.3368, -0.2314],
        [ 0.4855, -0.3368, -0.2314],
        [ 0.4855, -0.3368, -0.2314],
        [ 0.4855, -0.3368, -0.2314],
        [ 0.4855, -0.3368, -0.2314],
        [ 0.4855, -0.3368, -0.2314],
        [ 0.4855, -0.3368, -0.2314]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


5it [00:09,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4847, -0.3389, -0.2284],
        [ 0.4847, -0.3389, -0.2284],
        [ 0.4847, -0.3389, -0.2284],
        [ 0.4847, -0.3389, -0.2284],
        [ 0.4847, -0.3389, -0.2284],
        [ 0.4847, -0.3389, -0.2284],
        [ 0.4847, -0.3389, -0.2284],
        [ 0.4847, -0.3389, -0.2284]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


6it [00:11,  2.00s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4833, -0.3398, -0.2258],
        [ 0.4833, -0.3398, -0.2258],
        [ 0.4833, -0.3398, -0.2258],
        [ 0.4833, -0.3398, -0.2258],
        [ 0.4833, -0.3398, -0.2258],
        [ 0.4833, -0.3398, -0.2258],
        [ 0.4833, -0.3398, -0.2258],
        [ 0.4833, -0.3398, -0.2258]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


7it [00:13,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4819, -0.3413, -0.2227],
        [ 0.4819, -0.3413, -0.2227],
        [ 0.4819, -0.3413, -0.2227],
        [ 0.4819, -0.3413, -0.2227],
        [ 0.4819, -0.3413, -0.2227],
        [ 0.4819, -0.3413, -0.2227],
        [ 0.4819, -0.3413, -0.2227],
        [ 0.4819, -0.3413, -0.2227]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


8it [00:15,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4812, -0.3440, -0.2192],
        [ 0.4812, -0.3440, -0.2192],
        [ 0.4812, -0.3440, -0.2192],
        [ 0.4812, -0.3440, -0.2192],
        [ 0.4812, -0.3440, -0.2192],
        [ 0.4812, -0.3440, -0.2192],
        [ 0.4812, -0.3440, -0.2192],
        [ 0.4812, -0.3440, -0.2192]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


9it [00:16,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4805, -0.3471, -0.2153],
        [ 0.4805, -0.3471, -0.2153],
        [ 0.4805, -0.3471, -0.2153],
        [ 0.4805, -0.3471, -0.2153],
        [ 0.4805, -0.3471, -0.2153],
        [ 0.4805, -0.3471, -0.2153],
        [ 0.4805, -0.3471, -0.2153],
        [ 0.4805, -0.3471, -0.2153]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


10it [00:18,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4810, -0.3505, -0.2126],
        [ 0.4810, -0.3505, -0.2126],
        [ 0.4810, -0.3505, -0.2126],
        [ 0.4810, -0.3505, -0.2126],
        [ 0.4810, -0.3505, -0.2126],
        [ 0.4810, -0.3505, -0.2126],
        [ 0.4810, -0.3505, -0.2126],
        [ 0.4810, -0.3505, -0.2126]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


11it [00:20,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4808, -0.3526, -0.2102],
        [ 0.4808, -0.3526, -0.2102],
        [ 0.4808, -0.3526, -0.2102],
        [ 0.4808, -0.3526, -0.2102],
        [ 0.4808, -0.3526, -0.2102],
        [ 0.4808, -0.3526, -0.2102],
        [ 0.4808, -0.3526, -0.2102],
        [ 0.4808, -0.3526, -0.2102]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


12it [00:22,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4805, -0.3544, -0.2081],
        [ 0.4805, -0.3544, -0.2081],
        [ 0.4805, -0.3544, -0.2081],
        [ 0.4805, -0.3544, -0.2081],
        [ 0.4805, -0.3544, -0.2081],
        [ 0.4805, -0.3544, -0.2081],
        [ 0.4805, -0.3544, -0.2081],
        [ 0.4805, -0.3544, -0.2081]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


13it [00:24,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4802, -0.3550, -0.2070],
        [ 0.4802, -0.3550, -0.2070],
        [ 0.4802, -0.3550, -0.2070],
        [ 0.4802, -0.3550, -0.2070],
        [ 0.4802, -0.3550, -0.2070],
        [ 0.4802, -0.3550, -0.2070],
        [ 0.4802, -0.3550, -0.2070],
        [ 0.4802, -0.3550, -0.2070]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


14it [00:26,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4791, -0.3547, -0.2061],
        [ 0.4791, -0.3547, -0.2061],
        [ 0.4791, -0.3547, -0.2061],
        [ 0.4791, -0.3547, -0.2061],
        [ 0.4791, -0.3547, -0.2061],
        [ 0.4791, -0.3547, -0.2061],
        [ 0.4791, -0.3547, -0.2061],
        [ 0.4791, -0.3547, -0.2061]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


15it [00:28,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4788, -0.3550, -0.2054],
        [ 0.4788, -0.3550, -0.2054],
        [ 0.4788, -0.3550, -0.2054],
        [ 0.4788, -0.3550, -0.2054],
        [ 0.4788, -0.3550, -0.2054],
        [ 0.4788, -0.3550, -0.2054],
        [ 0.4788, -0.3550, -0.2054],
        [ 0.4788, -0.3550, -0.2054]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


16it [00:30,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4777, -0.3559, -0.2032],
        [ 0.4777, -0.3559, -0.2032],
        [ 0.4777, -0.3559, -0.2032],
        [ 0.4777, -0.3559, -0.2032],
        [ 0.4777, -0.3559, -0.2032],
        [ 0.4777, -0.3559, -0.2032],
        [ 0.4777, -0.3559, -0.2032],
        [ 0.4777, -0.3559, -0.2032]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


17it [00:32,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4760, -0.3550, -0.2021],
        [ 0.4760, -0.3550, -0.2021],
        [ 0.4760, -0.3550, -0.2021],
        [ 0.4760, -0.3550, -0.2021],
        [ 0.4760, -0.3550, -0.2021],
        [ 0.4760, -0.3550, -0.2021],
        [ 0.4760, -0.3550, -0.2021],
        [ 0.4760, -0.3550, -0.2021]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


18it [00:33,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4757, -0.3547, -0.2020],
        [ 0.4757, -0.3547, -0.2020],
        [ 0.4757, -0.3547, -0.2020],
        [ 0.4757, -0.3547, -0.2020],
        [ 0.4757, -0.3547, -0.2020],
        [ 0.4757, -0.3547, -0.2020],
        [ 0.4757, -0.3547, -0.2020],
        [ 0.4757, -0.3547, -0.2020]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


19it [00:35,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4760, -0.3544, -0.2027],
        [ 0.4760, -0.3544, -0.2027],
        [ 0.4760, -0.3544, -0.2027],
        [ 0.4760, -0.3544, -0.2027],
        [ 0.4760, -0.3544, -0.2027],
        [ 0.4760, -0.3544, -0.2027],
        [ 0.4760, -0.3544, -0.2027],
        [ 0.4760, -0.3544, -0.2027]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


20it [00:37,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4762, -0.3539, -0.2034],
        [ 0.4762, -0.3539, -0.2034],
        [ 0.4762, -0.3539, -0.2034],
        [ 0.4762, -0.3539, -0.2034],
        [ 0.4762, -0.3539, -0.2034],
        [ 0.4762, -0.3539, -0.2034],
        [ 0.4762, -0.3539, -0.2034],
        [ 0.4762, -0.3539, -0.2034]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


21it [00:39,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4756, -0.3534, -0.2033],
        [ 0.4756, -0.3534, -0.2033],
        [ 0.4756, -0.3534, -0.2033],
        [ 0.4756, -0.3534, -0.2033],
        [ 0.4756, -0.3534, -0.2033],
        [ 0.4756, -0.3534, -0.2033],
        [ 0.4756, -0.3534, -0.2033],
        [ 0.4756, -0.3534, -0.2033]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


22it [00:41,  2.02s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4757, -0.3542, -0.2026],
        [ 0.4757, -0.3542, -0.2026],
        [ 0.4757, -0.3542, -0.2026],
        [ 0.4757, -0.3542, -0.2026],
        [ 0.4757, -0.3542, -0.2026],
        [ 0.4757, -0.3542, -0.2026],
        [ 0.4757, -0.3542, -0.2026],
        [ 0.4757, -0.3542, -0.2026]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


23it [00:43,  1.95s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4757, -0.3556, -0.2012],
        [ 0.4757, -0.3556, -0.2012],
        [ 0.4757, -0.3556, -0.2012],
        [ 0.4757, -0.3556, -0.2012],
        [ 0.4757, -0.3556, -0.2012],
        [ 0.4757, -0.3556, -0.2012],
        [ 0.4757, -0.3556, -0.2012],
        [ 0.4757, -0.3556, -0.2012]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


24it [00:45,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4756, -0.3559, -0.2008],
        [ 0.4756, -0.3559, -0.2008],
        [ 0.4756, -0.3559, -0.2008],
        [ 0.4756, -0.3559, -0.2008],
        [ 0.4756, -0.3559, -0.2008],
        [ 0.4756, -0.3559, -0.2008],
        [ 0.4756, -0.3559, -0.2008],
        [ 0.4756, -0.3559, -0.2008]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


25it [00:47,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4748, -0.3545, -0.2012],
        [ 0.4748, -0.3545, -0.2012],
        [ 0.4748, -0.3545, -0.2012],
        [ 0.4748, -0.3545, -0.2012],
        [ 0.4748, -0.3545, -0.2012],
        [ 0.4748, -0.3545, -0.2012],
        [ 0.4748, -0.3545, -0.2012],
        [ 0.4748, -0.3545, -0.2012]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


26it [00:48,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4741, -0.3531, -0.2017],
        [ 0.4741, -0.3531, -0.2017],
        [ 0.4741, -0.3531, -0.2017],
        [ 0.4741, -0.3531, -0.2017],
        [ 0.4741, -0.3531, -0.2017],
        [ 0.4741, -0.3531, -0.2017],
        [ 0.4741, -0.3531, -0.2017],
        [ 0.4741, -0.3531, -0.2017]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


27it [00:50,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4746, -0.3524, -0.2030],
        [ 0.4746, -0.3524, -0.2030],
        [ 0.4746, -0.3524, -0.2030],
        [ 0.4746, -0.3524, -0.2030],
        [ 0.4746, -0.3524, -0.2030],
        [ 0.4746, -0.3524, -0.2030],
        [ 0.4746, -0.3524, -0.2030],
        [ 0.4746, -0.3524, -0.2030]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


28it [00:52,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4756, -0.3524, -0.2042],
        [ 0.4756, -0.3524, -0.2042],
        [ 0.4756, -0.3524, -0.2042],
        [ 0.4756, -0.3524, -0.2042],
        [ 0.4756, -0.3524, -0.2042],
        [ 0.4756, -0.3524, -0.2042],
        [ 0.4756, -0.3524, -0.2042],
        [ 0.4756, -0.3524, -0.2042]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


29it [00:55,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4765, -0.3530, -0.2046],
        [ 0.4765, -0.3530, -0.2046],
        [ 0.4765, -0.3530, -0.2046],
        [ 0.4765, -0.3530, -0.2046],
        [ 0.4765, -0.3530, -0.2046],
        [ 0.4765, -0.3530, -0.2046],
        [ 0.4765, -0.3530, -0.2046],
        [ 0.4765, -0.3530, -0.2046]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


30it [00:56,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4785, -0.3542, -0.2058],
        [ 0.4785, -0.3542, -0.2058],
        [ 0.4785, -0.3542, -0.2058],
        [ 0.4785, -0.3542, -0.2058],
        [ 0.4785, -0.3542, -0.2058],
        [ 0.4785, -0.3542, -0.2058],
        [ 0.4785, -0.3542, -0.2058],
        [ 0.4785, -0.3542, -0.2058]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


31it [00:58,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4809, -0.3544, -0.2085],
        [ 0.4809, -0.3544, -0.2085],
        [ 0.4809, -0.3544, -0.2085],
        [ 0.4809, -0.3544, -0.2085],
        [ 0.4809, -0.3544, -0.2085],
        [ 0.4809, -0.3544, -0.2085],
        [ 0.4809, -0.3544, -0.2085],
        [ 0.4809, -0.3544, -0.2085]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


32it [01:00,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4836, -0.3551, -0.2110],
        [ 0.4836, -0.3551, -0.2110],
        [ 0.4836, -0.3551, -0.2110],
        [ 0.4836, -0.3551, -0.2110],
        [ 0.4836, -0.3551, -0.2110],
        [ 0.4836, -0.3551, -0.2110],
        [ 0.4836, -0.3551, -0.2110],
        [ 0.4836, -0.3551, -0.2110]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


33it [01:02,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4853, -0.3548, -0.2133],
        [ 0.4853, -0.3548, -0.2133],
        [ 0.4853, -0.3548, -0.2133],
        [ 0.4853, -0.3548, -0.2133],
        [ 0.4853, -0.3548, -0.2133],
        [ 0.4853, -0.3548, -0.2133],
        [ 0.4853, -0.3548, -0.2133],
        [ 0.4853, -0.3548, -0.2133]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


34it [01:04,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4861, -0.3536, -0.2154],
        [ 0.4861, -0.3536, -0.2154],
        [ 0.4861, -0.3536, -0.2154],
        [ 0.4861, -0.3536, -0.2154],
        [ 0.4861, -0.3536, -0.2154],
        [ 0.4861, -0.3536, -0.2154],
        [ 0.4861, -0.3536, -0.2154],
        [ 0.4861, -0.3536, -0.2154]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


35it [01:05,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4874, -0.3532, -0.2174],
        [ 0.4874, -0.3532, -0.2174],
        [ 0.4874, -0.3532, -0.2174],
        [ 0.4874, -0.3532, -0.2174],
        [ 0.4874, -0.3532, -0.2174],
        [ 0.4874, -0.3532, -0.2174],
        [ 0.4874, -0.3532, -0.2174],
        [ 0.4874, -0.3532, -0.2174]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


36it [01:07,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4891, -0.3518, -0.2207],
        [ 0.4891, -0.3518, -0.2207],
        [ 0.4891, -0.3518, -0.2207],
        [ 0.4891, -0.3518, -0.2207],
        [ 0.4891, -0.3518, -0.2207],
        [ 0.4891, -0.3518, -0.2207],
        [ 0.4891, -0.3518, -0.2207],
        [ 0.4891, -0.3518, -0.2207]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


37it [01:10,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4918, -0.3512, -0.2245],
        [ 0.4918, -0.3512, -0.2245],
        [ 0.4918, -0.3512, -0.2245],
        [ 0.4918, -0.3512, -0.2245],
        [ 0.4918, -0.3512, -0.2245],
        [ 0.4918, -0.3512, -0.2245],
        [ 0.4918, -0.3512, -0.2245],
        [ 0.4918, -0.3512, -0.2245]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


38it [01:11,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4928, -0.3505, -0.2264],
        [ 0.4928, -0.3505, -0.2264],
        [ 0.4928, -0.3505, -0.2264],
        [ 0.4928, -0.3505, -0.2264],
        [ 0.4928, -0.3505, -0.2264],
        [ 0.4928, -0.3505, -0.2264],
        [ 0.4928, -0.3505, -0.2264],
        [ 0.4928, -0.3505, -0.2264]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


39it [01:13,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4937, -0.3497, -0.2282],
        [ 0.4937, -0.3497, -0.2282],
        [ 0.4937, -0.3497, -0.2282],
        [ 0.4937, -0.3497, -0.2282],
        [ 0.4937, -0.3497, -0.2282],
        [ 0.4937, -0.3497, -0.2282],
        [ 0.4937, -0.3497, -0.2282],
        [ 0.4937, -0.3497, -0.2282]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


40it [01:15,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4930, -0.3481, -0.2290],
        [ 0.4930, -0.3481, -0.2290],
        [ 0.4930, -0.3481, -0.2290],
        [ 0.4930, -0.3481, -0.2290],
        [ 0.4930, -0.3481, -0.2290],
        [ 0.4930, -0.3481, -0.2290],
        [ 0.4930, -0.3481, -0.2290],
        [ 0.4930, -0.3481, -0.2290]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


41it [01:17,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4916, -0.3464, -0.2290],
        [ 0.4916, -0.3464, -0.2290],
        [ 0.4916, -0.3464, -0.2290],
        [ 0.4916, -0.3464, -0.2290],
        [ 0.4916, -0.3464, -0.2290],
        [ 0.4916, -0.3464, -0.2290],
        [ 0.4916, -0.3464, -0.2290],
        [ 0.4916, -0.3464, -0.2290]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


42it [01:19,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4915, -0.3464, -0.2290],
        [ 0.4915, -0.3464, -0.2290],
        [ 0.4915, -0.3464, -0.2290],
        [ 0.4915, -0.3464, -0.2290],
        [ 0.4915, -0.3464, -0.2290],
        [ 0.4915, -0.3464, -0.2290],
        [ 0.4915, -0.3464, -0.2290],
        [ 0.4915, -0.3464, -0.2290]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


43it [01:20,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4907, -0.3469, -0.2275],
        [ 0.4907, -0.3469, -0.2275],
        [ 0.4907, -0.3469, -0.2275],
        [ 0.4907, -0.3469, -0.2275],
        [ 0.4907, -0.3469, -0.2275],
        [ 0.4907, -0.3469, -0.2275],
        [ 0.4907, -0.3469, -0.2275],
        [ 0.4907, -0.3469, -0.2275]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


44it [01:23,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4885, -0.3473, -0.2246],
        [ 0.4885, -0.3473, -0.2246],
        [ 0.4885, -0.3473, -0.2246],
        [ 0.4885, -0.3473, -0.2246],
        [ 0.4885, -0.3473, -0.2246],
        [ 0.4885, -0.3473, -0.2246],
        [ 0.4885, -0.3473, -0.2246],
        [ 0.4885, -0.3473, -0.2246]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


45it [01:25,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4878, -0.3474, -0.2235],
        [ 0.4878, -0.3474, -0.2235],
        [ 0.4878, -0.3474, -0.2235],
        [ 0.4878, -0.3474, -0.2235],
        [ 0.4878, -0.3474, -0.2235],
        [ 0.4878, -0.3474, -0.2235],
        [ 0.4878, -0.3474, -0.2235],
        [ 0.4878, -0.3474, -0.2235]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


46it [01:26,  1.91s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4870, -0.3490, -0.2211],
        [ 0.4870, -0.3490, -0.2211],
        [ 0.4870, -0.3490, -0.2211],
        [ 0.4870, -0.3490, -0.2211],
        [ 0.4870, -0.3490, -0.2211],
        [ 0.4870, -0.3490, -0.2211],
        [ 0.4870, -0.3490, -0.2211],
        [ 0.4870, -0.3490, -0.2211]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


47it [01:28,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4862, -0.3495, -0.2197],
        [ 0.4862, -0.3495, -0.2197],
        [ 0.4862, -0.3495, -0.2197],
        [ 0.4862, -0.3495, -0.2197],
        [ 0.4862, -0.3495, -0.2197],
        [ 0.4862, -0.3495, -0.2197],
        [ 0.4862, -0.3495, -0.2197],
        [ 0.4862, -0.3495, -0.2197]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


48it [01:30,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4854, -0.3482, -0.2201],
        [ 0.4854, -0.3482, -0.2201],
        [ 0.4854, -0.3482, -0.2201],
        [ 0.4854, -0.3482, -0.2201],
        [ 0.4854, -0.3482, -0.2201],
        [ 0.4854, -0.3482, -0.2201],
        [ 0.4854, -0.3482, -0.2201],
        [ 0.4854, -0.3482, -0.2201]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


49it [01:32,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4846, -0.3461, -0.2212],
        [ 0.4846, -0.3461, -0.2212],
        [ 0.4846, -0.3461, -0.2212],
        [ 0.4846, -0.3461, -0.2212],
        [ 0.4846, -0.3461, -0.2212],
        [ 0.4846, -0.3461, -0.2212],
        [ 0.4846, -0.3461, -0.2212],
        [ 0.4846, -0.3461, -0.2212]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


50it [01:34,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4851, -0.3440, -0.2237],
        [ 0.4851, -0.3440, -0.2237],
        [ 0.4851, -0.3440, -0.2237],
        [ 0.4851, -0.3440, -0.2237],
        [ 0.4851, -0.3440, -0.2237],
        [ 0.4851, -0.3440, -0.2237],
        [ 0.4851, -0.3440, -0.2237],
        [ 0.4851, -0.3440, -0.2237]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


51it [01:35,  1.81s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4861, -0.3436, -0.2253],
        [ 0.4861, -0.3436, -0.2253],
        [ 0.4861, -0.3436, -0.2253],
        [ 0.4861, -0.3436, -0.2253],
        [ 0.4861, -0.3436, -0.2253],
        [ 0.4861, -0.3436, -0.2253],
        [ 0.4861, -0.3436, -0.2253],
        [ 0.4861, -0.3436, -0.2253]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


52it [01:37,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4855, -0.3438, -0.2245],
        [ 0.4855, -0.3438, -0.2245],
        [ 0.4855, -0.3438, -0.2245],
        [ 0.4855, -0.3438, -0.2245],
        [ 0.4855, -0.3438, -0.2245],
        [ 0.4855, -0.3438, -0.2245],
        [ 0.4855, -0.3438, -0.2245],
        [ 0.4855, -0.3438, -0.2245]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


53it [01:40,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4843, -0.3423, -0.2245],
        [ 0.4843, -0.3423, -0.2245],
        [ 0.4843, -0.3423, -0.2245],
        [ 0.4843, -0.3423, -0.2245],
        [ 0.4843, -0.3423, -0.2245],
        [ 0.4843, -0.3423, -0.2245],
        [ 0.4843, -0.3423, -0.2245],
        [ 0.4843, -0.3423, -0.2245]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


54it [01:41,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4844, -0.3409, -0.2261],
        [ 0.4844, -0.3409, -0.2261],
        [ 0.4844, -0.3409, -0.2261],
        [ 0.4844, -0.3409, -0.2261],
        [ 0.4844, -0.3409, -0.2261],
        [ 0.4844, -0.3409, -0.2261],
        [ 0.4844, -0.3409, -0.2261],
        [ 0.4844, -0.3409, -0.2261]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


55it [01:43,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4851, -0.3410, -0.2268],
        [ 0.4851, -0.3410, -0.2268],
        [ 0.4851, -0.3410, -0.2268],
        [ 0.4851, -0.3410, -0.2268],
        [ 0.4851, -0.3410, -0.2268],
        [ 0.4851, -0.3410, -0.2268],
        [ 0.4851, -0.3410, -0.2268],
        [ 0.4851, -0.3410, -0.2268]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


56it [01:45,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4856, -0.3409, -0.2274],
        [ 0.4856, -0.3409, -0.2274],
        [ 0.4856, -0.3409, -0.2274],
        [ 0.4856, -0.3409, -0.2274],
        [ 0.4856, -0.3409, -0.2274],
        [ 0.4856, -0.3409, -0.2274],
        [ 0.4856, -0.3409, -0.2274],
        [ 0.4856, -0.3409, -0.2274]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


57it [01:47,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4873, -0.3407, -0.2296],
        [ 0.4873, -0.3407, -0.2296],
        [ 0.4873, -0.3407, -0.2296],
        [ 0.4873, -0.3407, -0.2296],
        [ 0.4873, -0.3407, -0.2296],
        [ 0.4873, -0.3407, -0.2296],
        [ 0.4873, -0.3407, -0.2296],
        [ 0.4873, -0.3407, -0.2296]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


58it [01:49,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4887, -0.3403, -0.2315],
        [ 0.4887, -0.3403, -0.2315],
        [ 0.4887, -0.3403, -0.2315],
        [ 0.4887, -0.3403, -0.2315],
        [ 0.4887, -0.3403, -0.2315],
        [ 0.4887, -0.3403, -0.2315],
        [ 0.4887, -0.3403, -0.2315],
        [ 0.4887, -0.3403, -0.2315]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


59it [01:51,  1.85s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4892, -0.3399, -0.2325],
        [ 0.4892, -0.3399, -0.2325],
        [ 0.4892, -0.3399, -0.2325],
        [ 0.4892, -0.3399, -0.2325],
        [ 0.4892, -0.3399, -0.2325],
        [ 0.4892, -0.3399, -0.2325],
        [ 0.4892, -0.3399, -0.2325],
        [ 0.4892, -0.3399, -0.2325]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


60it [01:53,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4908, -0.3401, -0.2342],
        [ 0.4908, -0.3401, -0.2342],
        [ 0.4908, -0.3401, -0.2342],
        [ 0.4908, -0.3401, -0.2342],
        [ 0.4908, -0.3401, -0.2342],
        [ 0.4908, -0.3401, -0.2342],
        [ 0.4908, -0.3401, -0.2342],
        [ 0.4908, -0.3401, -0.2342]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


61it [01:55,  2.01s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4942, -0.3410, -0.2373],
        [ 0.4942, -0.3410, -0.2373],
        [ 0.4942, -0.3410, -0.2373],
        [ 0.4942, -0.3410, -0.2373],
        [ 0.4942, -0.3410, -0.2373],
        [ 0.4942, -0.3410, -0.2373],
        [ 0.4942, -0.3410, -0.2373],
        [ 0.4942, -0.3410, -0.2373]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


62it [01:57,  1.94s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4971, -0.3424, -0.2393],
        [ 0.4971, -0.3424, -0.2393],
        [ 0.4971, -0.3424, -0.2393],
        [ 0.4971, -0.3424, -0.2393],
        [ 0.4971, -0.3424, -0.2393],
        [ 0.4971, -0.3424, -0.2393],
        [ 0.4971, -0.3424, -0.2393],
        [ 0.4971, -0.3424, -0.2393]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


63it [01:58,  1.90s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4996, -0.3427, -0.2419],
        [ 0.4996, -0.3427, -0.2419],
        [ 0.4996, -0.3427, -0.2419],
        [ 0.4996, -0.3427, -0.2419],
        [ 0.4996, -0.3427, -0.2419],
        [ 0.4996, -0.3427, -0.2419],
        [ 0.4996, -0.3427, -0.2419],
        [ 0.4996, -0.3427, -0.2419]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


64it [02:00,  1.86s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5017, -0.3429, -0.2442],
        [ 0.5017, -0.3429, -0.2442],
        [ 0.5017, -0.3429, -0.2442],
        [ 0.5017, -0.3429, -0.2442],
        [ 0.5017, -0.3429, -0.2442],
        [ 0.5017, -0.3429, -0.2442],
        [ 0.5017, -0.3429, -0.2442],
        [ 0.5017, -0.3429, -0.2442]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


65it [02:02,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5035, -0.3413, -0.2478],
        [ 0.5035, -0.3413, -0.2478],
        [ 0.5035, -0.3413, -0.2478],
        [ 0.5035, -0.3413, -0.2478],
        [ 0.5035, -0.3413, -0.2478],
        [ 0.5035, -0.3413, -0.2478],
        [ 0.5035, -0.3413, -0.2478],
        [ 0.5035, -0.3413, -0.2478]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


66it [02:04,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5037, -0.3405, -0.2487],
        [ 0.5037, -0.3405, -0.2487],
        [ 0.5037, -0.3405, -0.2487],
        [ 0.5037, -0.3405, -0.2487],
        [ 0.5037, -0.3405, -0.2487],
        [ 0.5037, -0.3405, -0.2487],
        [ 0.5037, -0.3405, -0.2487],
        [ 0.5037, -0.3405, -0.2487]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


67it [02:06,  1.88s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5036, -0.3396, -0.2496],
        [ 0.5036, -0.3396, -0.2496],
        [ 0.5036, -0.3396, -0.2496],
        [ 0.5036, -0.3396, -0.2496],
        [ 0.5036, -0.3396, -0.2496],
        [ 0.5036, -0.3396, -0.2496],
        [ 0.5036, -0.3396, -0.2496],
        [ 0.5036, -0.3396, -0.2496]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


68it [02:08,  1.98s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5022, -0.3379, -0.2495],
        [ 0.5022, -0.3379, -0.2495],
        [ 0.5022, -0.3379, -0.2495],
        [ 0.5022, -0.3379, -0.2495],
        [ 0.5022, -0.3379, -0.2495],
        [ 0.5022, -0.3379, -0.2495],
        [ 0.5022, -0.3379, -0.2495],
        [ 0.5022, -0.3379, -0.2495]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


69it [02:10,  1.96s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5014, -0.3370, -0.2495],
        [ 0.5014, -0.3370, -0.2495],
        [ 0.5014, -0.3370, -0.2495],
        [ 0.5014, -0.3370, -0.2495],
        [ 0.5014, -0.3370, -0.2495],
        [ 0.5014, -0.3370, -0.2495],
        [ 0.5014, -0.3370, -0.2495],
        [ 0.5014, -0.3370, -0.2495]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


70it [02:12,  1.92s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.5005, -0.3369, -0.2487],
        [ 0.5005, -0.3369, -0.2487],
        [ 0.5005, -0.3369, -0.2487],
        [ 0.5005, -0.3369, -0.2487],
        [ 0.5005, -0.3369, -0.2487],
        [ 0.5005, -0.3369, -0.2487],
        [ 0.5005, -0.3369, -0.2487],
        [ 0.5005, -0.3369, -0.2487]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


71it [02:13,  1.87s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4996, -0.3373, -0.2472],
        [ 0.4996, -0.3373, -0.2472],
        [ 0.4996, -0.3373, -0.2472],
        [ 0.4996, -0.3373, -0.2472],
        [ 0.4996, -0.3373, -0.2472],
        [ 0.4996, -0.3373, -0.2472],
        [ 0.4996, -0.3373, -0.2472],
        [ 0.4996, -0.3373, -0.2472]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


72it [02:15,  1.84s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4993, -0.3369, -0.2473],
        [ 0.4993, -0.3369, -0.2473],
        [ 0.4993, -0.3369, -0.2473],
        [ 0.4993, -0.3369, -0.2473],
        [ 0.4993, -0.3369, -0.2473],
        [ 0.4993, -0.3369, -0.2473],
        [ 0.4993, -0.3369, -0.2473],
        [ 0.4993, -0.3369, -0.2473]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


73it [02:17,  1.83s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4990, -0.3363, -0.2475],
        [ 0.4990, -0.3363, -0.2475],
        [ 0.4990, -0.3363, -0.2475],
        [ 0.4990, -0.3363, -0.2475],
        [ 0.4990, -0.3363, -0.2475],
        [ 0.4990, -0.3363, -0.2475],
        [ 0.4990, -0.3363, -0.2475],
        [ 0.4990, -0.3363, -0.2475]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


74it [02:19,  1.82s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4978, -0.3372, -0.2453],
        [ 0.4978, -0.3372, -0.2453],
        [ 0.4978, -0.3372, -0.2453],
        [ 0.4978, -0.3372, -0.2453],
        [ 0.4978, -0.3372, -0.2453],
        [ 0.4978, -0.3372, -0.2453],
        [ 0.4978, -0.3372, -0.2453],
        [ 0.4978, -0.3372, -0.2453]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


75it [02:21,  1.89s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4974, -0.3386, -0.2433],
        [ 0.4974, -0.3386, -0.2433],
        [ 0.4974, -0.3386, -0.2433],
        [ 0.4974, -0.3386, -0.2433],
        [ 0.4974, -0.3386, -0.2433],
        [ 0.4974, -0.3386, -0.2433],
        [ 0.4974, -0.3386, -0.2433],
        [ 0.4974, -0.3386, -0.2433]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


76it [02:23,  1.99s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4955, -0.3382, -0.2416],
        [ 0.4955, -0.3382, -0.2416],
        [ 0.4955, -0.3382, -0.2416],
        [ 0.4955, -0.3382, -0.2416],
        [ 0.4955, -0.3382, -0.2416],
        [ 0.4955, -0.3382, -0.2416],
        [ 0.4955, -0.3382, -0.2416],
        [ 0.4955, -0.3382, -0.2416]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


77it [02:25,  1.93s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4943, -0.3377, -0.2407],
        [ 0.4943, -0.3377, -0.2407],
        [ 0.4943, -0.3377, -0.2407],
        [ 0.4943, -0.3377, -0.2407],
        [ 0.4943, -0.3377, -0.2407],
        [ 0.4943, -0.3377, -0.2407],
        [ 0.4943, -0.3377, -0.2407],
        [ 0.4943, -0.3377, -0.2407]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


78it [02:27,  1.89s/it]


Train Loss at epoch 6: 1.0260907411575317



1it [00:00,  1.55it/s]

torch.Size([8, 20, 1024])


2it [00:01,  1.46it/s]

torch.Size([8, 20, 1024])


3it [00:02,  1.45it/s]

torch.Size([8, 20, 1024])


4it [00:02,  1.46it/s]

torch.Size([8, 20, 1024])


5it [00:03,  1.43it/s]

torch.Size([8, 20, 1024])


6it [00:04,  1.43it/s]

torch.Size([8, 20, 1024])


7it [00:04,  1.43it/s]

torch.Size([8, 20, 1024])


8it [00:06,  1.32it/s]


torch.Size([1, 20, 1024])
test Loss at epoch 6: 1.1142685413360596



0it [00:00, ?it/s]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4918, -0.3348, -0.2407],
        [ 0.4918, -0.3348, -0.2407],
        [ 0.4918, -0.3348, -0.2407],
        [ 0.4918, -0.3348, -0.2407],
        [ 0.4918, -0.3348, -0.2407],
        [ 0.4918, -0.3348, -0.2407],
        [ 0.4918, -0.3348, -0.2407],
        [ 0.4918, -0.3348, -0.2407]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


1it [00:02,  2.28s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4895, -0.3320, -0.2408],
        [ 0.4895, -0.3320, -0.2408],
        [ 0.4895, -0.3320, -0.2408],
        [ 0.4895, -0.3320, -0.2408],
        [ 0.4895, -0.3320, -0.2408],
        [ 0.4895, -0.3320, -0.2408],
        [ 0.4895, -0.3320, -0.2408],
        [ 0.4895, -0.3320, -0.2408]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


2it [00:04,  2.25s/it]

torch.Size([8, 20, 1024])
y_pred: tensor([[ 0.4859, -0.3286, -0.2400],
        [ 0.4859, -0.3286, -0.2400],
        [ 0.4859, -0.3286, -0.2400],
        [ 0.4859, -0.3286, -0.2400],
        [ 0.4859, -0.3286, -0.2400],
        [ 0.4859, -0.3286, -0.2400],
        [ 0.4859, -0.3286, -0.2400],
        [ 0.4859, -0.3286, -0.2400]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


3it [00:06,  2.16s/it]

In [22]:
# evaluate on test set every epoch
test_loss = []
test_loss_sum = []

correct = 0
total = 0

for idx, data in tqdm(enumerate(test_loader, 0)):
    test_x_numerical = data['x_numerical'].to(device, dtype = torch.float)
    test_ids = data['ids'].to(device, dtype = torch.long)
    test_masks = data['mask'].to(device, dtype = torch.long)
    test_token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
    test_targets = data['targets'].to(device, dtype = torch.long)

    y_pred = model(test_x_numerical, test_ids, test_masks, test_token_type_ids)
    _, pred_label = torch.max(y_pred.data, 1)

#         print('y_pred:', y_pred)
    tloss = criterion(y_pred, test_targets.reshape(-1))

    test_loss.append(tloss.data.cpu())
    test_loss_sum.append(tloss.data.cpu())

    wandb.log({'avg test loss in this batch': tloss.item(), 'epoch': epoch, 'batch_id': idx})

    # Get accuracy
    total += test_targets.reshape(-1).size(0)
    correct += (pred_label == test_targets.reshape(-1)).sum()
accuracy = 100 * correct / total

  'ids': torch.tensor(input_ids, dtype=torch.long),
  'mask': torch.tensor(attention_masks, dtype=torch.long),
  'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
  'targets': torch.tensor(y_train[index], dtype=torch.long)
0it [00:00, ?it/s]


OutOfMemoryError: ignored

In [None]:
print(e2k[0].shape)
print(e2k[1].shape)
print(e2k)

In [None]:
run_id = str(1)
!mkdir output/$run_id
output_model_file = 'roberta_stock_pred.bin'
output_vocab_file = './output' + str(run_id)

model_to_save = model
torch.save(model_to_save, output_model_file)
tokenizer.save_vocabulary(output_vocab_file)


In [None]:
plt.plot(loss_arr, label="Training loss")
plt.legend()
plt.show()

In [None]:

'''