In [1]:
!pip install yfinance
!pip install transformers
!pip install wandb
!pip install beautifulsoup4


Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m63.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.16.2-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.5/268.5 kB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m109.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m75.8 MB/s[0m eta [36m0:00:

In [2]:
import yfinance as yf
import datetime
import numpy as np
import pandas as pd
from pylab import mpl, plt
import math, time
import itertools
from datetime import datetime
from operator import itemgetter
from tqdm import tqdm
from math import sqrt
import torch
import torch.nn as nn
from torch.autograd import Variable
from transformers import RobertaTokenizer, RobertaModel
import torch.nn.functional as F

In [3]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

In [4]:
import wandb
wandb.login()
wandb.init(project="stock_prediction")

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mvisriv[0m. Use [1m`wandb login --relogin`[0m to force relogin


### Hyperparams

In [5]:
no_of_days_to_lookforward = 1
no_of_days_to_lookback = 5
up_threshold = 0.015
down_threshold = -0.015
max_text_per_iter = 20
batch_size = 8
MAX_LEN = 10
num_epochs = 200

### Get stocks data for last N days

In [6]:
!mkdir data


In [7]:
stock_symbols = [ 'XOM']
no_of_days = 4*365

EXPORT_DATA_FOLDER = './data/'
# !mkdir data
# Set the start and end dates for the data
# here matching it with dates of news text available
train_start = datetime.strptime('2020/01/04', '%Y/%m/%d')
train_end = datetime.strptime('2022/09/30', '%Y/%m/%d')
test_start = datetime.strptime('2022/10/01', '%Y/%m/%d')
test_end = datetime.strptime('2023/01/04', '%Y/%m/%d')


# start = datetime.datetime.now() - datetime.timedelta(days=no_of_days)
# end = datetime.datetime.now()

# Get training data
for symbol in stock_symbols:
    # Download the historical price and volume data using yfinance
    train_data_raw = yf.download(symbol, start=train_start, end=train_end)

    # Normalize features by percent of changes between today and yesterday
    pct_change_open = train_data_raw['Open'].pct_change().fillna(0)
    pct_change_high = train_data_raw['High'].pct_change().fillna(0)
    pct_change_high_over_open = (train_data_raw['High']-train_data_raw['Open'])/train_data_raw['Open']
    pct_change_low = train_data_raw['Low'].pct_change().fillna(0)
    pct_change_low_over_open = (train_data_raw['Low']-train_data_raw['Open'])/train_data_raw['Open']
    pct_change_close = train_data_raw['Close'].pct_change().fillna(0)
    pct_change_close_over_open = (train_data_raw['Close']-train_data_raw['Open'])/train_data_raw['Open']
    pct_change_adjclose = train_data_raw['Adj Close'].pct_change().fillna(0)
    pct_change_adjclose_over_open = (train_data_raw['Adj Close']-train_data_raw['Open'])/train_data_raw['Open']
    pct_change_volume = train_data_raw['Volume'].pct_change().fillna(0)

    # Prepare labels: 2 means the close price of tomorow is higher than today's close price; 1 is down; 0 means the movement is between up_threshold and down_threshold
    label = np.where(pct_change_close > up_threshold, 2, np.where(pct_change_close < down_threshold, 1, 0))[1:]
    label = np.append(label, 0)

    # Construct a train_data_norm data frame
    train_data_norm = pd.DataFrame({'Open_norm':pct_change_open,
                              'High_norm':pct_change_high,
                              'Low_norm': pct_change_low,
                              'Close_norm':pct_change_close,
                              'Volume_norm':pct_change_volume,
                              'High-Open_norm':pct_change_high_over_open,
                              'Low-Open_norm':pct_change_low_over_open,
                              'Close-Open_norm':pct_change_close_over_open,
                              'Label_2up1down':label})

    # Normalize by min-max normalization after the pct normalization
    train_data_norm['Open_norm'] = train_data_norm['Open_norm'].apply(lambda x: (x - train_data_norm['Open_norm'].min()) / (train_data_norm['Open_norm'].max() - train_data_norm['Open_norm'].min()))
    train_data_norm['High_norm'] = train_data_norm['High_norm'].apply(lambda x: (x - train_data_norm['High_norm'].min()) / (train_data_norm['High_norm'].max() - train_data_norm['High_norm'].min()))
    train_data_norm['Low_norm'] = train_data_norm['Low_norm'].apply(lambda x: (x - train_data_norm['Low_norm'].min()) / (train_data_norm['Low_norm'].max() - train_data_norm['Low_norm'].min()))
    train_data_norm['Close_norm'] = train_data_norm['Close_norm'].apply(lambda x: (x - train_data_norm['Close_norm'].min()) / (train_data_norm['Close_norm'].max() - train_data_norm['Close_norm'].min()))
    train_data_norm['Volume_norm'] = train_data_norm['Volume_norm'].apply(lambda x: (x - train_data_norm['Volume_norm'].min()) / (train_data_norm['Volume_norm'].max() - train_data_norm['Volume_norm'].min()))
    train_data_norm['High-Open_norm'] = train_data_norm['High-Open_norm'].apply(lambda x: (x - train_data_norm['High-Open_norm'].min()) / (train_data_norm['High-Open_norm'].max() - train_data_norm['High-Open_norm'].min()))
    train_data_norm['Low-Open_norm'] = train_data_norm['Low-Open_norm'].apply(lambda x: (x - train_data_norm['Low-Open_norm'].min()) / (train_data_norm['Low-Open_norm'].max() - train_data_norm['Low-Open_norm'].min()))
    train_data_norm['Close-Open_norm'] = train_data_norm['Close-Open_norm'].apply(lambda x: (x - train_data_norm['Close-Open_norm'].min()) / (train_data_norm['Close-Open_norm'].max() - train_data_norm['Close-Open_norm'].min()))

    # Remove the first and the last row, becuase of NAN values
    train_data_raw = train_data_raw.iloc[1:-1]
    train_data_norm = train_data_norm.iloc[1:-1]

    train_data_raw.to_csv(EXPORT_DATA_FOLDER+symbol+'train_raw_data.csv', index=True)
    train_data_norm.to_csv(EXPORT_DATA_FOLDER+symbol+'train_norm_data.csv', index=True)


# Get test data
for symbol in stock_symbols:
    # Download the historical price and volume data using yfinance
    test_data_raw = yf.download(symbol, start=test_start, end=test_end)

    # Normalize features by percent of changes between today and yesterday
    pct_change_open = test_data_raw['Open'].pct_change().fillna(0)
    pct_change_high = test_data_raw['High'].pct_change().fillna(0)
    pct_change_high_over_open = (test_data_raw['High']-test_data_raw['Open'])/test_data_raw['Open']
    pct_change_low = test_data_raw['Low'].pct_change().fillna(0)
    pct_change_low_over_open = (test_data_raw['Low']-test_data_raw['Open'])/test_data_raw['Open']
    pct_change_close = test_data_raw['Close'].pct_change().fillna(0)
    pct_change_close_over_open = (test_data_raw['Close']-test_data_raw['Open'])/test_data_raw['Open']
    pct_change_adjclose = test_data_raw['Adj Close'].pct_change().fillna(0)
    pct_change_adjclose_over_open = (test_data_raw['Adj Close']-test_data_raw['Open'])/test_data_raw['Open']
    pct_change_volume = test_data_raw['Volume'].pct_change().fillna(0)

    # Prepare labels: 2 means the close price of tomorow is higher than today's close price; 1 is down; 0 means the movement is between up_threshold and down_threshold
    label = np.where(pct_change_close > up_threshold, 2, np.where(pct_change_close < down_threshold, 1, 0))[1:]
    label = np.append(label, 0)

    # Construct a test_data_norm data frame
    test_data_norm = pd.DataFrame({'Open_norm':pct_change_open,
                              'High_norm':pct_change_high,
                              'Low_norm': pct_change_low,
                              'Close_norm':pct_change_close,
                              'Volume_norm':pct_change_volume,
                              'High-Open_norm':pct_change_high_over_open,
                              'Low-Open_norm':pct_change_low_over_open,
                              'Close-Open_norm':pct_change_close_over_open,
                              'Label_2up1down':label})

    # Normalize by min-max normalization after the pct normalization
    test_data_norm['Open_norm'] = test_data_norm['Open_norm'].apply(lambda x: (x - test_data_norm['Open_norm'].min()) / (test_data_norm['Open_norm'].max() - test_data_norm['Open_norm'].min()))
    test_data_norm['High_norm'] = test_data_norm['High_norm'].apply(lambda x: (x - test_data_norm['High_norm'].min()) / (test_data_norm['High_norm'].max() - test_data_norm['High_norm'].min()))
    test_data_norm['Low_norm'] = test_data_norm['Low_norm'].apply(lambda x: (x - test_data_norm['Low_norm'].min()) / (test_data_norm['Low_norm'].max() - test_data_norm['Low_norm'].min()))
    test_data_norm['Close_norm'] = test_data_norm['Close_norm'].apply(lambda x: (x - test_data_norm['Close_norm'].min()) / (test_data_norm['Close_norm'].max() - test_data_norm['Close_norm'].min()))
    test_data_norm['Volume_norm'] = test_data_norm['Volume_norm'].apply(lambda x: (x - test_data_norm['Volume_norm'].min()) / (test_data_norm['Volume_norm'].max() - test_data_norm['Volume_norm'].min()))
    test_data_norm['High-Open_norm'] = test_data_norm['High-Open_norm'].apply(lambda x: (x - test_data_norm['High-Open_norm'].min()) / (test_data_norm['High-Open_norm'].max() - test_data_norm['High-Open_norm'].min()))
    test_data_norm['Low-Open_norm'] = test_data_norm['Low-Open_norm'].apply(lambda x: (x - test_data_norm['Low-Open_norm'].min()) / (test_data_norm['Low-Open_norm'].max() - test_data_norm['Low-Open_norm'].min()))
    test_data_norm['Close-Open_norm'] = test_data_norm['Close-Open_norm'].apply(lambda x: (x - test_data_norm['Close-Open_norm'].min()) / (test_data_norm['Close-Open_norm'].max() - test_data_norm['Close-Open_norm'].min()))

    # Remove the first and the last row, becuase of NAN values
    test_data_raw = test_data_raw.iloc[1:-1]
    test_data_norm = test_data_norm.iloc[1:-1]

    test_data_raw.to_csv(EXPORT_DATA_FOLDER+symbol+'test_raw_data.csv', index=True)
    test_data_norm.to_csv(EXPORT_DATA_FOLDER+symbol+'test_norm_data.csv', index=True)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


## TODO

In [None]:
'''
(2023-06-05)
cuda support check
//read textual data into correct shape
hyperparam tuning: number of neurons: tune to right number of neurons in FC in model
//max_text_per_iter -> code in dataloader to maintain the size

(2023-06-07)
cuda check
roberta encoder fix
multi label - how to create target label?

'''

## Prep textual data

### Crawl textual news data from internet

In [None]:
# Import libraries
from bs4 import BeautifulSoup
from urllib.request import urlopen
from urllib.request import Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Parameters
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL', 'TSLA', 'AMZN']



# Get Data
finviz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finviz_url + ticker
    req = Request(url=url,
                  headers={'user-agent': 'Mozilla/5.0',
                                   'referer': 'https://...'})
    resp = urlopen(req)
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')

        print ('\n')
        print ('Recent News Headlines for {}: '.format(ticker))

        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text
            td_text = table_row.td.text
            td_text = td_text.strip()
            print(a_text,'(',td_text,')')
            if i == n-1:
                break
except KeyError:
    pass


# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        text = x.a.get_text()
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]

        else:
            date = date_scrape[0]
            time = date_scrape[1]

        ticker = file_name.split('_')[0]

        parsed_news.append([ticker, date, time, text])



### Read downloaded data from saved files

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
text_data_df = pd.read_csv('/content/drive/MyDrive/machine_learning/projects/xai-seq/data/XOM_20200401_20230401_medium.csv',
                           sep= ',',
                           header= 0,
                           engine='python',
                           on_bad_lines = 'skip')
text_data_df = text_data_df[['Date', 'News']]


text_data_df = text_data_df.groupby('Date')['News'].apply('$$$###'.join)

text_data_df.index = pd.to_datetime(text_data_df.index, dayfirst=True)
# text_data_df


In [10]:
all_train_df = train_data_norm.join(text_data_df, how = 'inner')
all_test_df = test_data_norm.join(text_data_df, how = 'inner')

print(all_train_df.index.min())
print(all_train_df.index.max())
print(all_test_df.index.min())
print(all_test_df.index.max())

2020-04-01 00:00:00
2022-09-28 00:00:00
2022-10-04 00:00:00
2022-12-30 00:00:00


### Merge textual and numerical data


In [11]:
all_train = all_train_df.values

window_size = no_of_days_to_lookback

X_numerical_train = []
y_train = []
X_text_train = []
X_text_train_curr = []





for i in range(window_size, len(all_train) - no_of_days_to_lookforward + 1):
    X_numerical_train.append(all_train[i-window_size: i, :-2])

    # split and append sequence of text
    curr_seq = all_train[i-window_size: i, -1]
    for j in range(window_size):
        split_curr_seq = curr_seq[window_size - 1 -j].split('$$$###')
        X_text_train_curr = X_text_train_curr + split_curr_seq

    if len(X_text_train_curr) > max_text_per_iter:
        X_text_train_curr = X_text_train_curr[:max_text_per_iter]

    X_text_train.append(X_text_train_curr)

    # target labels
    y_train.append(all_train[i:i+no_of_days_to_lookforward, -2])

X_numerical_train, y_train = np.array(X_numerical_train).astype(np.float16), np.array(y_train).astype(np.int32)
print(type(X_numerical_train))
print(type(y_train))

X_numerical_train = torch.from_numpy(X_numerical_train).type(torch.Tensor)
y_train = torch.from_numpy(y_train).long()

print(len(X_numerical_train))
print(len(X_text_train))
print(len(y_train))
print(X_numerical_train.shape)

print(len(X_text_train))
print(len(X_text_train[2]))
# print(X_text_train[2])

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
624
624
624
torch.Size([624, 5, 8])
624
20


In [12]:
all_test = all_test_df.values


X_numerical_test = []
y_test = []
X_text_test = []
X_text_test_curr = []

for i in range(window_size, len(all_test) - no_of_days_to_lookforward + 1):
    X_numerical_test.append(all_test[i-window_size: i, :-2])

    # split and append sequence of text (in reverse order to add the latest news first)
    curr_seq = all_test[i-window_size: i, -1]
    for j in range(window_size):
        split_curr_seq = curr_seq[window_size - 1 -j].split('$$$###')
        X_text_test_curr = X_text_test_curr + split_curr_seq

    if len(X_text_test_curr) > max_text_per_iter:
        X_text_test_curr = X_text_test_curr[:max_text_per_iter]

    X_text_test.append(X_text_test_curr)

    # target labels
    y_test.append(all_test[i:i+no_of_days_to_lookforward, -2])

X_numerical_test, y_test = np.array(X_numerical_test).astype(np.float16), np.array(y_test).astype(np.int32)
print(type(X_numerical_test))
print(type(y_test))

X_numerical_test = torch.from_numpy(X_numerical_test).type(torch.Tensor)
y_test = torch.from_numpy(y_test).long()

print(len(X_numerical_test))
print(len(X_text_test))
print(len(y_test))
print(X_numerical_test.shape)

print(len(X_text_test))
print(len(X_text_test[2]))
# print(X_text_test[2])

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
57
57
57
torch.Size([57, 5, 8])
57
20


## Data loader

In [13]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

tokenizer = RobertaTokenizer.from_pretrained('roberta-large', truncation=True, do_lower_case=True)

class SiameseDataloader(Dataset):

    def __init__(self, X_numerical_train, y_train, X_text_train, MAX_LEN, tokenizer):
        self.X_numerical_train = X_numerical_train
        self.X_text_train = X_text_train
        self.MAX_LEN = MAX_LEN
        self.tokenizer = tokenizer

    def __getitem__(self, index):


        input_ids = []
        attention_masks = []
        token_type_ids = []
        input_seq = []

        for sent in X_text_train[index]:
            encoded_sent = self.tokenizer.encode_plus(
                text=sent,
                add_special_tokens=True,        # Add `[CLS]` and `[SEP]` special tokens
                max_length=self.MAX_LEN,             # Choose max length to truncate/pad
                pad_to_max_length=True,         # Pad sentence to max length
                #return_attention_mask=True      # Return attention mask
                return_token_type_ids=True
                )
            input_ids.append(encoded_sent.get('input_ids'))
            attention_masks.append(encoded_sent.get('attention_mask'))
            token_type_ids.append(encoded_sent.get('token_type_ids'))

        # Convert lists to tensors
        input_ids = torch.tensor(input_ids)
        attention_masks = torch.tensor(attention_masks)
        token_type_ids = torch.tensor(token_type_ids)


        return {
            'x_numerical': X_numerical_train[index],
            'ids': torch.tensor(input_ids, dtype=torch.long),
            'mask': torch.tensor(attention_masks, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(y_train[index], dtype=torch.long)
        }





    def __len__(self):
        return len(self.X_numerical_train)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

In [14]:
train_set = SiameseDataloader(X_numerical_train, y_train, X_text_train, MAX_LEN, tokenizer)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False)


test_set = SiameseDataloader(X_numerical_test, y_test, X_text_test, MAX_LEN, tokenizer)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)


## Build model


### lstm1, lstm2, roberta, concat, relu


In [15]:
import torch.nn as nn

class SiameseModel11(nn.Module):
    def __init__(self, input_dim1, input_dim2,
                 hidden_dim1, hidden_dim2, hidden_dim3, hidden_dim4,
                 num_layers1, num_layers2, output_dim1, output_dim2):
        super(SiameseModel11, self).__init__()
        self.input_dim1 = input_dim1
        self.input_dim2 = input_dim2
        self.hidden_dim1 = hidden_dim1
        self.hidden_dim2 = hidden_dim2
        self.hidden_dim3 = hidden_dim3
        self.hidden_dim4 = hidden_dim4
        self.num_layers1 = num_layers1
        self.num_layers2 = num_layers2
        self.output_dim1 = output_dim1
        self.output_dim2 = output_dim2

        self.roberta = RobertaModel.from_pretrained("roberta-large").to(device)

        self.lstm1 = nn.LSTM(input_dim1, hidden_dim1, num_layers1, batch_first=True)
        self.lstm2 = nn.LSTM(input_dim2, hidden_dim2, num_layers2, batch_first=True)

        self.fc1 = nn.Linear(hidden_dim1, output_dim1)
        self.fc2 = nn.Linear(hidden_dim2, output_dim2)
        self.fc3 = nn.Linear(output_dim1+output_dim2, hidden_dim3)
        self.fc4 = nn.Linear(hidden_dim3, hidden_dim4)
        self.fc5 = nn.Linear(hidden_dim4, 3)



    def forward(self, x1, ids, masks, token_type_ids):
        #left tower with numerical features
        h_10 = Variable(torch.zeros(self.num_layers1, x1.size(0), self.hidden_dim1)).to(device)
        c_10 = Variable(torch.zeros(self.num_layers1, x1.size(0), self.hidden_dim1)).to(device)
        ula1, (h_out1, _) = self.lstm1(x1, (h_10, c_10))
        h_out1 = h_out1.view(-1, self.hidden_dim1)
        out1 = self.fc1(h_out1)


        # right tower with roberta on textual features
        batch_size_here = ids.shape[0]
        e2 = torch.zeros(batch_size_here, max_text_per_iter,1024).to(device)

        for k in range(ids.shape[1]):
            seq_ids = ids[:,k,:]
            seq_masks = masks[:,k,:]
            seq_token_type_ids = token_type_ids[:,k,:]

            e2k = self.roberta(input_ids= seq_ids, attention_mask=seq_masks, token_type_ids=seq_token_type_ids)
            # first 0 is for last_hidden_state: https://huggingface.co/docs/transformers/main/en/model_doc/roberta#transformers.RobertaModel.forward.returns:~:text=transformers.modeling_outputs.-,BaseModelOutputWithPoolingAndCrossAttentions%20or%20tuple(torch.FloatTensor),-A%20transformers.modeling_outputs
            # the shape of e2k[0] is (batch_size, sequence_length (<=MAX_LEN), hidden_size (=1024))
            e2k1 = e2k[0][:, 0, :]
            e2[:,k,:] = e2k1


        print(e2.shape)
        h_20 = Variable(torch.zeros(self.num_layers2, e2.size(0), self.hidden_dim2)).to(device)
        c_20 = Variable(torch.zeros(self.num_layers2, e2.size(0), self.hidden_dim2)).to(device)
        ula2, (h_out2, _) = self.lstm2(e2, (h_20, c_20))
        h_out2 = h_out2.view(-1, self.hidden_dim2)
        out2 = self.fc2(h_out2)

        # siamese merging layers
        output = torch.cat((out1, out2),1)
        output = F.relu(self.fc3(output))
        output = F.relu(self.fc4(output))


        output = self.fc5(output)
        return output


### lstm1, mlp, relu

In [16]:
import torch.nn as nn

class SiameseModel10(nn.Module):
    def __init__(self, input_dim1, input_dim2,
                 hidden_dim1, hidden_dim2, hidden_dim3, hidden_dim4,
                 num_layers1, num_layers2, output_dim1, output_dim2):
        super(SiameseModel10, self).__init__()
        self.input_dim1 = input_dim1
        self.input_dim2 = input_dim2
        self.hidden_dim1 = hidden_dim1
        self.hidden_dim2 = hidden_dim2
        self.hidden_dim3 = hidden_dim3
        self.hidden_dim4 = hidden_dim4
        self.num_layers1 = num_layers1
        self.num_layers2 = num_layers2
        self.output_dim1 = output_dim1
        self.output_dim2 = output_dim2


        self.lstm1 = nn.LSTM(input_dim1, hidden_dim1, num_layers1, batch_first=True)

        self.fc1 = nn.Linear(hidden_dim1, output_dim1)
        self.fc3 = nn.Linear(output_dim1, hidden_dim3)
        self.fc4 = nn.Linear(hidden_dim3, hidden_dim4)
        self.fc5 = nn.Linear(hidden_dim4, 3)



    def forward(self, x1, ids, masks, token_type_ids):
        #left tower with numerical features
        h_10 = Variable(torch.zeros(self.num_layers1, x1.size(0), self.hidden_dim1)).to(device)
        c_10 = Variable(torch.zeros(self.num_layers1, x1.size(0), self.hidden_dim1)).to(device)
        ula1, (h_out1, _) = self.lstm1(x1, (h_10, c_10))
        h_out1 = h_out1.view(-1, self.hidden_dim1)
        out1 = self.fc1(h_out1)


        # mlp layers
        output = out1
        output = F.relu(self.fc3(output))
        output = F.relu(self.fc4(output))
        output = self.fc5(output)
        return output


### lstm1, mlp, lrelu

In [17]:
import torch.nn as nn

class SiameseModel10leaky(nn.Module):
    def __init__(self, input_dim1,
                 hidden_dim1, hidden_dim3, hidden_dim4,
                 num_layers1, output_dim1):
        super(SiameseModel10leaky, self).__init__()
        self.input_dim1 = input_dim1
        self.hidden_dim1 = hidden_dim1
        self.hidden_dim3 = hidden_dim3
        self.hidden_dim4 = hidden_dim4
        self.num_layers1 = num_layers1
        self.output_dim1 = output_dim1


        self.lstm1 = nn.LSTM(input_dim1, hidden_dim1, num_layers1, batch_first=True)

        self.fc1 = nn.Linear(hidden_dim1, output_dim1)
        self.fc3 = nn.Linear(output_dim1, hidden_dim3)
        # self.fc4 = nn.Linear(hidden_dim3, hidden_dim4)
        self.fc5 = nn.Linear(hidden_dim3, 3)
        self.lrelu = nn.LeakyReLU(0.1)



    def forward(self, x1, ids, masks, token_type_ids):
        #left tower with numerical features
        h_10 = Variable(torch.zeros(self.num_layers1, x1.size(0), self.hidden_dim1)).to(device)
        c_10 = Variable(torch.zeros(self.num_layers1, x1.size(0), self.hidden_dim1)).to(device)
        ula1, (h_out1, _) = self.lstm1(x1, (h_10, c_10))
        h_out1 = h_out1.view(-1, self.hidden_dim1)
        out1 = self.fc1(h_out1)


        # mlp layers
        output = out1
        output = self.lrelu(self.fc3(output))
        # output = self.lrelu(self.fc4(output))
        output = self.fc5(output)
        return output


### tranformer, mlp, relu


In [None]:
import torch.nn as nn

class SiameseModel20(nn.Module):
    def __init__(self, input_dim1, input_dim2,
                 hidden_dim1, hidden_dim2, hidden_dim3, hidden_dim4,
                 num_layers1, num_layers2, output_dim1, output_dim2):
        super(SiameseModel20, self).__init__()
        self.input_dim1 = input_dim1
        self.input_dim2 = input_dim2
        self.hidden_dim1 = hidden_dim1
        self.hidden_dim2 = hidden_dim2
        self.hidden_dim3 = hidden_dim3
        self.hidden_dim4 = hidden_dim4
        self.num_layers1 = num_layers1
        self.num_layers2 = num_layers2
        self.output_dim1 = output_dim1
        self.output_dim2 = output_dim2


        self.lstm1 = nn.LSTM(input_dim1, hidden_dim1, num_layers1, batch_first=True)


        self.fc1 = nn.Linear(hidden_dim1, output_dim1)
        self.fc3 = nn.Linear(output_dim1, hidden_dim3)
        self.fc4 = nn.Linear(hidden_dim3, hidden_dim4)
        self.fc5 = nn.Linear(hidden_dim4, 3)



    def forward(self, x1, ids, masks, token_type_ids):
        #left tower with numerical features
        h_10 = Variable(torch.zeros(self.num_layers1, x1.size(0), self.hidden_dim1)).to(device)
        c_10 = Variable(torch.zeros(self.num_layers1, x1.size(0), self.hidden_dim1)).to(device)
        ula1, (h_out1, _) = self.lstm1(x1, (h_10, c_10))
        h_out1 = h_out1.view(-1, self.hidden_dim1)
        out1 = self.fc1(h_out1)

        # mlp  layers

        output = out1
        output = F.relu(self.fc3(output))
        output = F.relu(self.fc4(output))
        output = self.fc5(output)
        return output








### Choose a model

In [18]:
# model = SiameseModel11(input_dim1 = 8, input_dim2 = 1024,
#                  hidden_dim1 = 20, hidden_dim2 = 768, hidden_dim3 = 10, hidden_dim4 = 8,
#                  num_layers1 = 1, num_layers2 = 1, output_dim1 = 10, output_dim2 = 256).to(device)

model = SiameseModel10leaky(input_dim1 = 8,
                 hidden_dim1 = 20, output_dim1 = 10, hidden_dim3 = 10, hidden_dim4 = 8,
                 num_layers1 = 1).to(device)

# model = SiameseModel20(input_dim1 = 8, input_dim2 = 1024,
#                  hidden_dim1 = 20, hidden_dim2 = 768, hidden_dim3 = 10, hidden_dim4 = 8,
#                  num_layers1 = 1, num_layers2 = 1, output_dim1 = 10, output_dim2 = 256).to(device)

# model = SiameseModel10(input_dim1 = 8, input_dim2 = 1024,
#                  hidden_dim1 = 20, hidden_dim2 = 768, hidden_dim3 = 10, hidden_dim4 = 8,
#                  num_layers1 = 1, num_layers2 = 1, output_dim1 = 10, output_dim2 = 256).to(device)

'''
SiameseModel10
SiameseModel10leaky
SiameseModel20
'''

print(model)
print(len(list(model.parameters())))



SiameseModel10leaky(
  (lstm1): LSTM(8, 20, batch_first=True)
  (fc1): Linear(in_features=20, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=10, bias=True)
  (fc5): Linear(in_features=10, out_features=3, bias=True)
  (lrelu): LeakyReLU(negative_slope=0.1)
)
10


## Train model

In [19]:
criterion = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.0001)
loss_arr = np.zeros(num_epochs)

In [None]:
wandb.init(project="stock_prediction")

In [20]:
# roberta = RobertaModel.from_pretrained("roberta-large").to(device)
train_loss_record = []
wandb.watch(model, log = 'all')
for epoch in range(num_epochs):
    train_loss = []
    train_loss_sum = []
    train_correct = 0
    train_total = 0

    for idx, data in tqdm(enumerate(train_loader, 0)):
        x_numerical = data['x_numerical'].to(device, dtype = torch.float)
        ids = data['ids'].to(device, dtype = torch.long)
        masks = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.long)




        y_pred = model(x_numerical, ids, masks, token_type_ids)
        # print('y_pred:', y_pred)
        _, pred_label = torch.max(y_pred.data, 1)

        loss = criterion(y_pred, targets.reshape(-1))

         # Zero out gradient, else they will accumulate between epochs
        optimiser.zero_grad()

        # Backward pass
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)

        for name, p in model.named_parameters():
            if(p.requires_grad) and ("bias" not in name):
                if p.grad is not None:
                    wandb.log({'gradient_' + name: p.grad.abs().mean().cpu().data.numpy()})
                else:
                    wandb.log({'gradient_' + name: 0})


        # plot_grad_flow(model.named_parameters(), idx, epoch) # version 1

        # Update parameters
        optimiser.step()



        train_loss.append(loss.data.cpu())
        train_loss_sum.append(loss.data.cpu())


        wandb.log({'avg train loss in this batch': loss.item(), 'epoch': epoch, 'batch_id': idx})
        # Get accuracy
        train_total += targets.reshape(-1).size(0)
        train_correct += (pred_label == targets.reshape(-1)).sum()
    train_accuracy = 100 * train_correct / train_total

    # Record at every epoch
    print('Train Loss at epoch {}: {}\n'.format(epoch, np.mean(train_loss_sum)))
    train_loss_record.append(np.mean(train_loss_sum))
    wandb.log({'avg train loss in this epoch': np.mean(train_loss_sum), 'epoch': epoch})
    wandb.log({'train accuracy in this epoch': train_accuracy, 'epoch': epoch})






    # evaluate on test set every epoch
    test_loss = []
    test_loss_sum = []

    correct = 0
    total = 0

    for idx, data in tqdm(enumerate(test_loader, 0)):
        test_x_numerical = data['x_numerical'].to(device, dtype = torch.float)
        test_ids = data['ids'].to(device, dtype = torch.long)
        test_masks = data['mask'].to(device, dtype = torch.long)
        test_token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        test_targets = data['targets'].to(device, dtype = torch.long)

        y_pred = model(test_x_numerical, test_ids, test_masks, test_token_type_ids)
        _, pred_label = torch.max(y_pred.data, 1)

#         print('y_pred:', y_pred)
        tloss = criterion(y_pred, test_targets.reshape(-1))

        test_loss.append(tloss.data.cpu())
        test_loss_sum.append(tloss.data.cpu())

        wandb.log({'avg test loss in this batch': tloss.item(), 'epoch': epoch, 'batch_id': idx})

        # Get accuracy
        total += test_targets.reshape(-1).size(0)
        correct += (pred_label == test_targets.reshape(-1)).sum()
    accuracy = 100 * correct / total

    # Record at every epoch
    print('test Loss at epoch {}: {}\n'.format(epoch, np.mean(test_loss_sum)))
    wandb.log({'avg test loss in this epoch': np.mean(test_loss_sum), 'epoch': epoch})
    wandb.log({'test accuracy in this epoch': accuracy, 'epoch': epoch})











0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
  'ids': torch.tensor(input_ids, dtype=torch.long),
  'mask': torch.tensor(attention_masks, dtype=torch.long),
  'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
  'targets': torch.tensor(y_train[index], dtype=torch.long)
78it [00:09,  8.19it/s]


Train Loss at epoch 0: 1.070517659187317



8it [00:00, 16.75it/s]


test Loss at epoch 0: 1.0928128957748413



78it [00:05, 13.14it/s]


Train Loss at epoch 1: 1.0628247261047363



8it [00:00, 16.54it/s]


test Loss at epoch 1: 1.0911345481872559



78it [00:08,  8.80it/s]


Train Loss at epoch 2: 1.0555905103683472



8it [00:00, 16.48it/s]


test Loss at epoch 2: 1.0902132987976074



78it [00:05, 13.36it/s]


Train Loss at epoch 3: 1.0488941669464111



8it [00:00, 16.59it/s]


test Loss at epoch 3: 1.090166687965393



78it [00:08,  9.01it/s]


Train Loss at epoch 4: 1.0430463552474976



8it [00:00, 16.75it/s]


test Loss at epoch 4: 1.0909204483032227



78it [00:05, 13.37it/s]


Train Loss at epoch 5: 1.0381989479064941



8it [00:00, 16.01it/s]


test Loss at epoch 5: 1.092613697052002



78it [00:08,  8.97it/s]


Train Loss at epoch 6: 1.0341769456863403



8it [00:00, 16.60it/s]


test Loss at epoch 6: 1.0951330661773682



78it [00:05, 13.21it/s]


Train Loss at epoch 7: 1.0310696363449097



8it [00:00, 16.14it/s]


test Loss at epoch 7: 1.097749948501587



78it [00:08,  8.93it/s]


Train Loss at epoch 8: 1.029147982597351



8it [00:00, 16.52it/s]


test Loss at epoch 8: 1.1002439260482788



78it [00:05, 13.37it/s]


Train Loss at epoch 9: 1.027945637702942



8it [00:00, 15.84it/s]


test Loss at epoch 9: 1.1026464700698853



78it [00:08,  9.15it/s]


Train Loss at epoch 10: 1.027143955230713



8it [00:00, 10.24it/s]


test Loss at epoch 10: 1.1047937870025635



78it [00:06, 12.59it/s]


Train Loss at epoch 11: 1.026614785194397



8it [00:00, 15.66it/s]


test Loss at epoch 11: 1.106650948524475



78it [00:09,  8.09it/s]


Train Loss at epoch 12: 1.0262744426727295



8it [00:00, 13.55it/s]


test Loss at epoch 12: 1.108187198638916



78it [00:07, 10.76it/s]


Train Loss at epoch 13: 1.0260558128356934



8it [00:00, 14.61it/s]


test Loss at epoch 13: 1.1094985008239746



78it [00:10,  7.58it/s]


Train Loss at epoch 14: 1.0259184837341309



8it [00:00, 15.02it/s]


test Loss at epoch 14: 1.1104974746704102



78it [00:07, 10.89it/s]


Train Loss at epoch 15: 1.0258288383483887



8it [00:00, 13.21it/s]


test Loss at epoch 15: 1.1112946271896362



78it [00:10,  7.70it/s]


Train Loss at epoch 16: 1.025758147239685



8it [00:00, 15.33it/s]


test Loss at epoch 16: 1.111972689628601



78it [00:08,  9.42it/s]


Train Loss at epoch 17: 1.0257216691970825



8it [00:01,  7.94it/s]


test Loss at epoch 17: 1.1125013828277588



78it [00:08,  9.03it/s]


Train Loss at epoch 18: 1.0256972312927246



8it [00:00, 14.81it/s]


test Loss at epoch 18: 1.1129863262176514



78it [00:09,  8.05it/s]


Train Loss at epoch 19: 1.025653600692749



8it [00:00,  8.28it/s]


test Loss at epoch 19: 1.113291621208191



78it [00:07, 10.72it/s]


Train Loss at epoch 20: 1.0256363153457642



8it [00:00, 14.65it/s]


test Loss at epoch 20: 1.11360764503479



78it [00:10,  7.70it/s]


Train Loss at epoch 21: 1.0256078243255615



8it [00:00, 14.64it/s]


test Loss at epoch 21: 1.1138262748718262



78it [00:07, 10.81it/s]


Train Loss at epoch 22: 1.025590181350708



8it [00:00, 14.44it/s]


test Loss at epoch 22: 1.113981008529663



78it [00:10,  7.66it/s]


Train Loss at epoch 23: 1.0255801677703857



8it [00:00, 14.91it/s]


test Loss at epoch 23: 1.1141331195831299



78it [00:07, 10.02it/s]


Train Loss at epoch 24: 1.025568962097168



8it [00:00,  8.12it/s]


test Loss at epoch 24: 1.1142398118972778



78it [00:09,  8.64it/s]


Train Loss at epoch 25: 1.0255539417266846



8it [00:00, 14.70it/s]


test Loss at epoch 25: 1.114321231842041



78it [00:09,  8.46it/s]


Train Loss at epoch 26: 1.0255411863327026



8it [00:00,  8.40it/s]


test Loss at epoch 26: 1.1143815517425537



78it [00:07, 10.08it/s]


Train Loss at epoch 27: 1.025530219078064



8it [00:00, 15.00it/s]


test Loss at epoch 27: 1.1144338846206665



78it [00:10,  7.60it/s]


Train Loss at epoch 28: 1.0255156755447388



8it [00:00, 15.41it/s]


test Loss at epoch 28: 1.114454746246338



78it [00:07, 10.76it/s]


Train Loss at epoch 29: 1.0255122184753418



8it [00:00, 15.01it/s]


test Loss at epoch 29: 1.1145050525665283



78it [00:10,  7.65it/s]


Train Loss at epoch 30: 1.025489091873169



8it [00:00, 15.04it/s]


test Loss at epoch 30: 1.1144871711730957



78it [00:07, 10.84it/s]


Train Loss at epoch 31: 1.0254861116409302



8it [00:00,  8.22it/s]


test Loss at epoch 31: 1.1145316362380981



78it [00:09,  7.97it/s]


Train Loss at epoch 32: 1.0254679918289185



8it [00:00, 14.98it/s]


test Loss at epoch 32: 1.114511489868164



78it [00:08,  8.80it/s]


Train Loss at epoch 33: 1.025457501411438



8it [00:00,  8.32it/s]


test Loss at epoch 33: 1.1145209074020386



78it [00:07,  9.94it/s]


Train Loss at epoch 34: 1.0254473686218262



8it [00:00, 15.36it/s]


test Loss at epoch 34: 1.1145222187042236



78it [00:10,  7.66it/s]


Train Loss at epoch 35: 1.0254391431808472



8it [00:00, 14.98it/s]


test Loss at epoch 35: 1.1145260334014893



78it [00:07, 10.93it/s]


Train Loss at epoch 36: 1.0254290103912354



8it [00:00, 14.66it/s]


test Loss at epoch 36: 1.1145236492156982



78it [00:10,  7.62it/s]


Train Loss at epoch 37: 1.0254175662994385



8it [00:00, 14.47it/s]


test Loss at epoch 37: 1.1145151853561401



78it [00:07, 10.83it/s]


Train Loss at epoch 38: 1.0254076719284058



8it [00:00, 10.71it/s]


test Loss at epoch 38: 1.1145057678222656



78it [00:09,  7.82it/s]


Train Loss at epoch 39: 1.0253976583480835



8it [00:00, 15.48it/s]


test Loss at epoch 39: 1.114500880241394



78it [00:08,  9.32it/s]


Train Loss at epoch 40: 1.0253866910934448



8it [00:00,  8.02it/s]


test Loss at epoch 40: 1.1144886016845703



78it [00:08,  9.15it/s]


Train Loss at epoch 41: 1.0253756046295166



8it [00:00, 14.28it/s]


test Loss at epoch 41: 1.1144719123840332



78it [00:10,  7.79it/s]


Train Loss at epoch 42: 1.0253667831420898



8it [00:00, 10.57it/s]


test Loss at epoch 42: 1.1144664287567139



78it [00:07, 10.91it/s]


Train Loss at epoch 43: 1.0253559350967407



8it [00:00, 15.26it/s]


test Loss at epoch 43: 1.114452600479126



78it [00:10,  7.66it/s]


Train Loss at epoch 44: 1.0253479480743408



8it [00:00, 14.87it/s]


test Loss at epoch 44: 1.1144459247589111



78it [00:07, 10.85it/s]


Train Loss at epoch 45: 1.0253371000289917



8it [00:00, 14.65it/s]


test Loss at epoch 45: 1.1144300699234009



78it [00:10,  7.67it/s]


Train Loss at epoch 46: 1.0253291130065918



8it [00:00, 14.90it/s]


test Loss at epoch 46: 1.1144226789474487



78it [00:08,  9.39it/s]


Train Loss at epoch 47: 1.0253188610076904



8it [00:01,  7.95it/s]


test Loss at epoch 47: 1.1144113540649414



78it [00:08,  9.05it/s]


Train Loss at epoch 48: 1.0253106355667114



8it [00:00, 15.13it/s]


test Loss at epoch 48: 1.1143972873687744



78it [00:10,  7.79it/s]


Train Loss at epoch 49: 1.0253026485443115



8it [00:00,  9.78it/s]


test Loss at epoch 49: 1.1143940687179565



78it [00:07, 10.95it/s]


Train Loss at epoch 50: 1.0252946615219116



8it [00:00, 15.21it/s]


test Loss at epoch 50: 1.1143798828125



78it [00:10,  7.65it/s]


Train Loss at epoch 51: 1.025286316871643



8it [00:00, 14.98it/s]


test Loss at epoch 51: 1.1143689155578613



78it [00:07, 10.89it/s]


Train Loss at epoch 52: 1.0252785682678223



8it [00:00, 15.14it/s]


test Loss at epoch 52: 1.1143498420715332



78it [00:10,  7.65it/s]


Train Loss at epoch 53: 1.0252653360366821



8it [00:00, 14.84it/s]


test Loss at epoch 53: 1.1143391132354736



78it [00:08,  9.55it/s]


Train Loss at epoch 54: 1.0252591371536255



8it [00:00,  8.14it/s]


test Loss at epoch 54: 1.1143252849578857



78it [00:09,  8.62it/s]


Train Loss at epoch 55: 1.0252487659454346



8it [00:00, 13.95it/s]


test Loss at epoch 55: 1.114306926727295



78it [00:09,  8.17it/s]


Train Loss at epoch 56: 1.0252416133880615



8it [00:00,  8.67it/s]


test Loss at epoch 56: 1.1143038272857666



78it [00:07, 10.47it/s]


Train Loss at epoch 57: 1.0252225399017334



8it [00:00, 14.17it/s]


test Loss at epoch 57: 1.1142692565917969



78it [00:10,  7.65it/s]


Train Loss at epoch 58: 1.0252149105072021



8it [00:00, 14.61it/s]


test Loss at epoch 58: 1.1142641305923462



78it [00:07, 10.79it/s]


Train Loss at epoch 59: 1.0252066850662231



8it [00:00, 14.79it/s]


test Loss at epoch 59: 1.114253282546997



78it [00:10,  7.56it/s]


Train Loss at epoch 60: 1.0251973867416382



8it [00:00, 14.42it/s]


test Loss at epoch 60: 1.1142414808273315



78it [00:07, 10.32it/s]


Train Loss at epoch 61: 1.0251874923706055



8it [00:00,  8.06it/s]


test Loss at epoch 61: 1.1142303943634033



78it [00:09,  8.33it/s]


Train Loss at epoch 62: 1.0251715183258057



8it [00:00, 14.63it/s]


test Loss at epoch 62: 1.1142147779464722



78it [00:09,  8.54it/s]


Train Loss at epoch 63: 1.0251644849777222



8it [00:00,  8.18it/s]


test Loss at epoch 63: 1.114180564880371



78it [00:07,  9.83it/s]


Train Loss at epoch 64: 1.0251532793045044



8it [00:00, 14.50it/s]


test Loss at epoch 64: 1.114195704460144



78it [00:10,  7.66it/s]


Train Loss at epoch 65: 1.0251387357711792



8it [00:00, 14.44it/s]


test Loss at epoch 65: 1.1141769886016846



78it [00:07, 10.91it/s]


Train Loss at epoch 66: 1.025134801864624



8it [00:00, 14.70it/s]


test Loss at epoch 66: 1.1141715049743652



78it [00:10,  7.57it/s]


Train Loss at epoch 67: 1.0251166820526123



8it [00:00, 14.94it/s]


test Loss at epoch 67: 1.1141527891159058



78it [00:07, 10.57it/s]


Train Loss at epoch 68: 1.0251096487045288



8it [00:01,  7.87it/s]


test Loss at epoch 68: 1.1141228675842285



78it [00:09,  8.24it/s]


Train Loss at epoch 69: 1.025094747543335



8it [00:00, 14.41it/s]


test Loss at epoch 69: 1.1141126155853271



78it [00:09,  8.64it/s]


Train Loss at epoch 70: 1.025086760520935



8it [00:00,  8.16it/s]


test Loss at epoch 70: 1.1141235828399658



78it [00:07,  9.84it/s]


Train Loss at epoch 71: 1.0250784158706665



8it [00:00, 14.58it/s]


test Loss at epoch 71: 1.1141095161437988



78it [00:10,  7.66it/s]


Train Loss at epoch 72: 1.025065541267395



8it [00:00, 14.74it/s]


test Loss at epoch 72: 1.114095687866211



78it [00:07, 10.58it/s]


Train Loss at epoch 73: 1.025050401687622



8it [00:00, 14.43it/s]


test Loss at epoch 73: 1.1140823364257812



78it [00:10,  7.69it/s]


Train Loss at epoch 74: 1.025038719177246



8it [00:00, 14.07it/s]


test Loss at epoch 74: 1.1140531301498413



78it [00:07, 10.29it/s]


Train Loss at epoch 75: 1.0250252485275269



8it [00:00,  8.39it/s]


test Loss at epoch 75: 1.1140409708023071



78it [00:09,  8.29it/s]


Train Loss at epoch 76: 1.0250111818313599



8it [00:00, 14.96it/s]


test Loss at epoch 76: 1.1140282154083252



78it [00:09,  8.51it/s]


Train Loss at epoch 77: 1.0249969959259033



8it [00:00,  8.15it/s]


test Loss at epoch 77: 1.114014744758606



78it [00:07,  9.90it/s]


Train Loss at epoch 78: 1.0249823331832886



8it [00:00, 14.33it/s]


test Loss at epoch 78: 1.1140005588531494



78it [00:10,  7.60it/s]


Train Loss at epoch 79: 1.0249674320220947



8it [00:00, 14.48it/s]


test Loss at epoch 79: 1.1139726638793945



78it [00:07, 10.55it/s]


Train Loss at epoch 80: 1.0249532461166382



8it [00:00, 14.44it/s]


test Loss at epoch 80: 1.1139968633651733



78it [00:10,  7.49it/s]


Train Loss at epoch 81: 1.0249348878860474



8it [00:00, 14.68it/s]


test Loss at epoch 81: 1.113943338394165



78it [00:07, 10.04it/s]


Train Loss at epoch 82: 1.0249207019805908



8it [00:01,  7.80it/s]


test Loss at epoch 82: 1.1139564514160156



78it [00:09,  8.15it/s]


Train Loss at epoch 83: 1.0249029397964478



8it [00:00, 14.28it/s]


test Loss at epoch 83: 1.1139386892318726



78it [00:09,  8.24it/s]


Train Loss at epoch 84: 1.0248839855194092



8it [00:00,  8.26it/s]


test Loss at epoch 84: 1.1138780117034912



78it [00:07,  9.96it/s]


Train Loss at epoch 85: 1.0248751640319824



8it [00:00, 13.88it/s]


test Loss at epoch 85: 1.1139321327209473



78it [00:10,  7.44it/s]


Train Loss at epoch 86: 1.02485191822052



8it [00:00, 14.17it/s]


test Loss at epoch 86: 1.1138578653335571



78it [00:07, 10.52it/s]


Train Loss at epoch 87: 1.0248432159423828



8it [00:00, 14.25it/s]


test Loss at epoch 87: 1.1139098405838013



78it [00:10,  7.43it/s]


Train Loss at epoch 88: 1.024810552597046



8it [00:00, 14.80it/s]


test Loss at epoch 88: 1.1138124465942383



78it [00:08,  9.33it/s]


Train Loss at epoch 89: 1.0248034000396729



8it [00:00,  8.05it/s]


test Loss at epoch 89: 1.113884449005127



78it [00:09,  8.55it/s]


Train Loss at epoch 90: 1.0247687101364136



8it [00:00, 14.29it/s]


test Loss at epoch 90: 1.1137781143188477



78it [00:09,  7.94it/s]


Train Loss at epoch 91: 1.0247634649276733



8it [00:00,  8.24it/s]


test Loss at epoch 91: 1.1137983798980713



78it [00:07, 10.20it/s]


Train Loss at epoch 92: 1.0247431993484497



8it [00:00, 14.49it/s]


test Loss at epoch 92: 1.1137886047363281



78it [00:10,  7.40it/s]


Train Loss at epoch 93: 1.024720311164856



8it [00:00, 14.79it/s]


test Loss at epoch 93: 1.1137700080871582



78it [00:07, 10.65it/s]


Train Loss at epoch 94: 1.0246998071670532



8it [00:00, 14.41it/s]


test Loss at epoch 94: 1.1137633323669434



78it [00:10,  7.44it/s]


Train Loss at epoch 95: 1.0246775150299072



8it [00:00, 15.20it/s]


test Loss at epoch 95: 1.113752841949463



78it [00:08,  9.43it/s]


Train Loss at epoch 96: 1.0246552228927612



8it [00:01,  7.86it/s]


test Loss at epoch 96: 1.1137316226959229



78it [00:09,  8.53it/s]


Train Loss at epoch 97: 1.0246318578720093



8it [00:00, 14.59it/s]


test Loss at epoch 97: 1.1137176752090454



78it [00:10,  7.54it/s]


Train Loss at epoch 98: 1.02460777759552



8it [00:00, 10.09it/s]


test Loss at epoch 98: 1.1137042045593262



78it [00:07, 10.47it/s]


Train Loss at epoch 99: 1.0245838165283203



8it [00:00, 14.42it/s]


test Loss at epoch 99: 1.1136887073516846



78it [00:10,  7.40it/s]


Train Loss at epoch 100: 1.0245599746704102



8it [00:00, 14.36it/s]


test Loss at epoch 100: 1.1136715412139893



78it [00:07, 10.38it/s]


Train Loss at epoch 101: 1.024533748626709



8it [00:00,  8.34it/s]


test Loss at epoch 101: 1.113620400428772



78it [00:10,  7.79it/s]


Train Loss at epoch 102: 1.024515151977539



8it [00:00, 14.10it/s]


test Loss at epoch 102: 1.1136138439178467



78it [00:08,  8.74it/s]


Train Loss at epoch 103: 1.0244848728179932



8it [00:00,  8.02it/s]


test Loss at epoch 103: 1.1135773658752441



78it [00:08,  9.19it/s]


Train Loss at epoch 104: 1.0244606733322144



8it [00:00, 13.77it/s]


test Loss at epoch 104: 1.1135727167129517



78it [00:10,  7.38it/s]


Train Loss at epoch 105: 1.0244301557540894



8it [00:00, 14.36it/s]


test Loss at epoch 105: 1.1135456562042236



78it [00:07, 10.59it/s]


Train Loss at epoch 106: 1.0244040489196777



8it [00:00, 14.18it/s]


test Loss at epoch 106: 1.1135385036468506



78it [00:10,  7.48it/s]


Train Loss at epoch 107: 1.0243749618530273



8it [00:00, 13.85it/s]


test Loss at epoch 107: 1.113525152206421



78it [00:07, 10.13it/s]


Train Loss at epoch 108: 1.0243418216705322



8it [00:01,  7.91it/s]


test Loss at epoch 108: 1.1135070323944092



78it [00:09,  8.11it/s]


Train Loss at epoch 109: 1.024316668510437



8it [00:00, 14.44it/s]


test Loss at epoch 109: 1.1135015487670898



78it [00:09,  8.50it/s]


Train Loss at epoch 110: 1.024288535118103



8it [00:00,  8.32it/s]


test Loss at epoch 110: 1.1134765148162842



78it [00:08,  9.52it/s]


Train Loss at epoch 111: 1.024251937866211



8it [00:00, 14.52it/s]


test Loss at epoch 111: 1.113461971282959



78it [00:10,  7.53it/s]


Train Loss at epoch 112: 1.0242252349853516



8it [00:00, 14.02it/s]


test Loss at epoch 112: 1.113448143005371



78it [00:07, 10.72it/s]


Train Loss at epoch 113: 1.0241894721984863



8it [00:00, 14.96it/s]


test Loss at epoch 113: 1.113417148590088



78it [00:10,  7.45it/s]


Train Loss at epoch 114: 1.0241609811782837



8it [00:00, 14.40it/s]


test Loss at epoch 114: 1.1134047508239746



78it [00:07, 10.75it/s]


Train Loss at epoch 115: 1.0241252183914185



8it [00:00,  8.79it/s]


test Loss at epoch 115: 1.1133780479431152



78it [00:09,  7.86it/s]


Train Loss at epoch 116: 1.024094581604004



8it [00:00, 14.18it/s]


test Loss at epoch 116: 1.113365888595581



78it [00:08,  8.78it/s]


Train Loss at epoch 117: 1.0240575075149536



8it [00:01,  7.98it/s]


test Loss at epoch 117: 1.1133379936218262



78it [00:08,  9.42it/s]


Train Loss at epoch 118: 1.0240230560302734



8it [00:00, 14.43it/s]


test Loss at epoch 118: 1.1133124828338623



78it [00:10,  7.69it/s]


Train Loss at epoch 119: 1.023987889289856



8it [00:00, 13.83it/s]


test Loss at epoch 119: 1.1132882833480835



78it [00:07, 10.69it/s]


Train Loss at epoch 120: 1.023956298828125



8it [00:00, 14.53it/s]


test Loss at epoch 120: 1.1132792234420776



78it [00:10,  7.65it/s]


Train Loss at epoch 121: 1.0239156484603882



8it [00:00, 14.96it/s]


test Loss at epoch 121: 1.1132519245147705



78it [00:07, 10.79it/s]


Train Loss at epoch 122: 1.023879051208496



8it [00:00, 12.86it/s]


test Loss at epoch 122: 1.1132280826568604



78it [00:10,  7.67it/s]


Train Loss at epoch 123: 1.0238416194915771



8it [00:00, 14.88it/s]


test Loss at epoch 123: 1.1132051944732666



78it [00:08,  9.16it/s]


Train Loss at epoch 124: 1.0238014459609985



8it [00:00,  8.00it/s]


test Loss at epoch 124: 1.1131806373596191



78it [00:08,  9.07it/s]


Train Loss at epoch 125: 1.0237606763839722



8it [00:00, 14.93it/s]


test Loss at epoch 125: 1.1131205558776855



78it [00:09,  7.93it/s]


Train Loss at epoch 126: 1.023730754852295



8it [00:00,  8.53it/s]


test Loss at epoch 126: 1.1131165027618408



78it [00:07, 10.70it/s]


Train Loss at epoch 127: 1.0236916542053223



8it [00:00, 14.08it/s]


test Loss at epoch 127: 1.1130943298339844



78it [00:10,  7.64it/s]


Train Loss at epoch 128: 1.0236437320709229



8it [00:00, 14.04it/s]


test Loss at epoch 128: 1.1130545139312744



78it [00:07, 10.58it/s]


Train Loss at epoch 129: 1.023602843284607



8it [00:00, 14.63it/s]


test Loss at epoch 129: 1.1130375862121582



78it [00:10,  7.53it/s]


Train Loss at epoch 130: 1.0235625505447388



8it [00:00, 14.50it/s]


test Loss at epoch 130: 1.1130036115646362



78it [00:08,  9.20it/s]


Train Loss at epoch 131: 1.0235201120376587



8it [00:00,  8.21it/s]


test Loss at epoch 131: 1.1129887104034424



78it [00:08,  9.00it/s]


Train Loss at epoch 132: 1.0234787464141846



8it [00:00, 14.11it/s]


test Loss at epoch 132: 1.112941026687622



78it [00:10,  7.53it/s]


Train Loss at epoch 133: 1.0234516859054565



8it [00:00, 11.76it/s]


test Loss at epoch 133: 1.1129933595657349



78it [00:07, 10.58it/s]


Train Loss at epoch 134: 1.023369312286377



8it [00:00, 14.61it/s]


test Loss at epoch 134: 1.1129157543182373



78it [00:10,  7.49it/s]


Train Loss at epoch 135: 1.0233601331710815



8it [00:00, 13.32it/s]


test Loss at epoch 135: 1.1129131317138672



78it [00:07, 10.42it/s]


Train Loss at epoch 136: 1.0233062505722046



8it [00:00,  8.09it/s]


test Loss at epoch 136: 1.1128690242767334



78it [00:09,  7.86it/s]


Train Loss at epoch 137: 1.0232709646224976



8it [00:00, 14.23it/s]


test Loss at epoch 137: 1.1128634214401245



78it [00:09,  8.54it/s]


Train Loss at epoch 138: 1.0232062339782715



8it [00:01,  7.85it/s]


test Loss at epoch 138: 1.1127920150756836



78it [00:08,  9.35it/s]


Train Loss at epoch 139: 1.023183822631836



8it [00:00, 14.80it/s]


test Loss at epoch 139: 1.1128206253051758



78it [00:10,  7.46it/s]


Train Loss at epoch 140: 1.0231170654296875



8it [00:00, 14.74it/s]


test Loss at epoch 140: 1.1127581596374512



78it [00:07, 10.61it/s]


Train Loss at epoch 141: 1.0230872631072998



8it [00:00, 14.83it/s]


test Loss at epoch 141: 1.1127493381500244



78it [00:10,  7.45it/s]


Train Loss at epoch 142: 1.0230463743209839



8it [00:00, 14.32it/s]


test Loss at epoch 142: 1.1127605438232422



78it [00:07, 10.68it/s]


Train Loss at epoch 143: 1.0229747295379639



8it [00:01,  7.90it/s]


test Loss at epoch 143: 1.1126881837844849



78it [00:09,  8.13it/s]


Train Loss at epoch 144: 1.022955060005188



8it [00:00, 14.87it/s]


test Loss at epoch 144: 1.11271071434021



78it [00:08,  8.95it/s]


Train Loss at epoch 145: 1.0228835344314575



8it [00:00,  8.14it/s]


test Loss at epoch 145: 1.112659215927124



78it [00:08,  9.54it/s]


Train Loss at epoch 146: 1.0228509902954102



8it [00:00, 13.89it/s]


test Loss at epoch 146: 1.112666368484497



78it [00:10,  7.60it/s]


Train Loss at epoch 147: 1.0227900743484497



8it [00:00, 14.57it/s]


test Loss at epoch 147: 1.112607479095459



78it [00:07, 10.81it/s]


Train Loss at epoch 148: 1.022752046585083



8it [00:00, 13.95it/s]


test Loss at epoch 148: 1.112576961517334



78it [00:10,  7.60it/s]


Train Loss at epoch 149: 1.0227023363113403



8it [00:00, 14.57it/s]


test Loss at epoch 149: 1.112557053565979



78it [00:07, 10.85it/s]


Train Loss at epoch 150: 1.0226540565490723



8it [00:00, 10.70it/s]


test Loss at epoch 150: 1.1125354766845703



78it [00:09,  7.81it/s]


Train Loss at epoch 151: 1.0226054191589355



8it [00:00, 15.19it/s]


test Loss at epoch 151: 1.1125123500823975



78it [00:08,  9.15it/s]


Train Loss at epoch 152: 1.022559642791748



8it [00:01,  7.97it/s]


test Loss at epoch 152: 1.1124920845031738



78it [00:08,  9.18it/s]


Train Loss at epoch 153: 1.022513747215271



8it [00:00, 15.00it/s]


test Loss at epoch 153: 1.1124577522277832



78it [00:09,  7.87it/s]


Train Loss at epoch 154: 1.0224593877792358



8it [00:00,  9.17it/s]


test Loss at epoch 154: 1.112405776977539



78it [00:07, 10.75it/s]


Train Loss at epoch 155: 1.022411584854126



8it [00:00, 13.91it/s]


test Loss at epoch 155: 1.1123815774917603



78it [00:10,  7.61it/s]


Train Loss at epoch 156: 1.0223559141159058



8it [00:00, 13.88it/s]


test Loss at epoch 156: 1.1123371124267578



78it [00:07, 10.90it/s]


Train Loss at epoch 157: 1.0223041772842407



8it [00:00, 15.08it/s]


test Loss at epoch 157: 1.1123309135437012



78it [00:10,  7.54it/s]


Train Loss at epoch 158: 1.0222465991973877



8it [00:00, 14.46it/s]


test Loss at epoch 158: 1.1123031377792358



78it [00:08,  9.56it/s]


Train Loss at epoch 159: 1.0221925973892212



8it [00:01,  7.92it/s]


test Loss at epoch 159: 1.112273931503296



78it [00:08,  8.91it/s]


Train Loss at epoch 160: 1.022137999534607



8it [00:00, 14.66it/s]


test Loss at epoch 160: 1.1122503280639648



78it [00:09,  8.07it/s]


Train Loss at epoch 161: 1.0220837593078613



8it [00:00,  8.16it/s]


test Loss at epoch 161: 1.112245798110962



78it [00:07, 10.72it/s]


Train Loss at epoch 162: 1.0220403671264648



8it [00:00, 13.98it/s]


test Loss at epoch 162: 1.1121917963027954



78it [00:10,  7.70it/s]


Train Loss at epoch 163: 1.0219732522964478



8it [00:00, 14.78it/s]


test Loss at epoch 163: 1.1121779680252075



78it [00:07, 10.77it/s]


Train Loss at epoch 164: 1.0219203233718872



8it [00:00, 14.54it/s]


test Loss at epoch 164: 1.112137794494629



78it [00:10,  7.62it/s]


Train Loss at epoch 165: 1.0218647718429565



8it [00:00, 14.96it/s]


test Loss at epoch 165: 1.112095594406128



78it [00:07, 10.26it/s]


Train Loss at epoch 166: 1.0218061208724976



8it [00:01,  7.70it/s]


test Loss at epoch 166: 1.1120684146881104



78it [00:09,  8.50it/s]


Train Loss at epoch 167: 1.0217398405075073



8it [00:00, 14.98it/s]


test Loss at epoch 167: 1.1120634078979492



78it [00:09,  8.63it/s]


Train Loss at epoch 168: 1.021680235862732



8it [00:01,  7.91it/s]


test Loss at epoch 168: 1.1120250225067139



78it [00:07,  9.95it/s]


Train Loss at epoch 169: 1.0216227769851685



8it [00:00, 14.78it/s]


test Loss at epoch 169: 1.111994743347168



78it [00:10,  7.55it/s]


Train Loss at epoch 170: 1.021575689315796



8it [00:00, 14.46it/s]


test Loss at epoch 170: 1.1119003295898438



78it [00:07, 10.74it/s]


Train Loss at epoch 171: 1.021514654159546



8it [00:00, 15.00it/s]


test Loss at epoch 171: 1.111792802810669



78it [00:10,  7.63it/s]


Train Loss at epoch 172: 1.021438479423523



8it [00:00, 14.24it/s]


test Loss at epoch 172: 1.1116812229156494



78it [00:07, 10.62it/s]


Train Loss at epoch 173: 1.0213576555252075



8it [00:01,  7.86it/s]


test Loss at epoch 173: 1.11165452003479



78it [00:09,  8.03it/s]


Train Loss at epoch 174: 1.0212972164154053



8it [00:00, 14.26it/s]


test Loss at epoch 174: 1.1116876602172852



78it [00:08,  9.12it/s]


Train Loss at epoch 175: 1.0212408304214478



8it [00:00,  8.03it/s]


test Loss at epoch 175: 1.1117112636566162



78it [00:08,  9.38it/s]


Train Loss at epoch 176: 1.0211763381958008



8it [00:00, 14.68it/s]


test Loss at epoch 176: 1.1117470264434814



78it [00:09,  7.84it/s]


Train Loss at epoch 177: 1.0211201906204224



8it [00:00, 10.94it/s]


test Loss at epoch 177: 1.1117771863937378



78it [00:07, 10.79it/s]


Train Loss at epoch 178: 1.021058440208435



8it [00:00, 14.93it/s]


test Loss at epoch 178: 1.1118099689483643



78it [00:10,  7.69it/s]


Train Loss at epoch 179: 1.0209965705871582



8it [00:00, 15.16it/s]


test Loss at epoch 179: 1.1118342876434326



78it [00:07, 10.75it/s]


Train Loss at epoch 180: 1.0209418535232544



8it [00:00, 13.02it/s]


test Loss at epoch 180: 1.1118576526641846



78it [00:10,  7.54it/s]


Train Loss at epoch 181: 1.0208832025527954



8it [00:00, 13.30it/s]


test Loss at epoch 181: 1.1118649244308472



78it [00:08,  9.46it/s]


Train Loss at epoch 182: 1.0208252668380737



8it [00:00,  8.27it/s]


test Loss at epoch 182: 1.1118842363357544



78it [00:08,  8.85it/s]


Train Loss at epoch 183: 1.0207505226135254



8it [00:00, 14.67it/s]


test Loss at epoch 183: 1.111884593963623



78it [00:09,  8.18it/s]


Train Loss at epoch 184: 1.0206907987594604



8it [00:00,  8.39it/s]


test Loss at epoch 184: 1.1119019985198975



78it [00:07, 10.63it/s]


Train Loss at epoch 185: 1.0206326246261597



8it [00:00, 14.27it/s]


test Loss at epoch 185: 1.1119340658187866



78it [00:10,  7.58it/s]


Train Loss at epoch 186: 1.0205750465393066



8it [00:00, 14.80it/s]


test Loss at epoch 186: 1.1119616031646729



78it [00:07, 10.80it/s]


Train Loss at epoch 187: 1.0205156803131104



8it [00:00, 14.72it/s]


test Loss at epoch 187: 1.1119840145111084



78it [00:10,  7.61it/s]


Train Loss at epoch 188: 1.020445704460144



8it [00:00, 15.08it/s]


test Loss at epoch 188: 1.1119786500930786



78it [00:07,  9.76it/s]


Train Loss at epoch 189: 1.0203882455825806



8it [00:01,  7.98it/s]


test Loss at epoch 189: 1.111978530883789



78it [00:09,  8.60it/s]


Train Loss at epoch 190: 1.0203280448913574



8it [00:00, 14.49it/s]


test Loss at epoch 190: 1.1119811534881592



78it [00:09,  8.27it/s]


Train Loss at epoch 191: 1.0202594995498657



8it [00:00,  8.46it/s]


test Loss at epoch 191: 1.1118935346603394



78it [00:07, 10.13it/s]


Train Loss at epoch 192: 1.0202007293701172



8it [00:00, 14.02it/s]


test Loss at epoch 192: 1.111837387084961



78it [00:10,  7.60it/s]


Train Loss at epoch 193: 1.0201412439346313



8it [00:00, 14.23it/s]


test Loss at epoch 193: 1.1117510795593262



78it [00:07, 10.70it/s]


Train Loss at epoch 194: 1.0200673341751099



8it [00:00, 14.47it/s]


test Loss at epoch 194: 1.1117820739746094



78it [00:10,  7.64it/s]


Train Loss at epoch 195: 1.0200029611587524



8it [00:00, 14.98it/s]


test Loss at epoch 195: 1.1118106842041016



78it [00:07, 10.76it/s]


Train Loss at epoch 196: 1.0199346542358398



8it [00:01,  7.95it/s]


test Loss at epoch 196: 1.1118391752243042



78it [00:09,  7.89it/s]


Train Loss at epoch 197: 1.0198419094085693



8it [00:00, 14.20it/s]


test Loss at epoch 197: 1.1118026971817017



78it [00:08,  8.97it/s]


Train Loss at epoch 198: 1.0197921991348267



8it [00:00,  8.27it/s]


test Loss at epoch 198: 1.1118221282958984



78it [00:08,  9.27it/s]


Train Loss at epoch 199: 1.01972496509552



8it [00:00, 14.58it/s]


test Loss at epoch 199: 1.111795425415039



In [None]:
fig, ax = plt.plot(loss.cpu().data.numpy() )



In [None]:
# evaluate on test set every epoch
test_loss = []
test_loss_sum = []

correct = 0
total = 0

for idx, data in tqdm(enumerate(test_loader, 0)):
    test_x_numerical = data['x_numerical'].to(device, dtype = torch.float)
    test_ids = data['ids'].to(device, dtype = torch.long)
    test_masks = data['mask'].to(device, dtype = torch.long)
    test_token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
    test_targets = data['targets'].to(device, dtype = torch.long)

    y_pred = model(test_x_numerical, test_ids, test_masks, test_token_type_ids)
    _, pred_label = torch.max(y_pred.data, 1)

#         print('y_pred:', y_pred)
    tloss = criterion(y_pred, test_targets.reshape(-1))

    test_loss.append(tloss.data.cpu())
    test_loss_sum.append(tloss.data.cpu())

    wandb.log({'avg test loss in this batch': tloss.item(), 'epoch': epoch, 'batch_id': idx})

    # Get accuracy
    total += test_targets.reshape(-1).size(0)
    correct += (pred_label == test_targets.reshape(-1)).sum()
accuracy = 100 * correct / total

In [None]:
print(e2k[0].shape)
print(e2k[1].shape)
print(e2k)

In [None]:
run_id = str(1)
!mkdir output/$run_id
output_model_file = 'roberta_stock_pred.bin'
output_vocab_file = './output' + str(run_id)

model_to_save = model
torch.save(model_to_save, output_model_file)
tokenizer.save_vocabulary(output_vocab_file)


In [None]:
plt.plot(loss_arr, label="Training loss")
plt.legend()
plt.show()

In [None]:

'''