<a href="https://colab.research.google.com/github/unfortunate-code/Neural-Network-For-Taking-Derivatives/blob/main/Derivatives_LSTM_Big.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab only: mount Google Drive so files under MyDrive can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd drive/MyDrive

/content/drive/MyDrive


In [None]:
!pip install torchsummaryX

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchsummaryX
  Downloading torchsummaryX-1.3.0-py3-none-any.whl (3.6 kB)
Installing collected packages: torchsummaryX
Successfully installed torchsummaryX-1.3.0


In [None]:
import itertools
import random
import re
import torch
import torch.optim as optim
from torch import nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torchsummaryX import summary

In [None]:
def simplify_equation(line):
  """Split one 'd(<expr>)/d<var>=<derivative>' line into its three parts.

  Args:
    line: A raw dataset line, e.g. 'd(2x)/dx=2' (surrounding whitespace is ignored).

  Returns:
    A tuple (expression, variable, derivative) of strings.

  Raises:
    ValueError: If the line does not contain exactly one '=' or its left-hand
      side does not look like 'd(...)/d...'.
  """
  parts = line.strip().split('=')
  if len(parts) != 2:
    raise ValueError('Expected exactly one "=" in line: %r' % (line,))
  lhs, rhs = parts
  # Raw string: '\(' is an invalid escape sequence in a normal string literal.
  # The first group is greedy, so it extends to the LAST ')/d' of the left-hand side.
  matches = re.match(r'd\((.*)\)/d(.*)', lhs)
  if matches is None:
    raise ValueError('Left-hand side is not of the form d(...)/d<var>: %r' % (line,))
  return matches.group(1), matches.group(2), rhs

In [None]:
# Parse every line of the training file into an (expression, variable, derivative) triple.
data = []
with open('derivatives-takehome/train.txt', 'r') as train_file:
  for raw_line in train_file:
    data.append(simplify_equation(raw_line))

In [None]:
def tokenize_equations(eqn, var, var_map):
  """Tokenize an expression, replacing single-letter variables by canonical names.

  Consecutive ASCII letters are grouped into one word. A one-letter word is a
  variable and is emitted as 'var<k>'; a longer word (e.g. 'sin', 'exp') is
  emitted unchanged. Every other character (digit, operator, parenthesis) is
  emitted as a token of its own.

  Args:
    eqn: The expression string to tokenize.
    var: The differentiation variable. It is registered as 'var0' when
      `var_map` is empty; otherwise `var_map` is used as given.
    var_map: Dict mapping original variable letters to canonical names. It is
      updated in place with any newly seen variable.

  Returns:
    A tuple (tokens, var_map) where `var_map` is the same dict object passed in.
  """
  if not var_map:
    var_map[var] = 'var0'
  # Next free index = largest numeric suffix + 1. Parsing the whole suffix
  # (instead of the last character of the lexicographically largest name)
  # keeps this correct once there are ten or more variables.
  var_index = max(int(name[len('var'):]) for name in var_map.values()) + 1

  tokens = []

  def emit_word(word):
    """Append the token for a completed run of letters."""
    nonlocal var_index
    if len(word) > 1:
      tokens.append(word)
      return
    if word not in var_map:
      var_map[word] = 'var' + str(var_index)
      var_index += 1
    tokens.append(var_map[word])

  curr = ''
  for ch in eqn:
    if 'a' <= ch <= 'z' or 'A' <= ch <= 'Z':
      curr += ch
      continue
    if curr:
      emit_word(curr)
      curr = ''
    tokens.append(ch)
  # Flush a word that runs up to the end of the string; without this an
  # expression ending in a letter (e.g. '2*x') would lose its last token.
  if curr:
    emit_word(curr)
  return tokens, var_map

In [None]:
# Tokenize both sides of every equation with one shared variable map, and keep
# the inverted map (canonical name -> original letter) alongside the tokens.
tokenized_data = []
for expression, diff_var, derivative in data:
  x_tokens, var_map = tokenize_equations(expression, diff_var, {})
  y_tokens, var_map = tokenize_equations(derivative, diff_var, var_map)
  inverse_map = {canonical: letter for letter, canonical in var_map.items()}
  tokenized_data.append((x_tokens, y_tokens, inverse_map))

In [None]:
# Distinct tokens seen on the input side and on the output side.
input_vocabulary = {token for x_tokens, _, _ in tokenized_data for token in x_tokens}
output_vocabulary = {token for _, y_tokens, _ in tokenized_data for token in y_tokens}

In [None]:
# Every original variable letter that occurs anywhere in the dataset.
variables = {letter for _, _, inverse_map in tokenized_data for letter in inverse_map.values()}

In [None]:
# Show the three collections first, then their sizes in the same order.
reported_sets = (variables, input_vocabulary, output_vocabulary)
for collection in reported_sets:
  print(collection)
for collection in reported_sets:
  print(len(collection))

{'a', 's', 'c', 'r', 't', 'y', 'u', 'x', 'p', 'b', 'm', 'w', 'i', 'k', 'z', 'v', 'n', 'e', 'o'}
{'4', '1', '*', '(', '7', 'var1', '6', '3', '+', '8', '2', '^', 'exp', 'sin', 'var0', 'cos', '5', '0', '9', '-', ')'}
{'4', '1', '*', '(', '7', 'var1', '6', '3', '+', '2', '8', '^', 'exp', 'sin', 'var0', 'cos', '5', '0', '9', '-', ')'}
19
21
21


In [None]:
# Special tokens shared by the input and output vocabularies.
start_token = '<SOS>'
end_token = '<EOS>'
input_vocabulary.update((start_token, end_token))
output_vocabulary.update((start_token, end_token))

# Number the tokens in sorted order. Iterating the sets directly would assign
# ids in set-iteration order, which for strings changes between interpreter
# sessions (hash randomization) and would make the ids of a saved model
# impossible to reproduce after a kernel restart.
input_index_to_token = dict(enumerate(sorted(input_vocabulary)))
input_token_to_index = {token: index for index, token in input_index_to_token.items()}
output_index_to_token = dict(enumerate(sorted(output_vocabulary)))
output_token_to_index = {token: index for index, token in output_index_to_token.items()}

In [None]:
# 90% / 5% / 5% train / validation / test split over the example indices.
SPLIT_SEED = 0  # fixed seed so the split is identical on every run
split_rng = random.Random(SPLIT_SEED)
# Size the split from the data actually loaded instead of a hard-coded count.
all_indices = set(range(len(tokenized_data)))
# random.sample() needs a sequence (sets are rejected on Python 3.11+); sorting
# also makes the draw independent of set iteration order.
train_indices = set(split_rng.sample(sorted(all_indices), 9 * len(all_indices) // 10))
all_indices -= train_indices
val_indices = set(split_rng.sample(sorted(all_indices), len(all_indices) // 2))
all_indices -= val_indices
# Whatever is left after removing train and validation is the test set.
test_indices = all_indices

In [None]:
# Sizes of the train, test and validation index sets, in that order.
for split_indices in (train_indices, test_indices, val_indices):
  print(len(split_indices))

900000
50000
50000


In [None]:
# Pick the tokenized examples that belong to each split.
train_data, test_data, val_data = (
  [tokenized_data[i] for i in split]
  for split in (train_indices, test_indices, val_indices)
)

In [None]:
class DerivativesDataset(Dataset):
  """Dataset over (input tokens, output tokens, variable map) triples."""

  def __init__(self, data):
    # Indexable collection of (x_tokens, y_tokens, var_map) triples.
    self.data = data

  def __len__(self):
    return len(self.data)

  def __getitem__(self, index):
    """Return (input ids, output ids, var map); both id tensors end with the <EOS> id."""
    x_tokens, y_tokens, var_names = self.data[index]
    x_ids = [input_token_to_index[token] for token in x_tokens]
    x_ids.append(input_token_to_index[end_token])
    y_ids = [output_token_to_index[token] for token in y_tokens]
    y_ids.append(output_token_to_index[end_token])
    return torch.tensor(x_ids), torch.tensor(y_ids), var_names

In [None]:
class PadSequence:
  """Collate function that right-pads the inputs and targets of a batch with <EOS> ids."""

  def __call__(self, batch):
    """Return (padded inputs, padded targets, list of variable maps) for `batch`."""
    pad = torch.nn.utils.rnn.pad_sequence
    padded_inputs = pad(
      [item[0] for item in batch],
      batch_first=True,
      padding_value=input_token_to_index[end_token],
    )
    padded_targets = pad(
      [item[1] for item in batch],
      batch_first=True,
      padding_value=output_token_to_index[end_token],
    )
    var_maps = [item[2] for item in batch]
    return padded_inputs, padded_targets, var_maps

In [None]:
# Shuffled training batches, padded per batch by PadSequence.
train_dataset = DerivativesDataset(train_data)
train_dataloader = DataLoader(
  train_dataset,
  batch_size=4096,
  shuffle=True,
  num_workers=2,
  pin_memory=True,
  collate_fn=PadSequence(),
)

In [None]:
class Encoder(nn.Module):
  """Embeds input token ids, passes them through a two-layer MLP and encodes them with an LSTM.

  Args:
    input_size: Number of distinct input token ids (rows of the embedding table).
    hidden_size: Width of the embeddings and of the LSTM state.
    layers: Number of stacked LSTM layers.
    dropout: Dropout probability applied after the embedding and after the MLP.
  """

  def __init__(self, input_size, hidden_size, layers, dropout):
    super(Encoder, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.layers = layers
    self.embedding_layer = nn.Embedding(input_size, hidden_size)
    self.dropout1 = nn.Dropout(dropout)
    self.linear1 = nn.Linear(hidden_size, 2 * hidden_size)
    self.linear2 = nn.Linear(hidden_size * 2, hidden_size)
    self.dropout2 = nn.Dropout(dropout)
    self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=layers, batch_first=True)
    # Both linear layers feed a ReLU, hence Kaiming initialisation.
    nn.init.kaiming_normal_(self.linear1.weight, mode='fan_in')
    nn.init.kaiming_normal_(self.linear2.weight, mode='fan_in')

  def forward(self, x):
    """Encode a batch of token ids.

    Args:
      x: Integer tensor of token ids indexed as (batch, sequence); the LSTM is
        built with batch_first=True.

    Returns:
      The result of nn.LSTM: (outputs, (h_n, c_n)).
    """
    embeddings = self.embedding_layer(x)
    embeddings = self.dropout1(embeddings)
    embeddings = self.linear1(embeddings)
    embeddings = nn.functional.relu(embeddings)
    embeddings = self.linear2(embeddings)
    embeddings = nn.functional.relu(embeddings)
    embeddings = self.dropout2(embeddings)
    # Create the zero initial state with the dtype and device of the
    # activations, so the module runs wherever its parameters live instead of
    # requiring CUDA.
    h0 = embeddings.new_zeros(self.layers, x.size(0), self.hidden_size)
    c0 = torch.zeros_like(h0)
    return self.lstm(embeddings, (h0, c0))

In [None]:
class Decoder(nn.Module):
  """Decoder: embedding -> MLP -> LSTM -> feed-forward head producing output-token logits.

  Args:
    hidden_size: Width of the embeddings and of the LSTM state.
    output_size: Number of distinct output token ids (embedding rows and logits).
    layers: Number of stacked LSTM layers.
    dropout: Dropout probability applied after the embedding and after the MLP.
  """

  def __init__(self, hidden_size, output_size, layers, dropout):
    super().__init__()
    self.output_size = output_size
    self.hidden_size = hidden_size
    self.layers = layers

    # Token embedding followed by a small MLP in front of the LSTM.
    self.embedding_layer = nn.Embedding(output_size, hidden_size)
    self.dropout1 = nn.Dropout(dropout)
    self.linear1 = nn.Linear(hidden_size, 2 * hidden_size)
    self.linear2 = nn.Linear(2 * hidden_size, hidden_size)
    self.dropout2 = nn.Dropout(dropout)
    self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=layers, batch_first=True)

    # Feed-forward head applied to the last LSTM layer's hidden state.
    self.fc_linear1 = nn.Linear(hidden_size, 2 * hidden_size)
    self.fc_linear2 = nn.Linear(2 * hidden_size, 4 * hidden_size)
    self.fc_linear3 = nn.Linear(4 * hidden_size, 2 * hidden_size)
    self.fc = nn.Sequential(
      self.fc_linear1, nn.ReLU(),
      self.fc_linear2, nn.ReLU(),
      self.fc_linear3, nn.ReLU(),
    )
    self.out = nn.Linear(2 * hidden_size, output_size)

    # Kaiming initialisation for every linear layer that is followed by a ReLU.
    relu_fed_layers = (self.linear1, self.linear2, self.fc_linear1, self.fc_linear2, self.fc_linear3)
    for layer in relu_fed_layers:
      nn.init.kaiming_normal_(layer.weight, mode='fan_in')

  def forward(self, x, h0, c0):
    """Run the decoder on `x` starting from the LSTM state (h0, c0).

    Returns:
      (pred, hn, cn): logits computed from the last layer's final hidden state
      hn[-1], plus the updated LSTM hidden and cell states.
    """
    relu = nn.functional.relu
    features = self.dropout1(self.embedding_layer(x))
    features = relu(self.linear1(features))
    features = self.dropout2(relu(self.linear2(features)))
    _, (hn, cn) = self.lstm(features, (h0, c0))
    logits = self.out(self.fc(hn[-1]))
    return logits, hn, cn

In [None]:
# Model size: number of stacked LSTM layers, and width of embeddings / LSTM state.
num_layers = 2
hidden_size = 23

In [None]:
# Arguments are passed by keyword on purpose: Encoder takes
# (input_size, hidden_size, ...) while Decoder takes (hidden_size, output_size, ...),
# so positional calls silently swap the vocabulary size and the hidden width
# whenever the two values differ.
encoder = Encoder(input_size=len(input_vocabulary), hidden_size=hidden_size, layers=num_layers, dropout=0.2)
decoder = Decoder(hidden_size=hidden_size, output_size=len(output_vocabulary), layers=num_layers, dropout=0.2)
encoder = encoder.train().cuda()
decoder = decoder.train().cuda()

In [None]:
# Show the encoder's layer structure.
print(encoder)


Encoder(
  (embedding_layer): Embedding(23, 23)
  (dropout1): Dropout(p=0.2, inplace=False)
  (linear1): Linear(in_features=23, out_features=46, bias=True)
  (linear2): Linear(in_features=46, out_features=23, bias=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (lstm): LSTM(23, 23, num_layers=2, batch_first=True)
)


In [None]:
# Show the decoder's layer structure.
print(decoder)

Decoder(
  (embedding_layer): Embedding(23, 23)
  (dropout1): Dropout(p=0.2, inplace=False)
  (linear1): Linear(in_features=23, out_features=46, bias=True)
  (linear2): Linear(in_features=46, out_features=23, bias=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (lstm): LSTM(23, 23, num_layers=2, batch_first=True)
  (fc_linear1): Linear(in_features=23, out_features=46, bias=True)
  (fc_linear2): Linear(in_features=46, out_features=92, bias=True)
  (fc_linear3): Linear(in_features=92, out_features=46, bias=True)
  (fc): Sequential(
    (0): Linear(in_features=23, out_features=46, bias=True)
    (1): ReLU()
    (2): Linear(in_features=46, out_features=92, bias=True)
    (3): ReLU()
    (4): Linear(in_features=92, out_features=46, bias=True)
    (5): ReLU()
  )
  (out): Linear(in_features=46, out_features=23, bias=True)
)


In [None]:
# Training setup: one Adam optimizer per sub-network and a cross-entropy loss.
epochs = 100
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
encoder_optimizer, decoder_optimizer = (
  optim.Adam(module.parameters(), lr=learning_rate)
  for module in (encoder, decoder)
)

In [None]:
# Teacher-forced training: at step i the decoder receives the previous target
# token (or <SOS> at step 0) and is trained to predict target token i.
sos_index = output_token_to_index[start_token]
for epoch in range(epochs):
  print('Epoch', epoch)
  # Re-enable dropout in case an evaluation cell switched the models to eval
  # mode before this cell is (re-)run.
  encoder.train()
  decoder.train()
  total_loss = []
  # The per-example variable maps are not needed here. Binding them to `_`
  # (instead of `map`) keeps the builtin `map` usable at notebook scope.
  for x, y, _ in tqdm(train_dataloader):
    x = x.cuda()
    y = y.cuda()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    _, (enc_hn, enc_cn) = encoder(x)
    # Build the <SOS> column directly on the batch's device.
    decoder_input = torch.full((len(x), 1), sos_index, dtype=torch.long, device=x.device)
    dec_h, dec_c = enc_hn, enc_cn
    loss = 0
    for i in range(y.shape[1]):
      pred, dec_h, dec_c = decoder(decoder_input, dec_h, dec_c)
      decoder_input = y[:, i].unsqueeze(1)
      loss += criterion(pred, y[:, i])
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    # Record the loss averaged over decoding steps for this batch.
    total_loss.append(loss.item() / y.shape[1])
  print(sum(total_loss) / len(total_loss))

Epoch 0


100%|██████████| 220/220 [00:36<00:00,  6.00it/s]


1.6940062868803327
Epoch 1


100%|██████████| 220/220 [00:36<00:00,  6.03it/s]


0.799174789034704
Epoch 2


100%|██████████| 220/220 [00:36<00:00,  6.09it/s]


0.7104045884899601
Epoch 3


100%|██████████| 220/220 [00:37<00:00,  5.86it/s]


0.6671109808285951
Epoch 4


100%|██████████| 220/220 [00:36<00:00,  6.09it/s]


0.6344411417533616
Epoch 5


100%|██████████| 220/220 [00:35<00:00,  6.14it/s]


0.603416954083299
Epoch 6


100%|██████████| 220/220 [00:38<00:00,  5.67it/s]


0.5791728324402629
Epoch 7


100%|██████████| 220/220 [00:37<00:00,  5.83it/s]


0.5604572245390987
Epoch 8


100%|██████████| 220/220 [00:36<00:00,  6.03it/s]


0.5452368720387147
Epoch 9


100%|██████████| 220/220 [00:36<00:00,  6.06it/s]


0.5316031951616098
Epoch 10


100%|██████████| 220/220 [00:37<00:00,  5.80it/s]


0.5203874373435975
Epoch 11


100%|██████████| 220/220 [00:36<00:00,  6.03it/s]


0.5111040927977807
Epoch 12


100%|██████████| 220/220 [00:36<00:00,  6.07it/s]


0.5010919341167317
Epoch 13


100%|██████████| 220/220 [00:38<00:00,  5.77it/s]


0.4936302245038715
Epoch 14


100%|██████████| 220/220 [00:36<00:00,  6.06it/s]


0.48655275256608715
Epoch 15


100%|██████████| 220/220 [00:36<00:00,  6.03it/s]


0.478694383200904
Epoch 16


100%|██████████| 220/220 [00:36<00:00,  6.07it/s]


0.4728909721326822
Epoch 17


100%|██████████| 220/220 [00:37<00:00,  5.84it/s]


0.466352622010384
Epoch 18


100%|██████████| 220/220 [00:36<00:00,  6.09it/s]


0.46200670445264513
Epoch 19


100%|██████████| 220/220 [00:36<00:00,  6.05it/s]


0.4562809942696005
Epoch 20


100%|██████████| 220/220 [00:36<00:00,  6.07it/s]


0.45180932278205344
Epoch 21


100%|██████████| 220/220 [00:37<00:00,  5.83it/s]


0.4486670428063771
Epoch 22


100%|██████████| 220/220 [00:36<00:00,  6.11it/s]


0.44227277350899064
Epoch 23


100%|██████████| 220/220 [00:36<00:00,  6.07it/s]


0.4395707052000875
Epoch 24


100%|██████████| 220/220 [00:38<00:00,  5.66it/s]


0.43559747088821715
Epoch 25


100%|██████████| 220/220 [00:36<00:00,  5.97it/s]


0.43216312954863223
Epoch 26


100%|██████████| 220/220 [00:36<00:00,  5.98it/s]


0.4293676222251554
Epoch 27


100%|██████████| 220/220 [00:36<00:00,  6.05it/s]


0.42637482533153803
Epoch 28


100%|██████████| 220/220 [00:38<00:00,  5.79it/s]


0.4221810955203748
Epoch 29


100%|██████████| 220/220 [00:36<00:00,  6.04it/s]


0.421282465042223
Epoch 30


100%|██████████| 220/220 [00:36<00:00,  6.02it/s]


0.41737338603307067
Epoch 31


100%|██████████| 220/220 [00:36<00:00,  6.02it/s]


0.4139814446819132
Epoch 32


100%|██████████| 220/220 [00:37<00:00,  5.80it/s]


0.4122780818237369
Epoch 33


100%|██████████| 220/220 [00:36<00:00,  6.03it/s]


0.40877557039581164
Epoch 34


100%|██████████| 220/220 [00:36<00:00,  6.05it/s]


0.4072619704077047
Epoch 35


100%|██████████| 220/220 [00:37<00:00,  5.84it/s]


0.40468423642548434
Epoch 36


100%|██████████| 220/220 [00:36<00:00,  6.08it/s]


0.4020988445038703
Epoch 37


100%|██████████| 220/220 [00:36<00:00,  6.07it/s]


0.3996210270965156
Epoch 38


100%|██████████| 220/220 [00:36<00:00,  6.09it/s]


0.4068242795276258
Epoch 39


100%|██████████| 220/220 [00:37<00:00,  5.84it/s]


0.3951281780862521
Epoch 40


100%|██████████| 220/220 [00:37<00:00,  5.92it/s]


0.3930239701431215
Epoch 41


100%|██████████| 220/220 [00:36<00:00,  5.95it/s]


0.3939215096486839
Epoch 42


100%|██████████| 220/220 [00:36<00:00,  6.05it/s]


0.38827545076055536
Epoch 43


100%|██████████| 220/220 [00:38<00:00,  5.68it/s]


0.3885415637065853
Epoch 44


100%|██████████| 220/220 [00:36<00:00,  5.95it/s]


0.38663437371341647
Epoch 45


100%|██████████| 220/220 [00:37<00:00,  5.94it/s]


0.3852696626530528
Epoch 46


100%|██████████| 220/220 [00:37<00:00,  5.79it/s]


0.38334853396520363
Epoch 47


100%|██████████| 220/220 [00:36<00:00,  6.05it/s]


0.38146237589277615
Epoch 48


100%|██████████| 220/220 [00:36<00:00,  6.04it/s]


0.37917886873486717
Epoch 49


100%|██████████| 220/220 [00:36<00:00,  6.07it/s]


0.3776588541149581
Epoch 50


100%|██████████| 220/220 [00:38<00:00,  5.72it/s]


0.37778720133018795
Epoch 51


100%|██████████| 220/220 [00:36<00:00,  6.05it/s]


0.37342328747949893
Epoch 52


100%|██████████| 220/220 [00:36<00:00,  6.11it/s]


0.3738618120514286
Epoch 53


100%|██████████| 220/220 [00:36<00:00,  6.04it/s]


0.3710374741530061
Epoch 54


100%|██████████| 220/220 [00:38<00:00,  5.77it/s]


0.3743383978112368
Epoch 55


100%|██████████| 220/220 [00:35<00:00,  6.13it/s]


0.36555576886669006
Epoch 56


100%|██████████| 220/220 [00:36<00:00,  6.07it/s]


0.36618631109825406
Epoch 57


100%|██████████| 220/220 [00:37<00:00,  5.88it/s]


0.3884395051418909
Epoch 58


100%|██████████| 220/220 [00:35<00:00,  6.13it/s]


0.3624100526304801
Epoch 59


100%|██████████| 220/220 [00:37<00:00,  5.84it/s]


0.36565938753799454
Epoch 60


100%|██████████| 220/220 [00:36<00:00,  6.07it/s]


0.3599739322895508
Epoch 61


100%|██████████| 220/220 [00:37<00:00,  5.80it/s]


0.36319304939668023
Epoch 62


100%|██████████| 220/220 [00:36<00:00,  6.08it/s]


0.3599390643435566
Epoch 63


100%|██████████| 220/220 [00:36<00:00,  6.04it/s]


0.3564080329393024
Epoch 64


100%|██████████| 220/220 [00:36<00:00,  6.07it/s]


0.36538633243318425
Epoch 65


100%|██████████| 220/220 [00:37<00:00,  5.81it/s]


0.35335838615207243
Epoch 66


100%|██████████| 220/220 [00:36<00:00,  6.06it/s]


0.3535529830420151
Epoch 67


100%|██████████| 220/220 [00:36<00:00,  6.06it/s]


0.3834779048607024
Epoch 68


100%|██████████| 220/220 [00:35<00:00,  6.15it/s]


0.3502900521065678
Epoch 69


100%|██████████| 220/220 [00:37<00:00,  5.91it/s]


0.34997248243726803
Epoch 70


100%|██████████| 220/220 [00:35<00:00,  6.13it/s]


0.3602571129570718
Epoch 71


100%|██████████| 220/220 [00:35<00:00,  6.20it/s]


0.34758024028549295
Epoch 72


100%|██████████| 220/220 [00:37<00:00,  5.93it/s]


0.354353873946885
Epoch 73


100%|██████████| 220/220 [00:35<00:00,  6.26it/s]


0.34582640442105805
Epoch 74


100%|██████████| 220/220 [00:35<00:00,  6.22it/s]


0.34504877541630746
Epoch 75


100%|██████████| 220/220 [00:36<00:00,  6.05it/s]


0.36507153925245844
Epoch 76


100%|██████████| 220/220 [00:38<00:00,  5.68it/s]


0.34316292664250664
Epoch 77


100%|██████████| 220/220 [00:35<00:00,  6.19it/s]


0.34240900753240033
Epoch 78


100%|██████████| 220/220 [00:35<00:00,  6.19it/s]


0.3501436198393006
Epoch 79


100%|██████████| 220/220 [00:35<00:00,  6.19it/s]


0.34071533376449126
Epoch 80


100%|██████████| 220/220 [00:36<00:00,  5.96it/s]


0.36304412857753154
Epoch 81


100%|██████████| 220/220 [00:35<00:00,  6.15it/s]


0.34151768626829976
Epoch 82


100%|██████████| 220/220 [00:35<00:00,  6.13it/s]


0.33767463917731505
Epoch 83


100%|██████████| 220/220 [00:35<00:00,  6.16it/s]


0.33810947696939314
Epoch 84


100%|██████████| 220/220 [00:37<00:00,  5.94it/s]


0.3496947360930884
Epoch 85


100%|██████████| 220/220 [00:35<00:00,  6.17it/s]


0.3351632262649238
Epoch 86


100%|██████████| 220/220 [00:35<00:00,  6.18it/s]


0.3353872728374667
Epoch 87


100%|██████████| 220/220 [00:35<00:00,  6.17it/s]


0.3514532884428227
Epoch 88


100%|██████████| 220/220 [00:37<00:00,  5.93it/s]


0.3324706068368507
Epoch 89


100%|██████████| 220/220 [00:35<00:00,  6.20it/s]


0.3329743090329336
Epoch 90


100%|██████████| 220/220 [00:35<00:00,  6.21it/s]


0.34281360814903994
Epoch 91


100%|██████████| 220/220 [00:35<00:00,  6.21it/s]


0.33138131200066084
Epoch 92


100%|██████████| 220/220 [00:38<00:00,  5.73it/s]


0.331284253616992
Epoch 93


100%|██████████| 220/220 [00:37<00:00,  5.89it/s]


0.3557431748334868
Epoch 94


100%|██████████| 220/220 [00:38<00:00,  5.75it/s]


0.3287909985292914
Epoch 95


100%|██████████| 220/220 [00:37<00:00,  5.86it/s]


0.3286769915187814
Epoch 96


100%|██████████| 220/220 [00:36<00:00,  6.06it/s]


0.32823355266963294
Epoch 97


100%|██████████| 220/220 [00:35<00:00,  6.13it/s]


0.34070174782862733
Epoch 98


100%|██████████| 220/220 [00:36<00:00,  6.09it/s]


0.32605949406438706
Epoch 99


100%|██████████| 220/220 [00:37<00:00,  5.89it/s]

0.32693521063171704





In [None]:
# Save the complete module objects to paths relative to the current working directory.
for trained_module, checkpoint_path in (
    (encoder, 'encoder-large-hidden-epochs-100.model'),
    (decoder, 'decoder-large-hidden-epochs-100.model'),
):
  torch.save(trained_module, checkpoint_path)

In [None]:
def is_match(y, pred, end_index=None):
  """Return True iff `pred` equals the target `y` up to its first end token.

  Args:
    y: Target id sequence; everything from the first end-token id on is ignored.
    pred: Predicted id sequence, without an end token.
    end_index: Id of the end token. Defaults to output_token_to_index[end_token].

  Returns:
    True when `pred` has exactly as many ids as `y` has before its first end
    token and all of them are equal; False otherwise. In particular a
    prediction that is longer than the target does not match.
  """
  if end_index is None:
    end_index = output_token_to_index[end_token]
  # Number of real target tokens = position of the first end token
  # (or the full length when `y` contains none).
  target_length = len(y)
  for i in range(len(y)):
    if y[i] == end_index:
      target_length = i
      break
  if len(pred) != target_length:
    return False
  return all(y[i] == pred[i] for i in range(target_length))

In [None]:
def evaluate(dataloader, cutoff_length, encoder, decoder):
  """Greedy-decode every example of `dataloader` and return the match accuracy.

  Args:
    dataloader: Yields (inputs, targets, var_maps) batches.
    cutoff_length: Maximum number of tokens generated per example.
    encoder: Encoder module; it is switched to eval mode and left in it.
    decoder: Decoder module; it is switched to eval mode and left in it.

  Returns:
    The fraction of examples whose decoded ids (cut at the first <EOS>)
    satisfy is_match() against the target.
  """
  encoder = encoder.eval()
  decoder = decoder.eval()
  # Run on whatever device the model lives on instead of assuming CUDA.
  device = next(encoder.parameters()).device
  sos_index = output_token_to_index[start_token]
  eos_index = output_token_to_index[end_token]
  count = correct_count = 0
  with torch.no_grad():
    for x, y, _ in tqdm(dataloader):
      count += len(x)
      x = x.to(device)
      _, (dec_h, dec_c) = encoder(x)
      # Decode the whole batch at once: one decoder call per time step rather
      # than one per example and time step. Rows are independent of each other
      # in eval mode, so each row follows the same greedy path as when decoded
      # alone (up to floating-point rounding in near-tied logits).
      decoder_input = torch.full((len(x), 1), sos_index, dtype=torch.long, device=device)
      steps = []
      for _step in range(cutoff_length):
        pred, dec_h, dec_c = decoder(decoder_input, dec_h, dec_c)
        decoder_input = pred.argmax(dim=1, keepdim=True)
        steps.append(decoder_input)
      if steps:
        decoded = torch.cat(steps, dim=1).tolist()
      else:
        decoded = [[] for _row in range(len(x))]
      for target, row in zip(y.tolist(), decoded):
        # Generation conceptually stops at the first <EOS>; drop it and
        # everything produced after it.
        if eos_index in row:
          row = row[:row.index(eos_index)]
        if is_match(target, row):
          correct_count += 1
  return correct_count / count

In [None]:
# Accuracy on the validation split, decoding at most 40 tokens per example.
val_dataset = DerivativesDataset(val_data)
val_dataloader = DataLoader(
  val_dataset,
  batch_size=4096,
  shuffle=False,
  num_workers=2,
  pin_memory=True,
  collate_fn=PadSequence(),
)
evaluate(val_dataloader, 40, encoder, decoder)

100%|██████████| 13/13 [09:40<00:00, 44.67s/it]


0.01214

In [None]:
# Accuracy on the held-out test split, decoding at most 40 tokens per example.
test_dataset = DerivativesDataset(test_data)
test_dataloader = DataLoader(
  test_dataset,
  batch_size=4096,
  shuffle=False,
  num_workers=2,
  pin_memory=True,
  collate_fn=PadSequence(),
)
evaluate(test_dataloader, 40, encoder, decoder)

100%|██████████| 13/13 [09:39<00:00, 44.58s/it]


0.01164

In [None]:
# Per-layer summary of the encoder, traced with the first test input as a batch of one.
summary(encoder, test_dataset[0][0].cuda().unsqueeze(0))

                  Kernel Shape Output Shape  Params Mult-Adds
Layer                                                        
0_embedding_layer     [23, 23]  [1, 11, 23]   529.0     529.0
1_dropout1                   -  [1, 11, 23]       -         -
2_linear1             [23, 46]  [1, 11, 46]  1.104k    1.058k
3_linear2             [46, 23]  [1, 11, 23]  1.081k    1.058k
4_dropout2                   -  [1, 11, 23]       -         -
5_lstm                       -  [1, 11, 23]  8.832k    8.464k
--------------------------------------------------------------
                       Totals
Total params          11.546k
Trainable params      11.546k
Non-trainable params      0.0
Mult-Adds             11.109k


  df_sum = df.sum()


Unnamed: 0_level_0,Kernel Shape,Output Shape,Params,Mult-Adds
Layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_embedding_layer,"[23, 23]","[1, 11, 23]",529.0,529.0
1_dropout1,-,"[1, 11, 23]",,
2_linear1,"[23, 46]","[1, 11, 46]",1104.0,1058.0
3_linear2,"[46, 23]","[1, 11, 23]",1081.0,1058.0
4_dropout2,-,"[1, 11, 23]",,
5_lstm,-,"[1, 11, 23]",8832.0,8464.0
