### Smoke test: every hyperparameter combination is trained for 2 steps to catch runtime errors

In [1]:
# importing required libraries for the notebook
import lightning as lt
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from torchaudio.functional import edit_distance as edit_dist
import random
from language import *
from dataset_dataloader import *
from encoder_decoder import *
from runner import Runner

In [2]:
# report which accelerator torch can see - NOT USED as we have switched to lightning
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


#### Defining the source and target languages; loading data to create Language objects

In [3]:
# language codes for the two sides of the pair (source first, then target)
SOURCE, TARGET = 'eng', 'tam'

In [4]:
# read the train / valid / test splits for TARGET (in that order), each as an (x, y) pair
(x_train, y_train), (x_valid, y_valid), (x_test, y_test) = (
    load_data(TARGET, split_name) for split_name in ('train', 'valid', 'test')
)

# report how many samples each split contains
print(
    f'Number of train samples = {len(x_train)}',
    f'Number of valid samples = {len(x_valid)}',
    f'Number of test samples = {len(x_test)}',
    sep='\n',
)

Number of train samples = 51200
Number of valid samples = 4096
Number of test samples = 4096


In [6]:
# one Language object per side of the pair; they store vocabulary, index2sym and sym2index
SRC_LANG, TAR_LANG = Language(SOURCE), Language(TARGET)

# the vocabulary is built from the train data only (source first, then target)
for lang_obj, train_words in ((SRC_LANG, x_train), (TAR_LANG, y_train)):
    lang_obj.create_vocabulary(*train_words)

# generate mappings from characters to numbers and vice versa
for lang_obj in (SRC_LANG, TAR_LANG):
    lang_obj.generate_mappings()

# show the source vocabulary and its index -> symbol mapping
print(
    f'Source Vocabulary Size = {len(SRC_LANG.symbols)}',
    f'Source Vocabulary = {SRC_LANG.symbols}',
    f'Source Mapping {SRC_LANG.index2sym}',
    sep='\n',
)

# show the target vocabulary and its index -> symbol mapping
print(
    f'Target Vocabulary Size = {len(TAR_LANG.symbols)}',
    f'Target Vocabulary = {TAR_LANG.symbols}',
    f'Target Mapping {TAR_LANG.index2sym}',
    sep='\n',
)

Source Vocabulary Size = 26
Source Vocabulary = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
Source Mapping {0: '@', 1: '$', 2: '!', 3: '%', 4: 'a', 5: 'b', 6: 'c', 7: 'd', 8: 'e', 9: 'f', 10: 'g', 11: 'h', 12: 'i', 13: 'j', 14: 'k', 15: 'l', 16: 'm', 17: 'n', 18: 'o', 19: 'p', 20: 'q', 21: 'r', 22: 's', 23: 't', 24: 'u', 25: 'v', 26: 'w', 27: 'x', 28: 'y', 29: 'z'}
Target Vocabulary Size = 46
Target Vocabulary = ['ஃ', 'அ', 'ஆ', 'இ', 'ஈ', 'உ', 'ஊ', 'எ', 'ஏ', 'ஐ', 'ஒ', 'ஓ', 'க', 'ங', 'ச', 'ஜ', 'ஞ', 'ட', 'ண', 'த', 'ந', 'ன', 'ப', 'ம', 'ய', 'ர', 'ற', 'ல', 'ள', 'ழ', 'வ', 'ஷ', 'ஸ', 'ஹ', 'ா', 'ி', 'ீ', 'ு', 'ூ', 'ெ', 'ே', 'ை', 'ொ', 'ோ', 'ௌ', '்']
Target Mapping {0: '@', 1: '$', 2: '!', 3: '%', 4: 'ஃ', 5: 'அ', 6: 'ஆ', 7: 'இ', 8: 'ஈ', 9: 'உ', 10: 'ஊ', 11: 'எ', 12: 'ஏ', 13: 'ஐ', 14: 'ஒ', 15: 'ஓ', 16: 'க', 17: 'ங', 18: 'ச', 19: 'ஜ', 20: 'ஞ', 21: 'ட', 22: 'ண', 23: 'த', 24: 'ந', 25: 'ன', 26: 'ப', 27: 'ம', 28: 'ய',

##### Bug Search Section

In [7]:
# Smoke test: build a Runner for every combination of the swept settings below
# and fit it for only 2 training steps (max_steps=2), so that any combination
# that raises at construction or during a step is caught quickly.
# RESULT -> all clear; no bugs caught
num_lay = [1, 3]
ctype = ['LSTM', 'GRU', 'RNN']
bidirect = [True, False]
attn = [True, False]

# flatten the sweep into one list; the order matches four nested loops with
# num_lay outermost and attn innermost
combos = [
    (n, c, b, a)
    for n in num_lay
    for c in ctype
    for b in bidirect
    for a in attn
]

for run_idx, (n, c, b, a) in enumerate(combos, start=1):
    # label every run: the Lightning logs of the individual runs look almost
    # identical, so without this line a warning or traceback cannot be traced
    # back to the combination that produced it
    print(f'[{run_idx}/{len(combos)}] num_lay={n}, ctype={c}, bidirect={b}, attn={a}', flush=True)

    # NOTE(review): 128, 256, 0.8 and 0.05 are passed positionally and held
    # fixed for all runs; their meaning is set by Runner's signature, which is
    # not visible in this notebook - confirm there before changing them
    runner = Runner(SOURCE, TARGET, SRC_LANG, TAR_LANG, 128, n, 256, c, 0.8, b, a, 0.05, 'Adam', learning_rate=2e-3, batch_size=128)

    # a fresh Trainer per combination, stopped after 2 steps
    trainer = lt.Trainer(max_steps=2)
    trainer.fit(runner)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 925 K 
1 | attn_layer     | Attention        | 197 K 
2 | decoder        | DecoderNet       | 1.1 M 
3 | model          | EncoderDecoder   | 2.1 M 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
2.1 M     Trainable params
0         Non-trainable params
2.1 M     Total params
8.246     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 925 K 
1 | decoder        | DecoderNet       | 414 K 
2 | model          | EncoderDecoder   | 1.3 M 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
1.3 M     Trainable params
0         Non-trainable params
1.3 M     Total params
5.361     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 399 K 
1 | attn_layer     | Attention        | 131 K 
2 | decoder        | DecoderNet       | 808 K 
3 | model          | EncoderDecoder   | 1.2 M 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.829     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 399 K 
1 | decoder        | DecoderNet       | 414 K 
2 | model          | EncoderDecoder   | 813 K 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
813 K     Trainable params
0         Non-trainable params
813 K     Total params
3.254     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 728 K 
1 | attn_layer     | Attention        | 197 K 
2 | decoder        | DecoderNet       | 906 K 
3 | model          | EncoderDecoder   | 1.6 M 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.536     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 728 K 
1 | decoder        | DecoderNet       | 315 K 
2 | model          | EncoderDecoder   | 1.0 M 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 M     Total params
4.175     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 300 K 
1 | attn_layer     | Attention        | 131 K 
2 | decoder        | DecoderNet       | 643 K 
3 | model          | EncoderDecoder   | 944 K 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
944 K     Trainable params
0         Non-trainable params
944 K     Total params
3.777     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 300 K 
1 | decoder        | DecoderNet       | 315 K 
2 | model          | EncoderDecoder   | 615 K 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
615 K     Trainable params
0         Non-trainable params
615 K     Total params
2.464     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 332 K 
1 | attn_layer     | Attention        | 197 K 
2 | decoder        | DecoderNet       | 446 K 
3 | model          | EncoderDecoder   | 779 K 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
779 K     Trainable params
0         Non-trainable params
779 K     Total params
3.116     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 332 K 
1 | decoder        | DecoderNet       | 118 K 
2 | model          | EncoderDecoder   | 450 K 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
450 K     Trainable params
0         Non-trainable params
450 K     Total params
1.803     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 102 K 
1 | attn_layer     | Attention        | 131 K 
2 | decoder        | DecoderNet       | 315 K 
3 | model          | EncoderDecoder   | 417 K 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
417 K     Trainable params
0         Non-trainable params
417 K     Total params
1.671     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 102 K 
1 | decoder        | DecoderNet       | 118 K 
2 | model          | EncoderDecoder   | 220 K 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
220 K     Trainable params
0         Non-trainable params
220 K     Total params
0.883     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 4.3 M 
1 | attn_layer     | Attention        | 197 K 
2 | decoder        | DecoderNet       | 2.2 M 
3 | model          | EncoderDecoder   | 6.5 M 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
6.5 M     Trainable params
0         Non-trainable params
6.5 M     Total params
26.123    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 4.3 M 
1 | decoder        | DecoderNet       | 1.5 M 
2 | model          | EncoderDecoder   | 5.8 M 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
5.8 M     Trainable params
0         Non-trainable params
5.8 M     Total params
23.238    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 1.5 M 
1 | attn_layer     | Attention        | 131 K 
2 | decoder        | DecoderNet       | 1.9 M 
3 | model          | EncoderDecoder   | 3.3 M 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
3.3 M     Trainable params
0         Non-trainable params
3.3 M     Total params
13.251    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 1.5 M 
1 | decoder        | DecoderNet       | 1.5 M 
2 | model          | EncoderDecoder   | 2.9 M 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
2.9 M     Trainable params
0         Non-trainable params
2.9 M     Total params
11.676    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 3.4 M 
1 | attn_layer     | Attention        | 197 K 
2 | decoder        | DecoderNet       | 1.7 M 
3 | model          | EncoderDecoder   | 5.1 M 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 M     Total params
20.207    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 3.4 M 
1 | decoder        | DecoderNet       | 1.1 M 
2 | model          | EncoderDecoder   | 4.5 M 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
4.5 M     Trainable params
0         Non-trainable params
4.5 M     Total params
17.845    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 1.1 M 
1 | attn_layer     | Attention        | 131 K 
2 | decoder        | DecoderNet       | 1.4 M 
3 | model          | EncoderDecoder   | 2.5 M 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.093    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 1.1 M 
1 | decoder        | DecoderNet       | 1.1 M 
2 | model          | EncoderDecoder   | 2.2 M 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
2.2 M     Trainable params
0         Non-trainable params
2.2 M     Total params
8.780     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 1.4 M 
1 | attn_layer     | Attention        | 197 K 
2 | decoder        | DecoderNet       | 709 K 
3 | model          | EncoderDecoder   | 2.1 M 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
2.1 M     Trainable params
0         Non-trainable params
2.1 M     Total params
8.373     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 1.4 M 
1 | decoder        | DecoderNet       | 381 K 
2 | model          | EncoderDecoder   | 1.8 M 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
1.8 M     Trainable params
0         Non-trainable params
1.8 M     Total params
7.061     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 365 K 
1 | attn_layer     | Attention        | 131 K 
2 | decoder        | DecoderNet       | 578 K 
3 | model          | EncoderDecoder   | 944 K 
4 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
944 K     Trainable params
0         Non-trainable params
944 K     Total params
3.777     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | encoder        | EncoderNet       | 365 K 
1 | decoder        | DecoderNet       | 381 K 
2 | model          | EncoderDecoder   | 747 K 
3 | loss_criterion | CrossEntropyLoss | 0     
----------------------------------------------------
747 K     Trainable params
0         Non-trainable params
747 K     Total params
2.988     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
