# CS6910 Assignment 3 (RNN Frameworks for transliteration) - Without attention

In [334]:
# importing required libraries for the notebook

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm
import wandb
import numpy as np
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import random

In [426]:
# set the device to 'cuda' if available
device = ('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# define the source and target languages
TARGET = 'hin'
SOURCE = 'eng'
# define the special tokens that stand for start of seq, end of seq, 
# an unknown symbol.
SOS_SYM = '@'
EOS_SYM = '$'
UNK_SYM = '!'
# define a special token for padding - this helps with batch processing 
PAD_SYM = '%'

cuda


## Preprocessing Functions and Helpers

In [427]:
# function to load the 'cat' (= train/val/test) data of language 'lang'
def load_data(lang, cat):
    fcontents = open(f'aksharantar_sampled/{lang}/{lang}_{cat}.csv','r', encoding='utf-8').readlines()
    pairs = [tuple(l.strip().split(',')) for l in fcontents]
    x_data, y_data = list(map(list,zip(*pairs)))
    return x_data, y_data

# class for a language with useful functions.
class Language:
    def __init__(self, name):
        self.lname = name
    
    # function to create the vocabulary(set of tokens) using the words in 'data'
    # here, a token is either a special token or a lang character
    def create_vocabulary(self, *data):
        symbols = set()
        for wd in data:
            for c in wd:
                symbols.add(c)
        self.symbols = symbols
    
    # function to generate the index2sym (a number to a token) and 
    # sym2index (a token to a number) mappings using the vocabulary
    def generate_mappings(self):
        self.index2sym = {0: SOS_SYM, 1 : EOS_SYM, 2 : UNK_SYM, 3 : PAD_SYM}
        self.sym2index = {SOS_SYM : 0, EOS_SYM : 1, UNK_SYM : 2, PAD_SYM : 3}
        self.symbols = list(self.symbols)
        self.symbols.sort()

        for i, sym in enumerate(self.symbols):
            self.sym2index[sym] = i + 4
            self.index2sym[i+4] = sym
        
        self.num_tokens = len(self.index2sym.keys())
    
    # function to tokenize a word and convert all the tokens to
    # their corr. numbers using sym2index
    def convert_to_numbers(self, word):
        enc = [self.sym2index[SOS_SYM]]
        for ch in word:
            if ch in self.sym2index.keys():
                enc.append(self.sym2index[ch])
            else:
                enc.append(self.sym2index[UNK_SYM])
        enc.append(self.sym2index[EOS_SYM])
        return enc
    
    # convert a list of predictions (each prediction is a list of numbers)
    # to the corresponding list of words using index2sym
    # pred should be numpy array of shape (number_of_words, max_word_length)
    # tokens after EOS_SYM are discarded
    def convert_to_words(self, preds):
        num = preds.shape[0]
        words = [] 
        for i in range(num):
            wd = ''
            for idx in preds[i][1:]:
                ch = self.index2sym[idx]
                if ch != EOS_SYM:
                    wd += ch
            words.append(wd)
        return words

    # get the number assigned to a token
    def get_index(self, sym):
        return self.sym2index[sym]
    
    # get the number of tokens in the vocabulary
    def get_size(self):
        return self.num_tokens

In [428]:
# load all the available data and print sample counts for each set
x_train, y_train = load_data(TARGET, 'train')
x_valid, y_valid = load_data(TARGET, 'valid')
x_test, y_test = load_data(TARGET, 'test')

print(f'Number of train samples = {len(x_train)}')
print(f'Number of valid samples = {len(x_valid)}')
print(f'Number of test samples = {len(x_test)}')

Number of train samples = 51200
Number of valid samples = 4096
Number of test samples = 4096


In [429]:
# create language objects for storing vocabulary, index2sym and sym2index
SRC_LANG = Language(SOURCE)
TAR_LANG = Language(TARGET)

# creating vocabulary using train data only
SRC_LANG.create_vocabulary(*(x_train))
TAR_LANG.create_vocabulary(*(y_train))

# otherwise, use unicode characters (assigned codepoints) in the script's range
# src_lang.create_vocabulary_range()
# tar_lang.create_vocabulary_range()

# generate mappings from characters to numbers and vice versa
SRC_LANG.generate_mappings()
TAR_LANG.generate_mappings()

# print the source and target vocabularies
print(f'Source Vocabulary Size = {len(SRC_LANG.symbols)}')
print(f'Source Vocabulary = {SRC_LANG.symbols}')
print(f'Source Mapping {SRC_LANG.index2sym}')
print(f'Target Vocabulary Size = {len(TAR_LANG.symbols)}')
print(f'Target Vocabulary = {TAR_LANG.symbols}')
print(f'Target Mapping {TAR_LANG.index2sym}')

Source Vocabulary Size = 26
Source Vocabulary = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
Source Mapping {0: '@', 1: '$', 2: '!', 3: '%', 4: 'a', 5: 'b', 6: 'c', 7: 'd', 8: 'e', 9: 'f', 10: 'g', 11: 'h', 12: 'i', 13: 'j', 14: 'k', 15: 'l', 16: 'm', 17: 'n', 18: 'o', 19: 'p', 20: 'q', 21: 'r', 22: 's', 23: 't', 24: 'u', 25: 'v', 26: 'w', 27: 'x', 28: 'y', 29: 'z'}
Target Vocabulary Size = 64
Target Vocabulary = ['ँ', 'ं', 'ः', 'अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ए', 'ऐ', 'ऑ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'ळ', 'व', 'श', 'ष', 'स', 'ह', '़', 'ऽ', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'ॅ', 'े', 'ै', 'ॉ', 'ो', 'ौ', '्']
Target Mapping {0: '@', 1: '$', 2: '!', 3: '%', 4: 'ँ', 5: 'ं', 6: 'ः', 7: 'अ', 8: 'आ', 9: 'इ', 10: 'ई', 11: 'उ', 12: 'ऊ', 13: 'ऋ', 14: 'ए', 15: 'ऐ', 16: 'ऑ', 17: 'ओ', 18: 'औ',

In [350]:
class TransliterateDataset(Dataset):
    def __init__(self, x_data, y_data, src_lang : Language, tar_lang : Language):
        self.x_data = x_data
        self.y_data = y_data
        self.src_lang = src_lang
        self.tar_lang = tar_lang
        
    def __len__(self):
        return len(self.y_data)

    def __getitem__(self, idx):
        x_enc, y_enc = self.x_data[idx], self.y_data[idx]
        x_enc = self.src_lang.convert_to_numbers(x_enc)
        y_enc = self.tar_lang.convert_to_numbers(y_enc) 
        return torch.tensor(x_enc, dtype=int), torch.tensor(y_enc,dtype=int), self.y_data[idx]

class CollationFunction:
    def __init__(self, src_lang : Language, tar_lang : Language):
        self.src_lang = src_lang
        self.tar_lang = tar_lang
    
    def __call__(self, batch):
        # sorting is to save encoder computation. 
        # reasoning : https://stackoverflow.com/questions/51030782/why-do-we-pack-the-sequences-in-pytorch
        batch.sort(key = lambda x : len(x[0]), reverse=True)
        src, tar, tar_words = zip(*batch)
        src_lens = torch.tensor([len(x) for x in src], dtype=int)
        src = nn.utils.rnn.pad_sequence(list(src), batch_first=True, padding_value=self.src_lang.get_index(PAD_SYM))
        tar = nn.utils.rnn.pad_sequence(list(tar), batch_first=True, padding_value=self.tar_lang.get_index(PAD_SYM))
        return src, tar, src_lens, tar_words

## Encoder Network

In [407]:
class EncoderNet(nn.Module):
    def __init__(self, vocab_size, embed_size, num_layers, hid_size, cell_type, 
                 bidirect=False, dropout=0):
        super(EncoderNet, self).__init__()
        self.hidden_size = hid_size
        self.embed_size = embed_size
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.dropout = nn.Dropout(p=dropout)
        self.num_layers = num_layers

        # we create the required architecture using the received parameters
        kwargs = {'input_size':embed_size, 'hidden_size':hid_size, 'num_layers':num_layers, 
                 'bidirectional':bidirect, 'batch_first':True}
        if num_layers > 1:
            kwargs['dropout'] = dropout
        if cell_type == 'RNN':
            self.network = nn.RNN(**kwargs)
        elif cell_type == 'LSTM':
            self.network = nn.LSTM(**kwargs)
        else:
            self.network = nn.GRU(**kwargs)
        
        self.cell_type = cell_type
        self.bidirect = bidirect

        # for combining the final layer's forward and reverse directions' final hidden state
        if (self.bidirect):
            self.combine_forward_backward = nn.Linear(2 * hid_size, hid_size)

    def forward(self, batch_x, batch_lens):
        batch_x = self.embedding(batch_x)
        batch_x = self.dropout(batch_x)
        packed_batch_x = nn.utils.rnn.pack_padded_sequence(batch_x, lengths=batch_lens, batch_first=True, 
                                                           enforce_sorted=True)
        if self.cell_type == 'LSTM':
            packed_outputs, (hidden_outputs, _) = self.network(packed_batch_x,)
        else:
            packed_outputs, hidden_outputs = self.network(packed_batch_x)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(packed_outputs, batch_first=True)
        if self.bidirect:
            # remember 2nd dim in hidden_outputs; so, we have to concatenate forward and backward
            # final hidden states along **1st dimension**
            concat_hidden_state = torch.cat((hidden_outputs[-2,:,:], hidden_outputs[-1,:,:]), dim=1)
            hidden_state = self.combine_forward_backward(concat_hidden_state)
            hidden_state = torch.tanh(hidden_state)
        else:
            hidden_state = hidden_outputs[-1, :, :]
        # hidden_state = (batch_size, hid_size); outputs = (batch_size, max_seq_len_batch, D * hid_size)
        # d = 2 if bidirectional; else d = 1
        return outputs, hidden_state

## Decoder Network


In [408]:
class DecoderNet(nn.Module):
    def __init__(self, vocab_size, embed_size, num_layers, hid_size, cell_type, 
                 dropout=0):
        super(DecoderNet, self).__init__()
        self.hidden_size = hid_size
        self.embed_size = embed_size
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.dropout = nn.Dropout(p=dropout)
        self.num_layers = num_layers
        self.vocab_size = vocab_size

       # we create the required architecture using the received parameters
        kwargs = {'input_size':embed_size, 'hidden_size':hid_size, 'num_layers':num_layers, 
                 'batch_first':True}
        if num_layers > 1:
            kwargs['dropout'] = dropout
        if cell_type == 'RNN':
            self.network = nn.RNN(**kwargs)
        elif cell_type == 'LSTM':
            self.network = nn.LSTM(**kwargs)
        else:
            self.network = nn.GRU(**kwargs)

        self.cell_type = cell_type
        self.out_layer = nn.Linear(hid_size, vocab_size)

    # will always go 1 step forward in time (seqlen = L = 1)
    # previous decoder state shape = [num_layers, batch_size, hid_size]
    def forward(self, batch_y, prev_decoder_state):
        batch_y = batch_y.unsqueeze(1) # batch_size is first dim
        batch_y = self.embedding(batch_y)
        batch_y = self.dropout(batch_y)
        if self.cell_type == 'LSTM':
            decoder_hidden_state, decoder_cell_state = prev_decoder_state
            outputs, (decoder_hidden_state, decoder_cell_state) = self.network(batch_y, (decoder_hidden_state, decoder_cell_state))
        else:
            outputs, decoder_hidden_state = self.network(batch_y, prev_decoder_state)
        
        outputs = outputs.squeeze(1) # remove seqlen dimension
        logits = self.out_layer(outputs)
        if self.cell_type == 'LSTM':
            return logits, (decoder_hidden_state, decoder_cell_state)
        else:
            return logits, decoder_hidden_state

## Seq2Seq(Encoder-Decoder) Model

In [409]:
# explore changing teacher forcing ratio to something epoch-based as sir suggested
class EncoderDecoder(nn.Module):
    def __init__(self, encoder :EncoderNet, decoder : DecoderNet, src_lang, tar_lang) -> None:
        super(EncoderDecoder, self).__init__()
        self.enc_model = encoder
        self.dec_model = decoder
        self.src_lang = src_lang
        self.tar_lang = tar_lang
        self.cell_type = self.dec_model.cell_type
    
    def forward(self, batch_X, batch_Y, X_lens, tf_ratio):
        batch_size = batch_X.size(0)
        _, final_enc_hidden_state = self.enc_model(batch_X, X_lens)
        num_dec_layers = self.dec_model.num_layers
        decoder_state = torch.stack([final_enc_hidden_state for _ in range(num_dec_layers)], dim=0)
        # we will feed the encoder output into each decoder layer's initial hidden state
        # hidden_dec = (num_layers, batch_size, hid_size)

        tarlength = batch_Y.size(1)
        outlogits = torch.zeros(batch_size, tarlength, self.dec_model.vocab_size).to(device)
        preds = torch.zeros(batch_size, tarlength).to(device)
        dec_input = batch_Y[:,0]

        if (self.cell_type == 'LSTM'):
            init_dec_cell_state = torch.stack([torch.zeros_like(final_enc_hidden_state) for _ in range(num_dec_layers)], dim=0).to(device)
            decoder_state = (decoder_state, init_dec_cell_state)
        
        for tstep in range(1, tarlength):
            curlogits, decoder_state = self.dec_model(dec_input, decoder_state)
            tf_force_input = batch_Y[:, tstep]
            pred = torch.argmax(curlogits, dim=1).to(device)
            dec_input = pred
            if tf_ratio != None:
                rand_num = torch.randn(1)[0]
                if rand_num <= tf_ratio:
                    dec_input = tf_force_input
            outlogits[:, tstep, :] = curlogits 
            preds[:, tstep] = pred

        return outlogits, preds 

## Train/Evaluation/Inference Class

In [410]:
class Runner():
    def __init__(self, src_lang : Language, tar_lang : Language, common_embed_size, num_enc_layers, num_dec_layers, 
                 common_hidden_size, common_cell_type, enc_bidirect, dropout, opt_name='Adam',
                 learning_rate=1e-3):
        self.src_lang = src_lang
        self.tar_lang = tar_lang
        # create all the sub-networks and the main model
        self.encoder = EncoderNet(vocab_size=src_lang.get_size(), embed_size=common_embed_size,
                             num_layers=num_enc_layers, hid_size=common_hidden_size,
                             cell_type= common_cell_type, bidirect=enc_bidirect, dropout=dropout)
        self.decoder = DecoderNet(vocab_size=tar_lang.get_size(), embed_size=common_embed_size,
                             num_layers=num_dec_layers, hid_size=common_hidden_size,
                             cell_type=common_cell_type, dropout=dropout)
        self.model = EncoderDecoder(encoder=self.encoder, decoder=self.decoder, src_lang=src_lang, 
                                    tar_lang=tar_lang)
        # move model to the torch device
        self.model.to(device)
        # for reproducibility - seed everything with 42
        torch.manual_seed(42); torch.cuda.manual_seed(42); np.random.seed(42); random.seed(42)
        self.model.apply(self.init_weights) # initialize model weights
        self.trainLoader, self.validLoader, self.testLoader = None, None, None
        self.optimizer = None
        self.loss_criterion = nn.CrossEntropyLoss(ignore_index=tar_lang.sym2index[PAD_SYM])
        if opt_name == 'Adam':
            self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

    @staticmethod
    def init_weights(m):
        for name, param in m.named_parameters():
            if 'weight' in name:
                nn.init.normal_(param.data, mean=0, std=0.05)
            else:
                nn.init.constant_(param.data, 0)
    
    def generate_data_loaders(self, data_X, data_y, batch_size):
        dataset = TransliterateDataset(data_X, data_y, src_lang=SRC_LANG, tar_lang=TAR_LANG)
        dataloader = DataLoader(dataset=dataset, batch_size=batch_size,
                                collate_fn=CollationFunction(SRC_LANG, TAR_LANG))
        return dataloader

    def make_all_loaders(self, train_data, valid_data, test_data, batch_size):
        train_X, train_y = train_data
        valid_X, valid_y = valid_data
        test_X, test_y = test_data

        self.trainLoader = self.generate_data_loaders(train_X, train_y, batch_size)
        self.validLoader = self.generate_data_loaders(valid_X, valid_y, batch_size)
        self.testLoader = self.generate_data_loaders(test_X, test_y, batch_size)

    def get_accuracy(self, pred_words, tar_words):
        assert(len(pred_words) == len(tar_words))
        count = 0
        for i in range(len(pred_words)):
            if pred_words[i] == tar_words[i]:
                count += 1
        return count / len(pred_words)

    def train_one_epoch(self, epoch_number, tf_ratio=0.6):
        assert(self.trainLoader != None); assert(self.optimizer != None)

        # set model in training mode for autograd to be activated
        self.model.train(); self.optimizer.zero_grad()
        train_loss = 0.0
        pred_words, true_words = [], []
        with tqdm(self.trainLoader, unit='batch') as tqdmLoader:
            for batch_X, batch_y, X_lens, y_words in tqdmLoader:
                tqdmLoader.set_description(f'Epoch {epoch_number}')
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                
                # get the logits, preds for the current batch
                logits, preds = self.model(batch_X, batch_y, X_lens, tf_ratio=tf_ratio)
                # ignore loss for the first time step
                targets = batch_y[:, 1:]; logits = logits[:, 1:, :]
                logits = logits.swapaxes(1, 2) # make class logits the second dimension as needed
                loss = self.loss_criterion(logits, targets)
                loss.backward()
                self.optimizer.step(); self.optimizer.zero_grad()
                train_loss += loss.item()
                batch_pred_words = self.tar_lang.convert_to_words(preds.cpu().numpy())
                tqdmLoader.set_postfix(loss=loss.item())
                true_words += y_words
                pred_words += batch_pred_words
        train_loss /= len(self.trainLoader)
        train_acc = self.get_accuracy(pred_words, true_words)
        print(f'Train Loss = {train_loss}; Train Accuracy = {train_acc * 100}')
        return pred_words, true_words # return train loss and accuracy

    def evaluate(self, ):
        assert(self.validLoader != None)

    def inference(self, ):
        pass
    
    def beam_search_inference(self, ):
        pass

In [439]:
# testing runner

runner = Runner(SRC_LANG, TAR_LANG, 64, 1, 1, 256, 'LSTM', False, 0.3)

In [440]:
runner.make_all_loaders((x_train, y_train), (x_valid, y_valid), (x_test, y_test), 128)

In [441]:
for i in range(25):
    p, w = runner.train_one_epoch(i, 0.7)
    for x, y in list(zip(p,w))[:10]:
        print(x,y)

Epoch 0: 100%|██████████| 400/400 [00:13<00:00, 28.79batch/s, loss=2.77]


Train Loss = 3.0325488102436067; Train Accuracy = 0.0
छॅशःूफऽओलऔईञईिईईई देशभरामध्ये
भँधधञॅबबऽऋईऋऽखखईख पेशनधारियों
षँईऑऑंनवञळओमओशँँख अनुक्रमानुपात
षऔञऔऋञ़ञञगःःः@ेँई वाहिन्यांसाठी
दषईचटॉअबऽूूुञुषषख लोकरचनाओं
भईवःःऋछधऔटऋटववबईझ ट्रांसक्रिप्टेज़
भॉूदटऽऽटटजऋऋचअईईख गंतिविधियों
भःःःःःओझनयःयईईईईँ शासनाबद्दल
भःनईधनछय़नदईढढईखख सर्वसंग्रह
गमददषअईऋइसओफईईईईई तुमच्यापैकी


Epoch 1: 100%|██████████| 400/400 [00:13<00:00, 28.80batch/s, loss=2.82]


Train Loss = 2.7656224447488786; Train Accuracy = 0.0
स्र््राा्या देशभरामध्ये
स्र््रा्यां पेशनधारियों
स््र्र्ार्र अनुक्रमानुपात
स्रायारां वाहिन्यांसाठी
स्र््रारं लोकरचनाओं
स्र्र्र्रायाय ट्रांसक्रिप्टेज़
स्ग्यार्यां गंतिविधियों
स्र््रा्य शासनाबद्दल
स््रार्ाय सर्वसंग्रह
स्र््यार्ं तुमच्यापैकी


Epoch 2: 100%|██████████| 400/400 [00:13<00:00, 28.67batch/s, loss=2.29]


Train Loss = 2.47019292473793; Train Accuracy = 0.0
कार्यायांया देशभरामध्ये
स्र्यारारां पेशनधारियों
अं्र्यांदार्र अनुक्रमानुपात
किरार्यांग् वाहिन्यांसाठी
मांानाार लोकरचनाओं
प्र्सस्रि्स्र ट्रांसक्रिप्टेज़
अुद्रात्रां गंतिविधियों
सार्ययारा शासनाबद्दल
स््या्त्य सर्वसंग्रह
सिर्ययांगं तुमच्यापैकी


Epoch 3: 100%|██████████| 400/400 [00:13<00:00, 29.10batch/s, loss=1.89]


Train Loss = 2.1450579410791395; Train Accuracy = 0.000625
चिश्ा्ंायया देशभरामध्ये
प्र्ा्व्या पेशनधारियों
अन्यायााप्तरर अनुक्रमानुपात
वायाय्यापत्र वाहिन्यांसाठी
मिल्ााा लोकरचनाओं
क्र्सस््टस््टस ट्रांसक्रिप्टेज़
बित्वाय्या गंतिविधियों
तिर्ांा्य शासनाबद्दल
स््तस्धा सर्वसंग्रह
शिर्ायास्ं तुमच्यापैकी


Epoch 4: 100%|██████████| 400/400 [00:13<00:00, 28.62batch/s, loss=1.5] 


Train Loss = 1.8139053601026536; Train Accuracy = 0.0055078125
चिश्ा्याी देशभरामध्ये
प्श्ािनीयों पेशनधारियों
अर्र्रांप्र्र अनुक्रमानुपात
वहहियायाश्ी वाहिन्यांसाठी
लाकााा्यं लोकरचनाओं
ट्रेइस्टटिट्ट ट्रांसक्रिप्टेज़
गरत्लायायां गंतिविधियों
शिर्भभाचया शासनाबद्दल
सर्वस्च्या सर्वसंग्रह
श्द्ियातोंी तुमच्यापैकी


Epoch 5: 100%|██████████| 400/400 [00:14<00:00, 28.23batch/s, loss=1.19]


Train Loss = 1.5165264144539834; Train Accuracy = 0.02517578125
दिश्ानंाीया देशभरामध्ये
पिच्ायंायां पेशनधारियों
अनुकुर्ाप्पुरर अनुक्रमानुपात
वहहनन्रासचा वाहिन्यांसाठी
लोकाायांं लोकरचनाओं
ट्रेनस््रिप्र ट्रांसक्रिप्टेज़
गात्ंगधायां गंतिविधियों
शार््दा्यी शासनाबद्दल
सररवस्द्र सर्वसंग्रह
तुमाासापीनी तुमच्यापैकी


Epoch 6: 100%|██████████| 400/400 [00:14<00:00, 28.21batch/s, loss=1.11] 


Train Loss = 1.3017342576384545; Train Accuracy = 0.05568359375
दिच्ा्चच्या देशभरामध्ये
पेशााारीयों पेशनधारियों
अनुकररापु्पार अनुक्रमानुपात
वहििनियासत्ठी वाहिन्यांसाठी
मोकााााओं लोकरचनाओं
ट्रानसपप््पा ट्रांसक्रिप्टेज़
गारिनिकायों गंतिविधियों
शस्मुगु्यी शासनाबद्दल
सरववसंस्र सर्वसंग्रह
तुम्ायापीकी तुमच्यापैकी


Epoch 7: 100%|██████████| 400/400 [00:14<00:00, 27.88batch/s, loss=1.07] 


Train Loss = 1.1268962325155736; Train Accuracy = 0.08427734375
दिश्ा्गधीये देशभरामध्ये
पेश््ारीयों पेशनधारियों
अनुक्नमपपपपतत अनुक्रमानुपात
वहििन्यासस्ठी वाहिन्यांसाठी
लोकााननंं लोकरचनाओं
ट्रांसपपरपप्टा ट्रांसक्रिप्टेज़
गारिरियियों गंतिविधियों
शसममगदद्या शासनाबद्दल
सरववसंग्रह सर्वसंग्रह
तुम्ियाप्कीी तुमच्यापैकी


Epoch 8: 100%|██████████| 400/400 [00:15<00:00, 26.42batch/s, loss=0.766]


Train Loss = 1.0017341522872447; Train Accuracy = 0.1105859375
देश्ार्ध्या देशभरामध्ये
पिश््धिियोंी पेशनधारियों
अनुकुनाुपुरतत अनुक्रमानुपात
वहहिन्यास्ाठी वाहिन्यांसाठी
लोकरचाननं लोकरचनाओं
ट्रानस््रिप्पिस ट्रांसक्रिप्टेज़
गातिविधियों गंतिविधियों
शासाबंद् शासनाबद्दल
सरववसंगररह सर्वसंग्रह
तमम्ियातीी तुमच्यापैकी


Epoch 9: 100%|██████████| 400/400 [00:14<00:00, 28.50batch/s, loss=0.817]


Train Loss = 0.9176895959675312; Train Accuracy = 0.13095703125
देश्रामाकयेी देशभरामध्ये
पेश्धारियों पेशनधारियों
अंुकररमनपपपतत अनुक्रमानुपात
वहहिन्यांसाठी वाहिन्यांसाठी
लोकराननओं लोकरचनाओं
ट्रांस््रेप्टिशन ट्रांसक्रिप्टेज़
गातिविधियों गंतिविधियों
शससंबदद्दल शासनाबद्दल
सरवंसंगररह सर्वसंग्रह
तुम््यापिकीी तुमच्यापैकी


Epoch 10: 100%|██████████| 400/400 [00:14<00:00, 27.75batch/s, loss=0.728]


Train Loss = 0.8493151672184467; Train Accuracy = 0.15138671875
देश्भहभध्या देशभरामध्ये
पेश्नारायों पेशनधारियों
अन्कुरूुपपुततत अनुक्रमानुपात
वहहिन्यासस्ठी वाहिन्यांसाठी
लोकरचनाओं लोकरचनाओं
ट्रांस््रेप्टे ट्रांसक्रिप्टेज़
गातिविधायों गंतिविधियों
शससमबबद्दल शासनाबद्दल
सर्वसंगर्ह सर्वसंग्रह
तुम्भयापककी तुमच्यापैकी


Epoch 11:  96%|█████████▌| 382/400 [00:14<00:00, 27.25batch/s, loss=0.743]


KeyboardInterrupt: 