In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import math
import copy
import os
import zipfile
import requests
from tqdm import tqdm
import pandas as pd
import torch
import numpy as np
from transformers import AutoTokenizer
from sklearn.model_selection import train_test_split
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset

In [2]:
class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        # Ensure that the model dimension (d_model) is divisible by the number of heads
        assert d_model % num_heads == 0, "d_model must be divisible by num_heads"
        
        # Initialize dimensions
        self.d_model = d_model # Model's dimension
        self.num_heads = num_heads # Number of attention heads
        self.d_k = d_model // num_heads # Dimension of each head's key, query, and value
        
        # Linear layers for transforming inputs
        self.W_q = nn.Linear(d_model, d_model) # Query transformation
        self.W_k = nn.Linear(d_model, d_model) # Key transformation
        self.W_v = nn.Linear(d_model, d_model) # Value transformation
        self.W_o = nn.Linear(d_model, d_model) # Output transformation
        
    def scaled_dot_product_attention(self, Q, K, V, mask=None):
        # Calculate attention scores
        attn_scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
        
        # Apply mask if provided (useful for preventing attention to certain parts like padding)
        if mask is not None:
            attn_scores = attn_scores.masked_fill(mask == 0, -1e9)
        
        # Softmax is applied to obtain attention probabilities
        attn_probs = torch.softmax(attn_scores, dim=-1)
        
        # Multiply by values to obtain the final output
        output = torch.matmul(attn_probs, V)
        return output
        
    def split_heads(self, x):
        # Reshape the input to have num_heads for multi-head attention
        batch_size, seq_length, d_model = x.size()
        return x.view(batch_size, seq_length, self.num_heads, self.d_k).transpose(1, 2)
        
    def combine_heads(self, x):
        # Combine the multiple heads back to original shape
        batch_size, _, seq_length, d_k = x.size()
        return x.transpose(1, 2).contiguous().view(batch_size, seq_length, self.d_model)
        
    def forward(self, Q, K, V, mask=None):
        # Apply linear transformations and split heads
        Q = self.split_heads(self.W_q(Q))
        K = self.split_heads(self.W_k(K))
        V = self.split_heads(self.W_v(V))
        
        # Perform scaled dot-product attention
        attn_output = self.scaled_dot_product_attention(Q, K, V, mask)
        
        # Combine heads and apply output transformation
        output = self.W_o(self.combine_heads(attn_output))
        return output

In [3]:
class PositionWiseFeedForward(nn.Module):
    def __init__(self, d_model, d_ff):
        super(PositionWiseFeedForward, self).__init__()
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

In [4]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_seq_length):
        super(PositionalEncoding, self).__init__()
        
        pe = torch.zeros(max_seq_length, d_model)
        position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        
        self.register_buffer('pe', pe.unsqueeze(0))
        
    def forward(self, x):
        return x + self.pe[:, :x.size(1)]

In [5]:
class EncoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout):
        super(EncoderLayer, self).__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = PositionWiseFeedForward(d_model, d_ff)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x, mask):
        attn_output = self.self_attn(x, x, x, mask)
        x = self.norm1(x + self.dropout(attn_output))
        ff_output = self.feed_forward(x)
        x = self.norm2(x + self.dropout(ff_output))
        return x

In [6]:
class DecoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout):
        super(DecoderLayer, self).__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.cross_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = PositionWiseFeedForward(d_model, d_ff)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x, enc_output, src_mask, tgt_mask):
        attn_output = self.self_attn(x, x, x, tgt_mask)
        x = self.norm1(x + self.dropout(attn_output))
        attn_output = self.cross_attn(x, enc_output, enc_output, src_mask)
        x = self.norm2(x + self.dropout(attn_output))
        ff_output = self.feed_forward(x)
        x = self.norm3(x + self.dropout(ff_output))
        return x

In [7]:
class Transformer(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, d_model, num_heads, num_layers, d_ff, max_seq_length, dropout):
        super(Transformer, self).__init__()
        self.encoder_embedding = nn.Embedding(src_vocab_size, d_model)
        self.decoder_embedding = nn.Embedding(tgt_vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_seq_length)

        self.encoder_layers = nn.ModuleList([EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.decoder_layers = nn.ModuleList([DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])

        self.fc = nn.Linear(d_model, tgt_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def generate_mask(self, src, tgt):
        src_mask = (src != 0).unsqueeze(1).unsqueeze(2)
        tgt_mask = (tgt != 0).unsqueeze(1).unsqueeze(3)
        seq_length = tgt.size(1)
        nopeak_mask = (1 - torch.triu(torch.ones(1, seq_length, seq_length), diagonal=1)).bool()
#         print (tgt_mask.shape)
#         print (nopeak_mask.shape)
        tgt_mask = tgt_mask & nopeak_mask

        return src_mask, tgt_mask

    def forward(self, src, tgt):
        src_mask, tgt_mask = self.generate_mask(src, tgt)
        src_embedded = self.dropout(self.positional_encoding(self.encoder_embedding(src)))
        tgt_embedded = self.dropout(self.positional_encoding(self.decoder_embedding(tgt)))

        enc_output = src_embedded
        for enc_layer in self.encoder_layers:
            enc_output = enc_layer(enc_output, src_mask)

        dec_output = tgt_embedded
        for dec_layer in self.decoder_layers:
            dec_output = dec_layer(dec_output, enc_output, src_mask, tgt_mask)

        output = self.fc(dec_output)
        return output

In [8]:
src_vocab_size = 32000
tgt_vocab_size = 32000
d_model = 512
num_heads = 8
num_layers = 6
d_ff = 2048
max_seq_length = 20
dropout = 0.1

transformer = Transformer(src_vocab_size, tgt_vocab_size, d_model, num_heads, num_layers, d_ff, max_seq_length, dropout)


In [9]:
## Read training data

# Constants
DOWNLOAD_URL = "https://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/moses/en-es.txt.zip"
EXTRACT_DIR = "/Users/fangzhang/Desktop/build_transformer/en-es.txt"
ZIP_FILE = "en-es.txt.zip"
OUTPUT_FILE = "opensubtitles_en_es.tsv"

# Step 1: Read and clean the sentence pairs

src_file_path = os.path.join(EXTRACT_DIR, "OpenSubtitles.en-es.en")
tgt_file_path = os.path.join(EXTRACT_DIR, "OpenSubtitles.en-es.es")

print("Processing sentence pairs...")
with open(src_file_path, 'r', encoding='utf-8') as src_file, \
     open(tgt_file_path, 'r', encoding='utf-8') as tgt_file, \
     open(OUTPUT_FILE, 'w', encoding='utf-8') as out_file:
    for src_line, tgt_line in tqdm(zip(src_file, tgt_file), desc="Cleaning", unit="lines"):
        src_line = src_line.strip()
        tgt_line = tgt_line.strip()
        if src_line and tgt_line and len(src_line.split()) < 50 and len(tgt_line.split()) < 50:
            out_file.write(f"{src_line}\t{tgt_line}\n")

print(f"\nSaved cleaned sentence pairs to {OUTPUT_FILE} ✅")


# Step 2: Load the TSV file with error handling
df = pd.read_csv(
    'opensubtitles_en_es.tsv',
    sep='\t',
    header=None,
    names=['en', 'es'],
    engine='python',
    on_bad_lines='warn'  # Skips bad lines and warns about them
)

# Step 3: processing
# Drop rows with null values
df.dropna(inplace=True)

# Filter sentence pairs by length
df = df[df['en'].str.split().str.len().between(3, 50)]
df = df[df['es'].str.split().str.len().between(3, 50)]

# Remove duplicate pairs
df.drop_duplicates(inplace=True)

# Reset index
df.reset_index(drop=True, inplace=True)


# Step 4: Initialize tokenizers for English and Spanish
tokenizer_en = AutoTokenizer.from_pretrained('bert-base-uncased')
tokenizer_es = AutoTokenizer.from_pretrained('dccuchile/bert-base-spanish-wwm-cased')

# Tokenize the sentences
df['en_tokens'] = df['en'].apply(lambda x: tokenizer_en.encode(x, add_special_tokens=True))
df['es_tokens'] = df['es'].apply(lambda x: tokenizer_es.encode(x, add_special_tokens=True))



# Step 5: Split into training and temp (which will be further split into validation and test)
train_df, temp_df = train_test_split(df, test_size=0.2, random_state=42)

# Split temp into validation and test
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)



# Example pandas Series
# Each value is a list of integers representing tokens

# Parameters
max_len = 20
pad_token = 0  # You can choose any value as the padding token

# Step 6: convert to tensors

def gen_tensor(s):

    # Pad or truncate each list
    padded = s.apply(lambda x: (x + [pad_token]*max_len)[:max_len])

    # Convert to tensor
    data = torch.tensor(padded.tolist(), dtype=torch.long)
    return data

src_data = gen_tensor(train_df['en_tokens'])
tgt_data = gen_tensor(train_df['es_tokens'])

src_test = gen_tensor(test_df['en_tokens'])
tgt_test = gen_tensor(test_df['es_tokens'])



Skipping line 589: '	' expected after '"'
Skipping line 1782: '	' expected after '"'
Skipping line 2129: '	' expected after '"'
Skipping line 2352: '	' expected after '"'
Skipping line 2353: '	' expected after '"'
Skipping line 2354: '	' expected after '"'
Skipping line 2580: '	' expected after '"'
Skipping line 2882: '	' expected after '"'
Skipping line 2883: '	' expected after '"'
Skipping line 2908: '	' expected after '"'
Skipping line 2910: '	' expected after '"'
Skipping line 2911: '	' expected after '"'
Skipping line 3355: '	' expected after '"'
Skipping line 3703: '	' expected after '"'
Skipping line 4442: '	' expected after '"'
Skipping line 4696: '	' expected after '"'
Skipping line 4758: '	' expected after '"'
Skipping line 4759: '	' expected after '"'
Skipping line 4760: '	' expected after '"'
Skipping line 4841: '	' expected after '"'
Skipping line 4842: '	' expected after '"'
Skipping line 4843: '	' expected after '"'
Skipping line 5357: '	' expected after '"'
Skipping lin

Skipping line 179920: '	' expected after '"'
Skipping line 180137: '	' expected after '"'
Skipping line 180326: '	' expected after '"'
Skipping line 181193: '	' expected after '"'
Skipping line 181194: '	' expected after '"'
Skipping line 181195: '	' expected after '"'
Skipping line 181200: '	' expected after '"'
Skipping line 181201: '	' expected after '"'
Skipping line 181202: '	' expected after '"'
Skipping line 181203: '	' expected after '"'
Skipping line 181206: '	' expected after '"'
Skipping line 181207: '	' expected after '"'
Skipping line 181208: '	' expected after '"'
Skipping line 181209: '	' expected after '"'
Skipping line 181232: '	' expected after '"'
Skipping line 181233: '	' expected after '"'
Skipping line 181604: '	' expected after '"'
Skipping line 181829: '	' expected after '"'
Skipping line 181973: '	' expected after '"'
Skipping line 182465: '	' expected after '"'
Skipping line 182497: '	' expected after '"'
Skipping line 182501: '	' expected after '"'
Skipping l

Skipping line 443702: '	' expected after '"'
Skipping line 444033: '	' expected after '"'
Skipping line 444248: '	' expected after '"'
Skipping line 444422: '	' expected after '"'
Skipping line 444425: '	' expected after '"'
Skipping line 444445: '	' expected after '"'
Skipping line 444843: '	' expected after '"'
Skipping line 445456: '	' expected after '"'
Skipping line 445468: '	' expected after '"'
Skipping line 446301: '	' expected after '"'
Skipping line 446532: '	' expected after '"'
Skipping line 446841: '	' expected after '"'
Skipping line 446912: '	' expected after '"'
Skipping line 446971: '	' expected after '"'
Skipping line 447784: '	' expected after '"'
Skipping line 449549: '	' expected after '"'
Skipping line 449550: '	' expected after '"'
Skipping line 449704: '	' expected after '"'
Skipping line 449889: '	' expected after '"'
Skipping line 449902: '	' expected after '"'
Skipping line 449903: '	' expected after '"'
Skipping line 449904: '	' expected after '"'
Skipping l

In [14]:
src_data.shape

torch.Size([284734, 20])

In [89]:
transformer = Transformer(src_vocab_size, tgt_vocab_size, d_model, num_heads, num_layers, d_ff, max_seq_length, dropout)

In [10]:
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = optim.Adam(transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

transformer.train()

Transformer(
  (encoder_embedding): Embedding(32000, 512)
  (decoder_embedding): Embedding(32000, 512)
  (positional_encoding): PositionalEncoding()
  (encoder_layers): ModuleList(
    (0-5): 6 x EncoderLayer(
      (self_attn): MultiHeadAttention(
        (W_q): Linear(in_features=512, out_features=512, bias=True)
        (W_k): Linear(in_features=512, out_features=512, bias=True)
        (W_v): Linear(in_features=512, out_features=512, bias=True)
        (W_o): Linear(in_features=512, out_features=512, bias=True)
      )
      (feed_forward): PositionWiseFeedForward(
        (fc1): Linear(in_features=512, out_features=2048, bias=True)
        (fc2): Linear(in_features=2048, out_features=512, bias=True)
        (relu): ReLU()
      )
      (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
  )
  (decoder_layers): ModuleList(
    (0-5): 6 x DecoderLayer(

In [16]:



dataset = TensorDataset(src_data, tgt_data)

# Create DataLoader with batching
loader = DataLoader(dataset, batch_size=64, shuffle=True)

# Usage in training loop
for batch in loader:
    src_batch, tgt_batch = batch  # both are shape (batch_size, seq_len)
    # Feed into transformer model here
    
    for epoch in range(2):
        optimizer.zero_grad()
        output = transformer(src_batch, tgt_batch[:, :-1])
        loss = criterion(output.contiguous().view(-1, tgt_vocab_size), tgt_batch[:, 1:].contiguous().view(-1))
        loss.backward()
        optimizer.step()
        print(f"Epoch: {epoch+1}, Loss: {loss.item()}")


Epoch: 1, Loss: 7.527031421661377
Epoch: 2, Loss: 7.415209770202637
Epoch: 1, Loss: 7.341157913208008
Epoch: 2, Loss: 7.220810413360596
Epoch: 1, Loss: 7.134400844573975
Epoch: 2, Loss: 7.037831783294678
Epoch: 1, Loss: 6.976173400878906
Epoch: 2, Loss: 6.912863731384277
Epoch: 1, Loss: 6.910312652587891
Epoch: 2, Loss: 6.8360209465026855
Epoch: 1, Loss: 7.260377407073975
Epoch: 2, Loss: 7.207149982452393
Epoch: 1, Loss: 6.904798984527588
Epoch: 2, Loss: 6.843395233154297
Epoch: 1, Loss: 6.905842304229736
Epoch: 2, Loss: 6.85593843460083
Epoch: 1, Loss: 6.582320213317871
Epoch: 2, Loss: 6.522158145904541
Epoch: 1, Loss: 6.829554080963135
Epoch: 2, Loss: 6.777528285980225
Epoch: 1, Loss: 6.763797283172607
Epoch: 2, Loss: 6.6898512840271
Epoch: 1, Loss: 6.627096176147461
Epoch: 2, Loss: 6.544992923736572
Epoch: 1, Loss: 6.64716911315918
Epoch: 2, Loss: 6.609177112579346
Epoch: 1, Loss: 6.538753509521484
Epoch: 2, Loss: 6.488523960113525
Epoch: 1, Loss: 6.682265281677246
Epoch: 2, Loss: 6

Epoch: 1, Loss: 5.289556980133057
Epoch: 2, Loss: 5.221793174743652
Epoch: 1, Loss: 5.270346164703369
Epoch: 2, Loss: 5.23259973526001
Epoch: 1, Loss: 5.708643436431885
Epoch: 2, Loss: 5.644121170043945
Epoch: 1, Loss: 5.420557498931885
Epoch: 2, Loss: 5.380421161651611
Epoch: 1, Loss: 5.38323450088501
Epoch: 2, Loss: 5.323607921600342
Epoch: 1, Loss: 5.6000752449035645
Epoch: 2, Loss: 5.534358024597168
Epoch: 1, Loss: 5.094754695892334
Epoch: 2, Loss: 5.0509514808654785
Epoch: 1, Loss: 5.33390998840332
Epoch: 2, Loss: 5.31030797958374
Epoch: 1, Loss: 5.346582412719727
Epoch: 2, Loss: 5.284322738647461
Epoch: 1, Loss: 5.3942790031433105
Epoch: 2, Loss: 5.387412071228027
Epoch: 1, Loss: 5.200820446014404
Epoch: 2, Loss: 5.140238285064697
Epoch: 1, Loss: 5.432964324951172
Epoch: 2, Loss: 5.3962812423706055
Epoch: 1, Loss: 5.582995891571045
Epoch: 2, Loss: 5.526856899261475
Epoch: 1, Loss: 5.60748815536499
Epoch: 2, Loss: 5.573823928833008
Epoch: 1, Loss: 5.389953136444092
Epoch: 2, Loss:

Epoch: 2, Loss: 4.902703285217285
Epoch: 1, Loss: 5.108667373657227
Epoch: 2, Loss: 5.04913330078125
Epoch: 1, Loss: 5.289192199707031
Epoch: 2, Loss: 5.2305006980896
Epoch: 1, Loss: 4.965766429901123
Epoch: 2, Loss: 4.916149139404297
Epoch: 1, Loss: 4.870975494384766
Epoch: 2, Loss: 4.827175617218018
Epoch: 1, Loss: 5.271073341369629
Epoch: 2, Loss: 5.211513519287109
Epoch: 1, Loss: 5.266929626464844
Epoch: 2, Loss: 5.213219165802002
Epoch: 1, Loss: 5.343790054321289
Epoch: 2, Loss: 5.322932720184326
Epoch: 1, Loss: 5.171017646789551
Epoch: 2, Loss: 5.108606815338135
Epoch: 1, Loss: 4.952931880950928
Epoch: 2, Loss: 4.913938999176025
Epoch: 1, Loss: 5.376008033752441
Epoch: 2, Loss: 5.3405609130859375
Epoch: 1, Loss: 5.3449578285217285
Epoch: 2, Loss: 5.283731460571289
Epoch: 1, Loss: 5.201436996459961
Epoch: 2, Loss: 5.168420791625977
Epoch: 1, Loss: 5.270475387573242
Epoch: 2, Loss: 5.21876859664917
Epoch: 1, Loss: 5.3214898109436035
Epoch: 2, Loss: 5.268990993499756
Epoch: 1, Loss:

Epoch: 1, Loss: 4.856122970581055
Epoch: 2, Loss: 4.80445671081543
Epoch: 1, Loss: 5.001396656036377
Epoch: 2, Loss: 4.970744609832764
Epoch: 1, Loss: 4.7276611328125
Epoch: 2, Loss: 4.668872833251953
Epoch: 1, Loss: 4.967513084411621
Epoch: 2, Loss: 4.913802623748779
Epoch: 1, Loss: 5.124876499176025
Epoch: 2, Loss: 5.094095706939697
Epoch: 1, Loss: 4.934475421905518
Epoch: 2, Loss: 4.875233173370361
Epoch: 1, Loss: 4.7463555335998535
Epoch: 2, Loss: 4.725415229797363
Epoch: 1, Loss: 4.593967437744141
Epoch: 2, Loss: 4.564427852630615
Epoch: 1, Loss: 4.903465747833252
Epoch: 2, Loss: 4.864099502563477
Epoch: 1, Loss: 4.826308250427246
Epoch: 2, Loss: 4.767971515655518
Epoch: 1, Loss: 4.758874416351318
Epoch: 2, Loss: 4.7173380851745605
Epoch: 1, Loss: 5.050273895263672
Epoch: 2, Loss: 5.003370761871338
Epoch: 1, Loss: 4.922666549682617
Epoch: 2, Loss: 4.857700347900391
Epoch: 1, Loss: 4.99091911315918
Epoch: 2, Loss: 4.947790622711182
Epoch: 1, Loss: 5.053999900817871
Epoch: 2, Loss: 

Epoch: 2, Loss: 4.912747383117676
Epoch: 1, Loss: 4.676827430725098
Epoch: 2, Loss: 4.6146321296691895
Epoch: 1, Loss: 4.931901931762695
Epoch: 2, Loss: 4.899764537811279
Epoch: 1, Loss: 4.721207141876221
Epoch: 2, Loss: 4.675357818603516
Epoch: 1, Loss: 4.570675373077393
Epoch: 2, Loss: 4.5432562828063965
Epoch: 1, Loss: 5.185649394989014
Epoch: 2, Loss: 5.124850749969482
Epoch: 1, Loss: 4.5594401359558105
Epoch: 2, Loss: 4.503262519836426
Epoch: 1, Loss: 4.791454792022705
Epoch: 2, Loss: 4.735711097717285
Epoch: 1, Loss: 4.905610084533691
Epoch: 2, Loss: 4.854436874389648
Epoch: 1, Loss: 4.430503845214844
Epoch: 2, Loss: 4.366552829742432
Epoch: 1, Loss: 4.438321113586426
Epoch: 2, Loss: 4.401785850524902
Epoch: 1, Loss: 4.672057628631592
Epoch: 2, Loss: 4.624536037445068
Epoch: 1, Loss: 4.696045398712158
Epoch: 2, Loss: 4.652597904205322
Epoch: 1, Loss: 4.8215012550354
Epoch: 2, Loss: 4.7721757888793945
Epoch: 1, Loss: 4.869983673095703
Epoch: 2, Loss: 4.827706813812256
Epoch: 1, Lo

Epoch: 1, Loss: 4.4533514976501465
Epoch: 2, Loss: 4.3898820877075195
Epoch: 1, Loss: 4.873962879180908
Epoch: 2, Loss: 4.8253679275512695
Epoch: 1, Loss: 4.413312911987305
Epoch: 2, Loss: 4.3708882331848145
Epoch: 1, Loss: 4.726988792419434
Epoch: 2, Loss: 4.667461395263672
Epoch: 1, Loss: 4.7550129890441895
Epoch: 2, Loss: 4.686906814575195
Epoch: 1, Loss: 4.675963401794434
Epoch: 2, Loss: 4.619740962982178
Epoch: 1, Loss: 4.737489700317383
Epoch: 2, Loss: 4.658373832702637
Epoch: 1, Loss: 4.626954078674316
Epoch: 2, Loss: 4.584479331970215
Epoch: 1, Loss: 4.776989936828613
Epoch: 2, Loss: 4.721635341644287
Epoch: 1, Loss: 4.634243488311768
Epoch: 2, Loss: 4.566357135772705
Epoch: 1, Loss: 4.555591106414795
Epoch: 2, Loss: 4.494915008544922
Epoch: 1, Loss: 4.657522678375244
Epoch: 2, Loss: 4.5860276222229
Epoch: 1, Loss: 4.754052639007568
Epoch: 2, Loss: 4.690174102783203
Epoch: 1, Loss: 4.572351932525635
Epoch: 2, Loss: 4.535778522491455
Epoch: 1, Loss: 4.658369064331055
Epoch: 2, L

Epoch: 2, Loss: 4.6592888832092285
Epoch: 1, Loss: 4.649891376495361
Epoch: 2, Loss: 4.605686187744141
Epoch: 1, Loss: 4.4066386222839355
Epoch: 2, Loss: 4.37138032913208
Epoch: 1, Loss: 4.815515041351318
Epoch: 2, Loss: 4.767199993133545
Epoch: 1, Loss: 4.571701526641846
Epoch: 2, Loss: 4.527827262878418
Epoch: 1, Loss: 4.79980993270874
Epoch: 2, Loss: 4.728933811187744
Epoch: 1, Loss: 4.447392463684082
Epoch: 2, Loss: 4.406910419464111
Epoch: 1, Loss: 4.123437404632568
Epoch: 2, Loss: 4.0517401695251465
Epoch: 1, Loss: 4.769296169281006
Epoch: 2, Loss: 4.719817638397217
Epoch: 1, Loss: 4.634632587432861
Epoch: 2, Loss: 4.586216449737549
Epoch: 1, Loss: 4.68894100189209
Epoch: 2, Loss: 4.611804485321045
Epoch: 1, Loss: 5.101170539855957
Epoch: 2, Loss: 5.070700168609619
Epoch: 1, Loss: 4.621747016906738
Epoch: 2, Loss: 4.560249328613281
Epoch: 1, Loss: 4.699093818664551
Epoch: 2, Loss: 4.634554386138916
Epoch: 1, Loss: 4.380018711090088
Epoch: 2, Loss: 4.322223663330078
Epoch: 1, Loss

Epoch: 1, Loss: 4.296052932739258
Epoch: 2, Loss: 4.2305684089660645
Epoch: 1, Loss: 4.130228519439697
Epoch: 2, Loss: 4.068828582763672
Epoch: 1, Loss: 4.380415916442871
Epoch: 2, Loss: 4.321408748626709
Epoch: 1, Loss: 4.128132343292236
Epoch: 2, Loss: 4.0942769050598145
Epoch: 1, Loss: 4.529300689697266
Epoch: 2, Loss: 4.46923828125
Epoch: 1, Loss: 4.560235023498535
Epoch: 2, Loss: 4.507630348205566
Epoch: 1, Loss: 4.601528167724609
Epoch: 2, Loss: 4.547314643859863
Epoch: 1, Loss: 4.57459020614624
Epoch: 2, Loss: 4.513974666595459
Epoch: 1, Loss: 4.514572620391846
Epoch: 2, Loss: 4.464396953582764
Epoch: 1, Loss: 3.889112949371338
Epoch: 2, Loss: 3.861638069152832
Epoch: 1, Loss: 4.622213363647461
Epoch: 2, Loss: 4.566308975219727
Epoch: 1, Loss: 4.53391170501709
Epoch: 2, Loss: 4.4982008934021
Epoch: 1, Loss: 4.360091686248779
Epoch: 2, Loss: 4.317708969116211
Epoch: 1, Loss: 4.462883472442627
Epoch: 2, Loss: 4.42466926574707
Epoch: 1, Loss: 4.217069625854492
Epoch: 2, Loss: 4.187

Epoch: 2, Loss: 4.342098236083984
Epoch: 1, Loss: 4.2634992599487305
Epoch: 2, Loss: 4.192656517028809
Epoch: 1, Loss: 4.443273067474365
Epoch: 2, Loss: 4.422377586364746
Epoch: 1, Loss: 4.229190826416016
Epoch: 2, Loss: 4.156546115875244
Epoch: 1, Loss: 4.278475761413574
Epoch: 2, Loss: 4.2441182136535645
Epoch: 1, Loss: 4.413132190704346
Epoch: 2, Loss: 4.329185485839844
Epoch: 1, Loss: 4.094932556152344
Epoch: 2, Loss: 4.051562786102295
Epoch: 1, Loss: 4.436441898345947
Epoch: 2, Loss: 4.372556209564209
Epoch: 1, Loss: 4.154155731201172
Epoch: 2, Loss: 4.108132839202881
Epoch: 1, Loss: 4.459728717803955
Epoch: 2, Loss: 4.383548259735107
Epoch: 1, Loss: 4.516420364379883
Epoch: 2, Loss: 4.447441101074219
Epoch: 1, Loss: 4.331943035125732
Epoch: 2, Loss: 4.277577877044678
Epoch: 1, Loss: 4.419002056121826
Epoch: 2, Loss: 4.369881629943848
Epoch: 1, Loss: 4.704331874847412
Epoch: 2, Loss: 4.634256839752197
Epoch: 1, Loss: 4.946343421936035
Epoch: 2, Loss: 4.888593673706055
Epoch: 1, Lo

Epoch: 1, Loss: 4.3917083740234375
Epoch: 2, Loss: 4.353026390075684
Epoch: 1, Loss: 4.212629318237305
Epoch: 2, Loss: 4.146207332611084
Epoch: 1, Loss: 4.448151588439941
Epoch: 2, Loss: 4.380660057067871
Epoch: 1, Loss: 4.207028388977051
Epoch: 2, Loss: 4.123625755310059
Epoch: 1, Loss: 4.289089202880859
Epoch: 2, Loss: 4.271315097808838
Epoch: 1, Loss: 4.270102024078369
Epoch: 2, Loss: 4.212920188903809
Epoch: 1, Loss: 4.384750843048096
Epoch: 2, Loss: 4.335040092468262
Epoch: 1, Loss: 4.46865701675415
Epoch: 2, Loss: 4.389245510101318
Epoch: 1, Loss: 4.301534652709961
Epoch: 2, Loss: 4.235405445098877
Epoch: 1, Loss: 4.203277587890625
Epoch: 2, Loss: 4.147402763366699
Epoch: 1, Loss: 4.138017177581787
Epoch: 2, Loss: 4.087338924407959
Epoch: 1, Loss: 4.329682350158691
Epoch: 2, Loss: 4.254341125488281
Epoch: 1, Loss: 4.104167461395264
Epoch: 2, Loss: 4.053129196166992
Epoch: 1, Loss: 4.209163665771484
Epoch: 2, Loss: 4.167654037475586
Epoch: 1, Loss: 4.683032989501953
Epoch: 2, Loss

Epoch: 2, Loss: 4.4916582107543945
Epoch: 1, Loss: 4.223042011260986
Epoch: 2, Loss: 4.161989212036133
Epoch: 1, Loss: 4.387956619262695
Epoch: 2, Loss: 4.326690673828125
Epoch: 1, Loss: 4.295490264892578
Epoch: 2, Loss: 4.264598846435547
Epoch: 1, Loss: 4.686251163482666
Epoch: 2, Loss: 4.597301483154297
Epoch: 1, Loss: 4.232570171356201
Epoch: 2, Loss: 4.194216728210449
Epoch: 1, Loss: 4.218878269195557
Epoch: 2, Loss: 4.151089668273926
Epoch: 1, Loss: 4.35308837890625
Epoch: 2, Loss: 4.324100494384766
Epoch: 1, Loss: 4.628659248352051
Epoch: 2, Loss: 4.55495023727417
Epoch: 1, Loss: 4.2325968742370605
Epoch: 2, Loss: 4.192440986633301
Epoch: 1, Loss: 4.065680027008057
Epoch: 2, Loss: 4.0322489738464355
Epoch: 1, Loss: 4.326797008514404
Epoch: 2, Loss: 4.279800891876221
Epoch: 1, Loss: 4.325085639953613
Epoch: 2, Loss: 4.265622138977051
Epoch: 1, Loss: 4.3036980628967285
Epoch: 2, Loss: 4.257274150848389
Epoch: 1, Loss: 4.096278190612793
Epoch: 2, Loss: 4.047929286956787
Epoch: 1, Lo

Epoch: 1, Loss: 4.022480010986328
Epoch: 2, Loss: 3.9580237865448
Epoch: 1, Loss: 4.693386077880859
Epoch: 2, Loss: 4.591578960418701
Epoch: 1, Loss: 4.566989898681641
Epoch: 2, Loss: 4.517938137054443
Epoch: 1, Loss: 4.445810794830322
Epoch: 2, Loss: 4.398171901702881
Epoch: 1, Loss: 4.387800693511963
Epoch: 2, Loss: 4.311054706573486
Epoch: 1, Loss: 4.326071262359619
Epoch: 2, Loss: 4.28410530090332
Epoch: 1, Loss: 4.244751930236816
Epoch: 2, Loss: 4.204371452331543
Epoch: 1, Loss: 4.29103422164917
Epoch: 2, Loss: 4.224202632904053
Epoch: 1, Loss: 4.355011463165283
Epoch: 2, Loss: 4.278026103973389
Epoch: 1, Loss: 4.212566375732422
Epoch: 2, Loss: 4.168488502502441
Epoch: 1, Loss: 3.9578921794891357
Epoch: 2, Loss: 3.9173810482025146
Epoch: 1, Loss: 4.4013285636901855
Epoch: 2, Loss: 4.3551716804504395
Epoch: 1, Loss: 4.317678928375244
Epoch: 2, Loss: 4.2556562423706055
Epoch: 1, Loss: 4.282038688659668
Epoch: 2, Loss: 4.239272594451904
Epoch: 1, Loss: 4.4654011726379395
Epoch: 2, Lo

Epoch: 2, Loss: 4.265035152435303
Epoch: 1, Loss: 4.563295841217041
Epoch: 2, Loss: 4.520299911499023
Epoch: 1, Loss: 4.186561107635498
Epoch: 2, Loss: 4.104033946990967
Epoch: 1, Loss: 4.193477153778076
Epoch: 2, Loss: 4.133929252624512
Epoch: 1, Loss: 4.457977771759033
Epoch: 2, Loss: 4.390286922454834
Epoch: 1, Loss: 3.7814104557037354
Epoch: 2, Loss: 3.737518548965454
Epoch: 1, Loss: 4.003737449645996
Epoch: 2, Loss: 3.946863889694214
Epoch: 1, Loss: 3.9748709201812744
Epoch: 2, Loss: 3.9132184982299805
Epoch: 1, Loss: 4.143933296203613
Epoch: 2, Loss: 4.099510192871094
Epoch: 1, Loss: 4.309701919555664
Epoch: 2, Loss: 4.26323938369751
Epoch: 1, Loss: 3.9916741847991943
Epoch: 2, Loss: 3.9126648902893066
Epoch: 1, Loss: 3.721468687057495
Epoch: 2, Loss: 3.6931545734405518
Epoch: 1, Loss: 4.405230522155762
Epoch: 2, Loss: 4.360406875610352
Epoch: 1, Loss: 4.232325077056885
Epoch: 2, Loss: 4.208604335784912
Epoch: 1, Loss: 4.017327785491943
Epoch: 2, Loss: 3.9962143898010254
Epoch: 1

Epoch: 1, Loss: 4.288891315460205
Epoch: 2, Loss: 4.241321563720703
Epoch: 1, Loss: 4.325517177581787
Epoch: 2, Loss: 4.26902961730957
Epoch: 1, Loss: 4.160174369812012
Epoch: 2, Loss: 4.120455265045166
Epoch: 1, Loss: 4.19197416305542
Epoch: 2, Loss: 4.128321170806885
Epoch: 1, Loss: 4.0984039306640625
Epoch: 2, Loss: 4.052993297576904
Epoch: 1, Loss: 3.7207860946655273
Epoch: 2, Loss: 3.6645939350128174
Epoch: 1, Loss: 4.355865955352783
Epoch: 2, Loss: 4.2892961502075195
Epoch: 1, Loss: 4.207861423492432
Epoch: 2, Loss: 4.144476413726807
Epoch: 1, Loss: 4.054170608520508
Epoch: 2, Loss: 4.002419471740723
Epoch: 1, Loss: 3.9045143127441406
Epoch: 2, Loss: 3.855945587158203
Epoch: 1, Loss: 4.068583011627197
Epoch: 2, Loss: 3.9952452182769775
Epoch: 1, Loss: 4.33829927444458
Epoch: 2, Loss: 4.299001216888428
Epoch: 1, Loss: 4.1915717124938965
Epoch: 2, Loss: 4.134469509124756
Epoch: 1, Loss: 4.4811553955078125
Epoch: 2, Loss: 4.404940128326416
Epoch: 1, Loss: 4.389028549194336
Epoch: 2,

Epoch: 1, Loss: 4.151417255401611
Epoch: 2, Loss: 4.111049175262451
Epoch: 1, Loss: 4.064346790313721
Epoch: 2, Loss: 4.015484809875488
Epoch: 1, Loss: 4.572320461273193
Epoch: 2, Loss: 4.501555919647217
Epoch: 1, Loss: 3.9520273208618164
Epoch: 2, Loss: 3.9038538932800293
Epoch: 1, Loss: 4.126452445983887
Epoch: 2, Loss: 4.0536394119262695
Epoch: 1, Loss: 4.068240642547607
Epoch: 2, Loss: 4.0536980628967285
Epoch: 1, Loss: 3.8556199073791504
Epoch: 2, Loss: 3.795680046081543
Epoch: 1, Loss: 3.86091685295105
Epoch: 2, Loss: 3.835649251937866
Epoch: 1, Loss: 3.9694321155548096
Epoch: 2, Loss: 3.9232661724090576
Epoch: 1, Loss: 4.144973278045654
Epoch: 2, Loss: 4.069204807281494
Epoch: 1, Loss: 4.618648529052734
Epoch: 2, Loss: 4.51541805267334
Epoch: 1, Loss: 3.9240338802337646
Epoch: 2, Loss: 3.8747942447662354
Epoch: 1, Loss: 4.15578031539917
Epoch: 2, Loss: 4.087271690368652
Epoch: 1, Loss: 4.033849239349365
Epoch: 2, Loss: 4.003873825073242
Epoch: 1, Loss: 4.122754096984863
Epoch: 2

Epoch: 1, Loss: 3.931673049926758
Epoch: 2, Loss: 3.8701937198638916
Epoch: 1, Loss: 4.195004940032959
Epoch: 2, Loss: 4.117612838745117
Epoch: 1, Loss: 3.862677812576294
Epoch: 2, Loss: 3.7752437591552734
Epoch: 1, Loss: 4.126894474029541
Epoch: 2, Loss: 4.051822662353516
Epoch: 1, Loss: 4.277611255645752
Epoch: 2, Loss: 4.211068630218506
Epoch: 1, Loss: 3.7904367446899414
Epoch: 2, Loss: 3.693723440170288
Epoch: 1, Loss: 4.047918796539307
Epoch: 2, Loss: 3.975776433944702
Epoch: 1, Loss: 4.0523552894592285
Epoch: 2, Loss: 4.015829563140869
Epoch: 1, Loss: 4.22965669631958
Epoch: 2, Loss: 4.19261360168457
Epoch: 1, Loss: 4.37824821472168
Epoch: 2, Loss: 4.33572244644165
Epoch: 1, Loss: 4.112030982971191
Epoch: 2, Loss: 4.034265995025635
Epoch: 1, Loss: 4.167923450469971
Epoch: 2, Loss: 4.095888614654541
Epoch: 1, Loss: 3.889694929122925
Epoch: 2, Loss: 3.8308627605438232
Epoch: 1, Loss: 4.107249736785889
Epoch: 2, Loss: 4.047756195068359
Epoch: 1, Loss: 4.3845038414001465
Epoch: 2, Lo

Epoch: 1, Loss: 4.215203285217285
Epoch: 2, Loss: 4.166566848754883
Epoch: 1, Loss: 3.878349781036377
Epoch: 2, Loss: 3.7979791164398193
Epoch: 1, Loss: 4.147810935974121
Epoch: 2, Loss: 4.088556289672852
Epoch: 1, Loss: 3.98647141456604
Epoch: 2, Loss: 3.914989709854126
Epoch: 1, Loss: 4.526416778564453
Epoch: 2, Loss: 4.46018648147583
Epoch: 1, Loss: 4.202847957611084
Epoch: 2, Loss: 4.1748528480529785
Epoch: 1, Loss: 3.9052860736846924
Epoch: 2, Loss: 3.8720507621765137
Epoch: 1, Loss: 4.056720733642578
Epoch: 2, Loss: 4.0220489501953125
Epoch: 1, Loss: 4.123230934143066
Epoch: 2, Loss: 4.058037757873535
Epoch: 1, Loss: 4.233428478240967
Epoch: 2, Loss: 4.179220676422119
Epoch: 1, Loss: 4.360970497131348
Epoch: 2, Loss: 4.295083045959473
Epoch: 1, Loss: 4.310459613800049
Epoch: 2, Loss: 4.262245178222656
Epoch: 1, Loss: 4.234348773956299
Epoch: 2, Loss: 4.186586856842041
Epoch: 1, Loss: 4.294898986816406
Epoch: 2, Loss: 4.243499755859375
Epoch: 1, Loss: 4.008242607116699
Epoch: 2, L

Epoch: 1, Loss: 4.203496932983398
Epoch: 2, Loss: 4.137149810791016
Epoch: 1, Loss: 3.7637760639190674
Epoch: 2, Loss: 3.7181508541107178
Epoch: 1, Loss: 3.88242244720459
Epoch: 2, Loss: 3.83422589302063
Epoch: 1, Loss: 4.344798564910889
Epoch: 2, Loss: 4.289012432098389
Epoch: 1, Loss: 4.100353240966797
Epoch: 2, Loss: 4.0235443115234375
Epoch: 1, Loss: 4.064023971557617
Epoch: 2, Loss: 4.020012378692627
Epoch: 1, Loss: 3.828813076019287
Epoch: 2, Loss: 3.7869765758514404
Epoch: 1, Loss: 4.167487621307373
Epoch: 2, Loss: 4.110665321350098
Epoch: 1, Loss: 3.8130884170532227
Epoch: 2, Loss: 3.775085926055908
Epoch: 1, Loss: 4.432745456695557
Epoch: 2, Loss: 4.398582935333252
Epoch: 1, Loss: 3.7223658561706543
Epoch: 2, Loss: 3.674421787261963
Epoch: 1, Loss: 3.7534217834472656
Epoch: 2, Loss: 3.672987699508667
Epoch: 1, Loss: 3.857959032058716
Epoch: 2, Loss: 3.7978768348693848
Epoch: 1, Loss: 3.885819911956787
Epoch: 2, Loss: 3.819444417953491
Epoch: 1, Loss: 3.756168842315674
Epoch: 2

Epoch: 1, Loss: 4.4416704177856445
Epoch: 2, Loss: 4.38629674911499
Epoch: 1, Loss: 3.902329444885254
Epoch: 2, Loss: 3.8336966037750244
Epoch: 1, Loss: 3.764146566390991
Epoch: 2, Loss: 3.7046773433685303
Epoch: 1, Loss: 4.175830841064453
Epoch: 2, Loss: 4.146229267120361
Epoch: 1, Loss: 4.154030799865723
Epoch: 2, Loss: 4.07781982421875
Epoch: 1, Loss: 3.951857805252075
Epoch: 2, Loss: 3.883538246154785
Epoch: 1, Loss: 3.8121352195739746
Epoch: 2, Loss: 3.6974267959594727
Epoch: 1, Loss: 3.868309259414673
Epoch: 2, Loss: 3.8255739212036133
Epoch: 1, Loss: 3.9266889095306396
Epoch: 2, Loss: 3.8942677974700928
Epoch: 1, Loss: 3.810514450073242
Epoch: 2, Loss: 3.774247169494629
Epoch: 1, Loss: 4.428821086883545
Epoch: 2, Loss: 4.379277229309082
Epoch: 1, Loss: 3.4824178218841553
Epoch: 2, Loss: 3.4331400394439697
Epoch: 1, Loss: 4.042919635772705
Epoch: 2, Loss: 3.994455575942993
Epoch: 1, Loss: 3.8682942390441895
Epoch: 2, Loss: 3.799874782562256
Epoch: 1, Loss: 3.468520402908325
Epoch

Epoch: 2, Loss: 4.279140472412109
Epoch: 1, Loss: 4.037736892700195
Epoch: 2, Loss: 3.9867186546325684
Epoch: 1, Loss: 4.055414199829102
Epoch: 2, Loss: 4.01021671295166
Epoch: 1, Loss: 3.916238307952881
Epoch: 2, Loss: 3.8319497108459473
Epoch: 1, Loss: 3.7321863174438477
Epoch: 2, Loss: 3.717808723449707
Epoch: 1, Loss: 4.158197402954102
Epoch: 2, Loss: 4.076075553894043
Epoch: 1, Loss: 4.057018280029297
Epoch: 2, Loss: 4.015720367431641
Epoch: 1, Loss: 4.0661139488220215
Epoch: 2, Loss: 4.029360294342041
Epoch: 1, Loss: 3.959174394607544
Epoch: 2, Loss: 3.912776470184326
Epoch: 1, Loss: 3.9366886615753174
Epoch: 2, Loss: 3.887023448944092
Epoch: 1, Loss: 3.849579095840454
Epoch: 2, Loss: 3.789060354232788
Epoch: 1, Loss: 3.903879165649414
Epoch: 2, Loss: 3.8706297874450684
Epoch: 1, Loss: 3.837087392807007
Epoch: 2, Loss: 3.7899551391601562
Epoch: 1, Loss: 3.40321683883667
Epoch: 2, Loss: 3.3530564308166504
Epoch: 1, Loss: 3.978139638900757
Epoch: 2, Loss: 3.898158311843872
Epoch: 1

Epoch: 1, Loss: 4.093822002410889
Epoch: 2, Loss: 4.03291130065918
Epoch: 1, Loss: 3.627300262451172
Epoch: 2, Loss: 3.5727083683013916
Epoch: 1, Loss: 4.140433311462402
Epoch: 2, Loss: 4.080718517303467
Epoch: 1, Loss: 4.400916576385498
Epoch: 2, Loss: 4.3561506271362305
Epoch: 1, Loss: 3.924957513809204
Epoch: 2, Loss: 3.8763198852539062
Epoch: 1, Loss: 3.8288917541503906
Epoch: 2, Loss: 3.7620105743408203
Epoch: 1, Loss: 4.03533411026001
Epoch: 2, Loss: 3.981445789337158
Epoch: 1, Loss: 3.844536066055298
Epoch: 2, Loss: 3.809237241744995
Epoch: 1, Loss: 3.9382574558258057
Epoch: 2, Loss: 3.8645076751708984
Epoch: 1, Loss: 3.5545976161956787
Epoch: 2, Loss: 3.4886908531188965
Epoch: 1, Loss: 4.019608497619629
Epoch: 2, Loss: 3.974830389022827
Epoch: 1, Loss: 3.6317527294158936
Epoch: 2, Loss: 3.5834577083587646
Epoch: 1, Loss: 4.0729475021362305
Epoch: 2, Loss: 3.9862818717956543
Epoch: 1, Loss: 4.133977890014648
Epoch: 2, Loss: 4.124846935272217
Epoch: 1, Loss: 3.772394895553589
Epo

Epoch: 2, Loss: 4.033304691314697
Epoch: 1, Loss: 3.6974470615386963
Epoch: 2, Loss: 3.6315243244171143
Epoch: 1, Loss: 4.0671892166137695
Epoch: 2, Loss: 3.9992716312408447
Epoch: 1, Loss: 3.8368117809295654
Epoch: 2, Loss: 3.7836215496063232
Epoch: 1, Loss: 3.817561388015747
Epoch: 2, Loss: 3.7689812183380127
Epoch: 1, Loss: 3.9981791973114014
Epoch: 2, Loss: 3.943073034286499
Epoch: 1, Loss: 3.7255454063415527
Epoch: 2, Loss: 3.67977237701416
Epoch: 1, Loss: 3.5279200077056885
Epoch: 2, Loss: 3.4453485012054443
Epoch: 1, Loss: 3.8931350708007812
Epoch: 2, Loss: 3.8696322441101074
Epoch: 1, Loss: 3.711294174194336
Epoch: 2, Loss: 3.6748247146606445
Epoch: 1, Loss: 3.9641871452331543
Epoch: 2, Loss: 3.8784031867980957
Epoch: 1, Loss: 3.861424684524536
Epoch: 2, Loss: 3.8239285945892334
Epoch: 1, Loss: 4.404442310333252
Epoch: 2, Loss: 4.347532749176025
Epoch: 1, Loss: 3.89713454246521
Epoch: 2, Loss: 3.8293707370758057
Epoch: 1, Loss: 3.797008752822876
Epoch: 2, Loss: 3.74897050857543

Epoch: 1, Loss: 3.516432762145996
Epoch: 2, Loss: 3.4958293437957764
Epoch: 1, Loss: 3.8206305503845215
Epoch: 2, Loss: 3.762491226196289
Epoch: 1, Loss: 3.649129629135132
Epoch: 2, Loss: 3.586454153060913
Epoch: 1, Loss: 4.0548014640808105
Epoch: 2, Loss: 4.010509967803955
Epoch: 1, Loss: 4.251030921936035
Epoch: 2, Loss: 4.185767650604248
Epoch: 1, Loss: 4.06350564956665
Epoch: 2, Loss: 4.014584064483643
Epoch: 1, Loss: 4.038875102996826
Epoch: 2, Loss: 3.9535984992980957
Epoch: 1, Loss: 3.6703579425811768
Epoch: 2, Loss: 3.6220474243164062
Epoch: 1, Loss: 3.9250845909118652
Epoch: 2, Loss: 3.8716681003570557
Epoch: 1, Loss: 4.144778728485107
Epoch: 2, Loss: 4.088861465454102
Epoch: 1, Loss: 3.7427525520324707
Epoch: 2, Loss: 3.6736207008361816
Epoch: 1, Loss: 4.098190784454346
Epoch: 2, Loss: 4.062408924102783
Epoch: 1, Loss: 3.440551280975342
Epoch: 2, Loss: 3.3637452125549316
Epoch: 1, Loss: 3.8750061988830566
Epoch: 2, Loss: 3.8008203506469727
Epoch: 1, Loss: 4.086050987243652
Ep

Epoch: 2, Loss: 3.3341281414031982
Epoch: 1, Loss: 3.771250009536743
Epoch: 2, Loss: 3.73406982421875
Epoch: 1, Loss: 3.8770627975463867
Epoch: 2, Loss: 3.8258702754974365
Epoch: 1, Loss: 3.8622963428497314
Epoch: 2, Loss: 3.8346524238586426
Epoch: 1, Loss: 3.88791561126709
Epoch: 2, Loss: 3.842639446258545
Epoch: 1, Loss: 3.814108371734619
Epoch: 2, Loss: 3.736422300338745
Epoch: 1, Loss: 3.9524075984954834
Epoch: 2, Loss: 3.9053122997283936
Epoch: 1, Loss: 3.6899969577789307
Epoch: 2, Loss: 3.63407301902771
Epoch: 1, Loss: 3.5109825134277344
Epoch: 2, Loss: 3.4577486515045166
Epoch: 1, Loss: 3.608454942703247
Epoch: 2, Loss: 3.5476784706115723
Epoch: 1, Loss: 4.144641399383545
Epoch: 2, Loss: 4.075417995452881
Epoch: 1, Loss: 3.735283136367798
Epoch: 2, Loss: 3.687717914581299
Epoch: 1, Loss: 4.0530171394348145
Epoch: 2, Loss: 3.995457887649536
Epoch: 1, Loss: 3.8142974376678467
Epoch: 2, Loss: 3.707897901535034
Epoch: 1, Loss: 3.9960687160491943
Epoch: 2, Loss: 3.9370555877685547
Ep

Epoch: 2, Loss: 3.759514093399048
Epoch: 1, Loss: 3.6672730445861816
Epoch: 2, Loss: 3.6087489128112793
Epoch: 1, Loss: 3.6824705600738525
Epoch: 2, Loss: 3.5925726890563965
Epoch: 1, Loss: 3.8269829750061035
Epoch: 2, Loss: 3.7469019889831543
Epoch: 1, Loss: 3.924400568008423
Epoch: 2, Loss: 3.851468086242676
Epoch: 1, Loss: 3.879086971282959
Epoch: 2, Loss: 3.831984043121338
Epoch: 1, Loss: 3.634140729904175
Epoch: 2, Loss: 3.535954713821411
Epoch: 1, Loss: 3.814225673675537
Epoch: 2, Loss: 3.7365846633911133
Epoch: 1, Loss: 3.5684561729431152
Epoch: 2, Loss: 3.515859603881836
Epoch: 1, Loss: 4.273372650146484
Epoch: 2, Loss: 4.208383560180664
Epoch: 1, Loss: 3.8735532760620117
Epoch: 2, Loss: 3.8136794567108154
Epoch: 1, Loss: 3.942718744277954
Epoch: 2, Loss: 3.9173619747161865
Epoch: 1, Loss: 3.7822015285491943
Epoch: 2, Loss: 3.7416317462921143
Epoch: 1, Loss: 3.652000665664673
Epoch: 2, Loss: 3.587989568710327
Epoch: 1, Loss: 3.7278034687042236
Epoch: 2, Loss: 3.6969997882843018

Epoch: 2, Loss: 3.7600362300872803
Epoch: 1, Loss: 3.948979377746582
Epoch: 2, Loss: 3.874368190765381
Epoch: 1, Loss: 3.711888074874878
Epoch: 2, Loss: 3.6729719638824463
Epoch: 1, Loss: 4.000154972076416
Epoch: 2, Loss: 3.9366860389709473
Epoch: 1, Loss: 3.9548895359039307
Epoch: 2, Loss: 3.8959155082702637
Epoch: 1, Loss: 3.9825680255889893
Epoch: 2, Loss: 3.9258148670196533
Epoch: 1, Loss: 3.523562431335449
Epoch: 2, Loss: 3.469550848007202
Epoch: 1, Loss: 3.8969433307647705
Epoch: 2, Loss: 3.8592615127563477
Epoch: 1, Loss: 3.7471165657043457
Epoch: 2, Loss: 3.6786980628967285
Epoch: 1, Loss: 4.157384395599365
Epoch: 2, Loss: 4.067890644073486
Epoch: 1, Loss: 3.6418590545654297
Epoch: 2, Loss: 3.578608274459839
Epoch: 1, Loss: 3.870070695877075
Epoch: 2, Loss: 3.8120334148406982
Epoch: 1, Loss: 3.7556962966918945
Epoch: 2, Loss: 3.71012806892395
Epoch: 1, Loss: 3.884484052658081
Epoch: 2, Loss: 3.8431479930877686
Epoch: 1, Loss: 3.9561069011688232
Epoch: 2, Loss: 3.892009496688842

Epoch: 1, Loss: 3.487335205078125
Epoch: 2, Loss: 3.4037771224975586
Epoch: 1, Loss: 3.9088592529296875
Epoch: 2, Loss: 3.8509199619293213
Epoch: 1, Loss: 3.731372833251953
Epoch: 2, Loss: 3.631334066390991
Epoch: 1, Loss: 3.222501754760742
Epoch: 2, Loss: 3.1473658084869385
Epoch: 1, Loss: 4.177176475524902
Epoch: 2, Loss: 4.098010063171387
Epoch: 1, Loss: 3.6874070167541504
Epoch: 2, Loss: 3.613818645477295
Epoch: 1, Loss: 3.7288239002227783
Epoch: 2, Loss: 3.6804611682891846
Epoch: 1, Loss: 3.708777904510498
Epoch: 2, Loss: 3.6642825603485107
Epoch: 1, Loss: 3.949490547180176
Epoch: 2, Loss: 3.8838021755218506
Epoch: 1, Loss: 4.332342624664307
Epoch: 2, Loss: 4.2530598640441895
Epoch: 1, Loss: 3.6199212074279785
Epoch: 2, Loss: 3.5618820190429688
Epoch: 1, Loss: 3.891212224960327
Epoch: 2, Loss: 3.8196136951446533
Epoch: 1, Loss: 3.8577024936676025
Epoch: 2, Loss: 3.8086118698120117
Epoch: 1, Loss: 3.6378531455993652
Epoch: 2, Loss: 3.574779510498047
Epoch: 1, Loss: 3.79195046424865

Epoch: 1, Loss: 3.424661636352539
Epoch: 2, Loss: 3.3896114826202393
Epoch: 1, Loss: 3.6313986778259277
Epoch: 2, Loss: 3.580090284347534
Epoch: 1, Loss: 4.04717493057251
Epoch: 2, Loss: 3.974076271057129
Epoch: 1, Loss: 3.8191170692443848
Epoch: 2, Loss: 3.7507643699645996
Epoch: 1, Loss: 3.758129358291626
Epoch: 2, Loss: 3.7410500049591064
Epoch: 1, Loss: 4.2132415771484375
Epoch: 2, Loss: 4.115599632263184
Epoch: 1, Loss: 4.069127559661865
Epoch: 2, Loss: 4.007665157318115
Epoch: 1, Loss: 3.7493109703063965
Epoch: 2, Loss: 3.712700843811035
Epoch: 1, Loss: 4.020698070526123
Epoch: 2, Loss: 3.985443353652954
Epoch: 1, Loss: 3.615386962890625
Epoch: 2, Loss: 3.608031988143921
Epoch: 1, Loss: 3.762928009033203
Epoch: 2, Loss: 3.7512006759643555
Epoch: 1, Loss: 4.001737594604492
Epoch: 2, Loss: 3.9563403129577637
Epoch: 1, Loss: 3.605119466781616
Epoch: 2, Loss: 3.565476894378662
Epoch: 1, Loss: 3.976959228515625
Epoch: 2, Loss: 3.9232912063598633
Epoch: 1, Loss: 3.7423694133758545
Epoc

Epoch: 1, Loss: 3.4809341430664062
Epoch: 2, Loss: 3.404233455657959
Epoch: 1, Loss: 3.9649271965026855
Epoch: 2, Loss: 3.9387199878692627
Epoch: 1, Loss: 3.7114288806915283
Epoch: 2, Loss: 3.6403167247772217
Epoch: 1, Loss: 4.117454528808594
Epoch: 2, Loss: 4.0558857917785645
Epoch: 1, Loss: 3.779891014099121
Epoch: 2, Loss: 3.7448971271514893
Epoch: 1, Loss: 3.5005383491516113
Epoch: 2, Loss: 3.452446937561035
Epoch: 1, Loss: 3.446775197982788
Epoch: 2, Loss: 3.3731844425201416
Epoch: 1, Loss: 3.861140251159668
Epoch: 2, Loss: 3.8073954582214355
Epoch: 1, Loss: 3.821439266204834
Epoch: 2, Loss: 3.7685627937316895
Epoch: 1, Loss: 3.797025203704834
Epoch: 2, Loss: 3.760214328765869
Epoch: 1, Loss: 4.028019905090332
Epoch: 2, Loss: 3.939211368560791
Epoch: 1, Loss: 3.804563045501709
Epoch: 2, Loss: 3.7744805812835693
Epoch: 1, Loss: 3.7718746662139893
Epoch: 2, Loss: 3.6794683933258057
Epoch: 1, Loss: 4.01131534576416
Epoch: 2, Loss: 3.955258846282959
Epoch: 1, Loss: 3.884007453918457
E

Epoch: 1, Loss: 4.013556957244873
Epoch: 2, Loss: 3.953773260116577
Epoch: 1, Loss: 4.018589019775391
Epoch: 2, Loss: 3.9755706787109375
Epoch: 1, Loss: 3.9541754722595215
Epoch: 2, Loss: 3.905994176864624
Epoch: 1, Loss: 3.8401198387145996
Epoch: 2, Loss: 3.7579712867736816
Epoch: 1, Loss: 3.9078829288482666
Epoch: 2, Loss: 3.846301317214966
Epoch: 1, Loss: 3.8641393184661865
Epoch: 2, Loss: 3.8032898902893066
Epoch: 1, Loss: 3.753737449645996
Epoch: 2, Loss: 3.6970627307891846
Epoch: 1, Loss: 3.7481462955474854
Epoch: 2, Loss: 3.6848127841949463
Epoch: 1, Loss: 3.791088342666626
Epoch: 2, Loss: 3.7243196964263916
Epoch: 1, Loss: 4.151026248931885
Epoch: 2, Loss: 4.063362121582031
Epoch: 1, Loss: 4.052708625793457
Epoch: 2, Loss: 3.9959120750427246
Epoch: 1, Loss: 3.3060600757598877
Epoch: 2, Loss: 3.247147560119629
Epoch: 1, Loss: 3.7996840476989746
Epoch: 2, Loss: 3.724916934967041
Epoch: 1, Loss: 3.922719717025757
Epoch: 2, Loss: 3.857699155807495
Epoch: 1, Loss: 3.7802622318267822

Epoch: 1, Loss: 3.695484161376953
Epoch: 2, Loss: 3.6377065181732178
Epoch: 1, Loss: 3.4327473640441895
Epoch: 2, Loss: 3.402634620666504
Epoch: 1, Loss: 4.012798309326172
Epoch: 2, Loss: 3.946164846420288
Epoch: 1, Loss: 4.019653797149658
Epoch: 2, Loss: 3.995788097381592
Epoch: 1, Loss: 3.7270843982696533
Epoch: 2, Loss: 3.640047311782837
Epoch: 1, Loss: 3.5752499103546143
Epoch: 2, Loss: 3.4933836460113525
Epoch: 1, Loss: 3.641770601272583
Epoch: 2, Loss: 3.577253580093384
Epoch: 1, Loss: 3.631736993789673
Epoch: 2, Loss: 3.572139263153076
Epoch: 1, Loss: 3.637995481491089
Epoch: 2, Loss: 3.5624184608459473
Epoch: 1, Loss: 3.741032838821411
Epoch: 2, Loss: 3.6741068363189697
Epoch: 1, Loss: 3.9385876655578613
Epoch: 2, Loss: 3.878875494003296
Epoch: 1, Loss: 3.8065996170043945
Epoch: 2, Loss: 3.743260145187378
Epoch: 1, Loss: 3.5299794673919678
Epoch: 2, Loss: 3.480379104614258
Epoch: 1, Loss: 3.7082064151763916
Epoch: 2, Loss: 3.6466150283813477
Epoch: 1, Loss: 3.5264763832092285
E

Epoch: 1, Loss: 3.846139907836914
Epoch: 2, Loss: 3.816673994064331
Epoch: 1, Loss: 3.800617218017578
Epoch: 2, Loss: 3.7296864986419678
Epoch: 1, Loss: 3.466973304748535
Epoch: 2, Loss: 3.394622564315796
Epoch: 1, Loss: 3.578784465789795
Epoch: 2, Loss: 3.516127824783325
Epoch: 1, Loss: 3.4464893341064453
Epoch: 2, Loss: 3.366820812225342
Epoch: 1, Loss: 3.7569358348846436
Epoch: 2, Loss: 3.6468451023101807
Epoch: 1, Loss: 3.5650148391723633
Epoch: 2, Loss: 3.510976552963257
Epoch: 1, Loss: 3.5699455738067627
Epoch: 2, Loss: 3.4950478076934814
Epoch: 1, Loss: 3.801832675933838
Epoch: 2, Loss: 3.749481201171875
Epoch: 1, Loss: 4.002967834472656
Epoch: 2, Loss: 3.9495279788970947
Epoch: 1, Loss: 3.311122179031372
Epoch: 2, Loss: 3.2791244983673096
Epoch: 1, Loss: 3.971567392349243
Epoch: 2, Loss: 3.9050300121307373
Epoch: 1, Loss: 3.700300693511963
Epoch: 2, Loss: 3.657273769378662
Epoch: 1, Loss: 3.98504900932312
Epoch: 2, Loss: 3.8880062103271484
Epoch: 1, Loss: 3.725013494491577
Epoc

Epoch: 2, Loss: 3.8125760555267334
Epoch: 1, Loss: 3.9493589401245117
Epoch: 2, Loss: 3.9025895595550537
Epoch: 1, Loss: 3.6319210529327393
Epoch: 2, Loss: 3.5859289169311523
Epoch: 1, Loss: 3.7793116569519043
Epoch: 2, Loss: 3.734529733657837
Epoch: 1, Loss: 3.58376145362854
Epoch: 2, Loss: 3.549778461456299
Epoch: 1, Loss: 3.761584520339966
Epoch: 2, Loss: 3.7046706676483154
Epoch: 1, Loss: 3.7469539642333984
Epoch: 2, Loss: 3.679421901702881
Epoch: 1, Loss: 3.4007019996643066
Epoch: 2, Loss: 3.35752272605896
Epoch: 1, Loss: 4.191193580627441
Epoch: 2, Loss: 4.1171369552612305
Epoch: 1, Loss: 3.823815107345581
Epoch: 2, Loss: 3.7458460330963135
Epoch: 1, Loss: 3.7170913219451904
Epoch: 2, Loss: 3.682450771331787
Epoch: 1, Loss: 3.8130416870117188
Epoch: 2, Loss: 3.783257246017456
Epoch: 1, Loss: 3.92322039604187
Epoch: 2, Loss: 3.861518621444702
Epoch: 1, Loss: 3.766482353210449
Epoch: 2, Loss: 3.718348503112793
Epoch: 1, Loss: 3.2498135566711426
Epoch: 2, Loss: 3.2033114433288574
Ep

Epoch: 2, Loss: 3.64658784866333
Epoch: 1, Loss: 3.5173087120056152
Epoch: 2, Loss: 3.4457826614379883
Epoch: 1, Loss: 3.8459675312042236
Epoch: 2, Loss: 3.7987070083618164
Epoch: 1, Loss: 3.715845823287964
Epoch: 2, Loss: 3.6970138549804688
Epoch: 1, Loss: 3.6027185916900635
Epoch: 2, Loss: 3.55666446685791
Epoch: 1, Loss: 3.8738651275634766
Epoch: 2, Loss: 3.807285785675049
Epoch: 1, Loss: 4.050968170166016
Epoch: 2, Loss: 3.971153497695923
Epoch: 1, Loss: 3.6055257320404053
Epoch: 2, Loss: 3.5325255393981934
Epoch: 1, Loss: 3.611656665802002
Epoch: 2, Loss: 3.557361364364624
Epoch: 1, Loss: 3.546855926513672
Epoch: 2, Loss: 3.4658687114715576
Epoch: 1, Loss: 3.6951849460601807
Epoch: 2, Loss: 3.6549463272094727
Epoch: 1, Loss: 3.618985652923584
Epoch: 2, Loss: 3.582848310470581
Epoch: 1, Loss: 3.5492804050445557
Epoch: 2, Loss: 3.4957926273345947
Epoch: 1, Loss: 3.4622104167938232
Epoch: 2, Loss: 3.393993854522705
Epoch: 1, Loss: 3.9040656089782715
Epoch: 2, Loss: 3.8294057846069336

Epoch: 2, Loss: 3.8444299697875977
Epoch: 1, Loss: 3.6927433013916016
Epoch: 2, Loss: 3.6275577545166016
Epoch: 1, Loss: 3.4708354473114014
Epoch: 2, Loss: 3.399589776992798
Epoch: 1, Loss: 3.674837589263916
Epoch: 2, Loss: 3.619652509689331
Epoch: 1, Loss: 3.5583395957946777
Epoch: 2, Loss: 3.4891090393066406
Epoch: 1, Loss: 3.551422595977783
Epoch: 2, Loss: 3.5064945220947266
Epoch: 1, Loss: 4.035143852233887
Epoch: 2, Loss: 3.988542079925537
Epoch: 1, Loss: 3.5418927669525146
Epoch: 2, Loss: 3.490800142288208
Epoch: 1, Loss: 3.4347777366638184
Epoch: 2, Loss: 3.3518121242523193
Epoch: 1, Loss: 3.6214053630828857
Epoch: 2, Loss: 3.5709729194641113
Epoch: 1, Loss: 3.59371280670166
Epoch: 2, Loss: 3.534214735031128
Epoch: 1, Loss: 3.8296613693237305
Epoch: 2, Loss: 3.811161994934082
Epoch: 1, Loss: 3.7470927238464355
Epoch: 2, Loss: 3.704113006591797
Epoch: 1, Loss: 3.240614175796509
Epoch: 2, Loss: 3.174739122390747
Epoch: 1, Loss: 3.5564112663269043
Epoch: 2, Loss: 3.4813973903656006

Epoch: 2, Loss: 3.5557236671447754
Epoch: 1, Loss: 3.5805342197418213
Epoch: 2, Loss: 3.5314269065856934
Epoch: 1, Loss: 3.419729471206665
Epoch: 2, Loss: 3.3732094764709473
Epoch: 1, Loss: 3.8957316875457764
Epoch: 2, Loss: 3.8394343852996826
Epoch: 1, Loss: 3.4179556369781494
Epoch: 2, Loss: 3.3913772106170654
Epoch: 1, Loss: 3.847749710083008
Epoch: 2, Loss: 3.8205831050872803
Epoch: 1, Loss: 3.686664342880249
Epoch: 2, Loss: 3.649380683898926
Epoch: 1, Loss: 3.671210289001465
Epoch: 2, Loss: 3.579653739929199
Epoch: 1, Loss: 3.4887776374816895
Epoch: 2, Loss: 3.4166207313537598
Epoch: 1, Loss: 3.6814348697662354
Epoch: 2, Loss: 3.644228935241699
Epoch: 1, Loss: 3.509887456893921
Epoch: 2, Loss: 3.4448354244232178
Epoch: 1, Loss: 3.7247226238250732
Epoch: 2, Loss: 3.685195207595825
Epoch: 1, Loss: 3.751084327697754
Epoch: 2, Loss: 3.7174456119537354
Epoch: 1, Loss: 3.886507511138916
Epoch: 2, Loss: 3.806072950363159
Epoch: 1, Loss: 3.515432119369507
Epoch: 2, Loss: 3.452006340026855

Epoch: 2, Loss: 3.3034327030181885
Epoch: 1, Loss: 3.5229713916778564
Epoch: 2, Loss: 3.4443624019622803
Epoch: 1, Loss: 3.9406232833862305
Epoch: 2, Loss: 3.8890798091888428
Epoch: 1, Loss: 3.467158794403076
Epoch: 2, Loss: 3.387998104095459
Epoch: 1, Loss: 3.6659772396087646
Epoch: 2, Loss: 3.59980845451355
Epoch: 1, Loss: 3.294703722000122
Epoch: 2, Loss: 3.229099988937378
Epoch: 1, Loss: 3.636596202850342
Epoch: 2, Loss: 3.6240310668945312
Epoch: 1, Loss: 3.641580581665039
Epoch: 2, Loss: 3.5793144702911377
Epoch: 1, Loss: 3.734959602355957
Epoch: 2, Loss: 3.68233323097229
Epoch: 1, Loss: 4.091590404510498
Epoch: 2, Loss: 4.035525321960449
Epoch: 1, Loss: 3.716032028198242
Epoch: 2, Loss: 3.6566059589385986
Epoch: 1, Loss: 3.7213001251220703
Epoch: 2, Loss: 3.667464017868042
Epoch: 1, Loss: 3.772998571395874
Epoch: 2, Loss: 3.709501266479492
Epoch: 1, Loss: 3.3685600757598877
Epoch: 2, Loss: 3.336576223373413
Epoch: 1, Loss: 3.7719779014587402
Epoch: 2, Loss: 3.697596549987793
Epoc

Epoch: 2, Loss: 3.2973811626434326
Epoch: 1, Loss: 3.528472661972046
Epoch: 2, Loss: 3.424164056777954
Epoch: 1, Loss: 3.821993589401245
Epoch: 2, Loss: 3.77785325050354
Epoch: 1, Loss: 3.73510479927063
Epoch: 2, Loss: 3.6463921070098877
Epoch: 1, Loss: 3.65343976020813
Epoch: 2, Loss: 3.609405279159546
Epoch: 1, Loss: 3.7256202697753906
Epoch: 2, Loss: 3.652801036834717
Epoch: 1, Loss: 3.987642765045166
Epoch: 2, Loss: 3.941910743713379
Epoch: 1, Loss: 3.909152030944824
Epoch: 2, Loss: 3.860456705093384
Epoch: 1, Loss: 3.557239294052124
Epoch: 2, Loss: 3.4798905849456787
Epoch: 1, Loss: 3.755885124206543
Epoch: 2, Loss: 3.705014944076538
Epoch: 1, Loss: 3.466625928878784
Epoch: 2, Loss: 3.4081151485443115
Epoch: 1, Loss: 3.276688575744629
Epoch: 2, Loss: 3.1778995990753174
Epoch: 1, Loss: 4.011191368103027
Epoch: 2, Loss: 3.9425277709960938
Epoch: 1, Loss: 3.394745111465454
Epoch: 2, Loss: 3.361379623413086
Epoch: 1, Loss: 3.421466112136841
Epoch: 2, Loss: 3.3961825370788574
Epoch: 1,

In [25]:
transformer.eval()

src_test = gen_tensor(test_df.iloc[:1900,2])
tgt_test = gen_tensor(test_df.iloc[:1900,3])

with torch.no_grad():

    val_output = transformer(src_test, tgt_test[:, :-1])
    val_loss = criterion(val_output.contiguous().view(-1, tgt_vocab_size), tgt_test[:, 1:].contiguous().view(-1))
    print(f"Validation Loss: {val_loss.item()}")

Validation Loss: 3.561875820159912
