In [None]:
import re

def clean_and_normalize(text):
    # Видалення непотрібних символів та зайвих пробілів
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text)

    # Переведення тексту до нижнього регістру
    text = text.lower()

    return text

def read_file_to_list(file_path):
    lines = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            lines.append(line.strip())
    return lines

def create_sentence_pairs(file1_path, file2_path):
    file1_lines = read_file_to_list(file1_path)
    file2_lines = read_file_to_list(file2_path)

    # Перевіряємо, чи кількість речень в обох файлах однакова
    assert len(file1_lines) == len(file2_lines), "Кількість речень в файлах не співпадає."

    sentence_pairs = []
    for i in range(len(file1_lines)):
        sentence1 = clean_and_normalize(file1_lines[i])
        sentence2 = clean_and_normalize(file2_lines[i])

        # Перевірка на наявність порожніх рядків
        if sentence1 and sentence2:
            sentence_pairs.append((sentence1, sentence2))

    return sentence_pairs


# Вказуємо шляхи до двох текстових файлів
#file1_path = '/content/output_en.txt'
#file2_path = '/content/output_uk.txt'

file11_path = '/content/english_sentences.txt'
file22_path = '/content/ukrainian_sentences.txt'

#data1 = create_sentence_pairs(file1_path, file2_path)
data2 = create_sentence_pairs(file11_path, file22_path)
#data=data1
data=data2[:80000]
len(data)

80000

In [None]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
import nltk
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
import nltk
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
nltk.download('punkt')



batch_size = 16


train_data, temp_data = train_test_split(data, test_size=0.3, random_state=42)
test_data, val_data = train_test_split(temp_data, test_size=0.5, random_state=42)

def create_vocab_src(data):
    stemmer = PorterStemmer()
    word_list = []
    count=0
    for src_sent, trg_sent in data:
        words = nltk.word_tokenize(src_sent)
        for word in words:
            if stemmer.stem(word) not in word_list:
                count=count+1
                word_list.append(stemmer.stem(word))


    indices = random.sample(range(4, count+4), count)
    word_index = dict(zip(word_list, indices))
    word_index["<pad>"]=0
    word_index["<sos>"]=2
    word_index["<eos>"]=3
    word_index["<OOV>"]=1

    return word_index

def create_vocab_trg(data):
    word_list = []
    count=0
    for src_sent, trg_sent in data:
        for word in trg_sent.split(' '):
            if word not in word_list:
                count=count+1
                word_list.append(word)
    indices = random.sample(range(4, count+4), count)
    word_index = dict(zip(word_list, indices))
    word_index["<pad>"]=0
    word_index["<sos>"]=2
    word_index["<eos>"]=3
    word_index["<OOV>"]=1
    return word_index

# Функція для перетворення тексту у послідовність токенів
def sentence_src_to_tokens(sentence, word_index):
    stemmer = PorterStemmer()
    ans=[]
    words = nltk.word_tokenize(sentence)
    for word in words:
      if word in word_index:
          ans.append(word_index[stemmer.stem(word)])
      else:
          ans.append(word_index["<OOV>"])
    return  ans
def sentence_trg_to_tokens(sentence, word_index):
    ans=[]
    for word in sentence.split():
        if word in word_index:
          ans.append(word_index[word])
        else:
          ans.append(word_index["<OOV>"])
    return  ans


# Функція для паддінгу послідовностей до однакової довжини
def pad_sequences_src(sequences, max_len):
    padded_seqs = []
    for seq in sequences:
        if len(seq) < max_len-1:
            padding_length = max_len - len(seq) - 1
            padded_seqs.append(seq + [3] + [0] * padding_length)
        elif len(seq) == max_len-1:
            padded_seqs.append(seq + [3])
        else:
            truncated_seq = seq[:max_len-1]
            padded_seqs.append(truncated_seq + [3])


    return padded_seqs

def pad_sequences_trg(sequences, max_len):
    trg_input = []
    trg_output = []
    for seq in sequences:
        if len(seq) < max_len-1:
            padding_length = max_len - len(seq) - 1
            trg_input.append([2] + seq + [0] * padding_length)
            trg_output.append(seq + [3] + [0] * padding_length)
        elif len(seq) == max_len-1:
            trg_input.append([2] + seq)
            trg_output.append(seq + [3])
        else:
            truncated_seq = seq[:max_len-1]
            trg_input.append([2] + truncated_seq )
            trg_output.append(truncated_seq + [3])
    return trg_input, trg_output

# Створюємо словники для англійської та української мов
eng_word_index = create_vocab_src(train_data)
ukr_word_index = create_vocab_trg(train_data)

# Підраховуємо розмір словників
eng_vocab_size = len(eng_word_index)
ukr_vocab_size = len(ukr_word_index)



# Перетворюємо речення на послідовності токенів та паддінгуємо їх
train_src = [sentence_src_to_tokens(pair[0], eng_word_index) for pair in train_data]
train_target = [sentence_trg_to_tokens(pair[1], ukr_word_index) for pair in train_data]

val_src = [sentence_src_to_tokens(pair[0], eng_word_index) for pair in val_data]
val_target = [sentence_trg_to_tokens(pair[1], ukr_word_index) for pair in val_data]

max_src_len = 8
max_target_len = 8
train_src_padded = pad_sequences_src(train_src, max_src_len)
train_target_padded_input , train_target_padded_output= pad_sequences_trg(train_target, max_target_len)

val_src_padded = pad_sequences_src(val_src, max_src_len)
val_target_padded_input , val_target_padded_output= pad_sequences_trg(val_target, max_target_len)

class CustomDataset(Dataset):
    def __init__(self, src_list, input_trg_list, output_trg_list):
        super().__init__()
        self.src_data = torch.LongTensor(src_list)
        self.input_trg_data = torch.LongTensor(input_trg_list)
        self.output_trg_data = torch.LongTensor(output_trg_list)


    def make_mask(self):
        e_mask = (self.src_data != pad_id).unsqueeze(1)
        d_mask = (self.input_trg_data != pad_id).unsqueeze(1)

        nopeak_mask = torch.ones([1, seq_len, seq_len], dtype=torch.bool)
        nopeak_mask = torch.tril(nopeak_mask)
        d_mask = d_mask & nopeak_mask

        return e_mask, d_mask

    def __getitem__(self, idx):
        return self.src_data[idx], self.input_trg_data[idx], self.output_trg_data[idx]

    def __len__(self):
        return np.shape(self.src_data)[0]

# Створюємо DataLoader для тренування


train_dataset = CustomDataset(train_src_padded, train_target_padded_input, train_target_padded_output)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = CustomDataset(val_src_padded, val_target_padded_input, val_target_padded_output)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
# Визначаємо модель перекладача
train_dataloader

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


<torch.utils.data.dataloader.DataLoader at 0x79376bf550f0>

In [None]:
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip -q glove.6B.zip

--2023-08-05 07:51:46--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2023-08-05 07:51:47--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2023-08-05 07:51:47--  https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


202

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


import math
learning_rate = 1e-4
batch_size = 16
seq_len = 8
num_heads = 4
num_layers = 3
d_model = 100
d_ff = 1024
d_k = d_model // num_heads
drop_out_rate = 0.3
num_epochs = 5
beam_size = 4
class EncoderLayer(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_norm_1 = LayerNormalization()
        self.multihead_attention = MultiheadAttention()
        self.drop_out_1 = nn.Dropout(drop_out_rate)

        self.layer_norm_2 = LayerNormalization()
        self.feed_forward = FeedFowardLayer()
        self.drop_out_2 = nn.Dropout(drop_out_rate)

    def forward(self, x, e_mask):
        x_1 = self.layer_norm_1(x)
        x = x + self.drop_out_1(
            self.multihead_attention(x_1, x_1, x_1, mask=e_mask)
        )
        x_2 = self.layer_norm_2(x)
        x = x + self.drop_out_2(self.feed_forward(x_2))

        return x


class DecoderLayer(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_norm_1 = LayerNormalization()
        self.masked_multihead_attention = MultiheadAttention()
        self.drop_out_1 = nn.Dropout(drop_out_rate)

        self.layer_norm_2 = LayerNormalization()
        self.multihead_attention = MultiheadAttention()
        self.drop_out_2 = nn.Dropout(drop_out_rate)

        self.layer_norm_3 = LayerNormalization()
        self.feed_forward = FeedFowardLayer()
        self.drop_out_3 = nn.Dropout(drop_out_rate)

    def forward(self, x, e_output, e_mask,  d_mask):
        x_1 = self.layer_norm_1(x)
        x = x + self.drop_out_1(self.masked_multihead_attention(x_1, x_1, x_1, mask=d_mask))
        x_2 = self.layer_norm_2(x)
        x = x + self.drop_out_2(self.multihead_attention(x_2, e_output, e_output, mask=e_mask))
        x_3 = self.layer_norm_3(x)
        x = x + self.drop_out_3(self.feed_forward(x_3))

        return x


class MultiheadAttention(nn.Module):
    def __init__(self):
        super().__init__()
        self.inf = 1e9

        self.w_q = nn.Linear(d_model, d_model)
        self.w_k = nn.Linear(d_model, d_model)
        self.w_v = nn.Linear(d_model, d_model)

        self.dropout = nn.Dropout(drop_out_rate)
        self.attn_softmax = nn.Softmax(dim=-1)

        self.w_0 = nn.Linear(d_model, d_model)

    def forward(self, q, k, v, mask=None):
        input_shape = q.shape

        q = self.w_q(q).view(input_shape[0], -1, num_heads, d_k)
        k = self.w_k(k).view(input_shape[0], -1, num_heads, d_k)
        v = self.w_v(v).view(input_shape[0], -1, num_heads, d_k)

        q = q.transpose(1, 2)
        k = k.transpose(1, 2)
        v = v.transpose(1, 2)

        attn_values = self.self_attention(q, k, v, mask=mask)
        concat_output = attn_values.transpose(1, 2)\
            .contiguous().view(input_shape[0], -1, d_model)

        return self.w_0(concat_output)

    def self_attention(self, q, k, v, mask=None):
        attn_scores = torch.matmul(q, k.transpose(-2, -1))
        attn_scores = attn_scores / math.sqrt(d_k)

        if mask is not None:
            mask = mask.unsqueeze(1)
            attn_scores = attn_scores.masked_fill_(mask == 0, -1 * self.inf)

        attn_distribs = self.attn_softmax(attn_scores)

        attn_distribs = self.dropout(attn_distribs)
        attn_values = torch.matmul(attn_distribs, v)

        return attn_values


class FeedFowardLayer(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff, bias=True)
        self.relu = nn.ReLU()
        self.linear_2 = nn.Linear(d_ff, d_model, bias=True)
        self.dropout = nn.Dropout(drop_out_rate)

    def forward(self, x):
        x = self.relu(self.linear_1(x))
        x = self.dropout(x)
        x = self.linear_2(x)

        return x


class LayerNormalization(nn.Module):
    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.layer = nn.LayerNorm([d_model], elementwise_affine=True, eps=self.eps)

    def forward(self, x):
        x = self.layer(x)

        return x


class PositionalEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        pe_matrix= torch.zeros(seq_len, d_model)

        for pos in range(seq_len):
            for i in range(d_model):
                if i % 2 == 0:
                    pe_matrix[pos, i] = math.sin(pos / (10000 ** (2 * i / d_model)))
                elif i % 2 == 1:
                    pe_matrix[pos, i] = math.cos(pos / (10000 ** (2 * i / d_model)))

        pe_matrix = pe_matrix.unsqueeze(0)
        self.positional_encoding = pe_matrix.to(device=device).requires_grad_(False)

    def forward(self, x):
        x = x * math.sqrt(d_model)
        x = x + self.positional_encoding

        return x

class Transformer(nn.Module):
    def __init__(self, src_vocab_size, trg_vocab_size,pretrained_embeddings):
        super().__init__()
        self.src_vocab_size = src_vocab_size
        self.trg_vocab_size = trg_vocab_size
        self.src_embedding = nn.Embedding(self.src_vocab_size, d_model)
        self.src_embedding.weight.data.copy_(torch.from_numpy(pretrained_embeddings))
        self.trg_embedding = nn.Embedding(self.trg_vocab_size, d_model)
        self.positional_encoder = PositionalEncoder()
        self.encoder = Encoder()
        self.decoder = Decoder()
        self.output_linear = nn.Linear(d_model, self.trg_vocab_size)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, src_input, trg_input, e_mask=None, d_mask=None):
        src_input = self.src_embedding(src_input)

        trg_input = self.trg_embedding(trg_input)
        src_input = self.positional_encoder(src_input)
        trg_input = self.positional_encoder(trg_input)

        e_output = self.encoder(src_input, e_mask)
        d_output = self.decoder(trg_input, e_output, e_mask, d_mask)

        output = self.softmax(self.output_linear(d_output))

        return output


class Encoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([EncoderLayer() for i in range(num_layers)])
        self.layer_norm = LayerNormalization()

    def forward(self, x, e_mask):
        for i in range(num_layers):
            x = self.layers[i](x, e_mask)

        return self.layer_norm(x)


class Decoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([DecoderLayer() for i in range(num_layers)])
        self.layer_norm = LayerNormalization()

    def forward(self, x, e_output, e_mask, d_mask):
        for i in range(num_layers):
            x = self.layers[i](x, e_output, e_mask, d_mask)

        return self.layer_norm(x)




In [None]:
import collections
def load_pretrained_embeddings(
    embedding_file,
    word_to_id,
    num_oov_buckets=1,
    case_insensitive_embeddings=True,
):
    vocab_for_pretrain = collections.defaultdict(list)
    for key, value in word_to_id.items():
        vocab_for_pretrain[key].append(value)
    # Fill pretrained embedding matrix.
    with tf.io.gfile.GFile(embedding_file) as embedding:
        pretrained = None

        for line in embedding:
            fields = line.strip().split()
            word = fields[0]

            if pretrained is None:
                pretrained = np.random.normal(
                    size=(len(vocab_for_pretrain) , len(fields) - 1)
                )

            # Lookup word in the vocabulary.
            if word in vocab_for_pretrain:
                ids = vocab_for_pretrain[word]
                for index in ids:
                    pretrained[index] = np.asarray(fields[1:])

    return pretrained

In [None]:
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import numpy as np
import sys
import datetime
import torch
import torch.nn as nn
from tqdm import tqdm
import tensorflow as tf
import warnings

warnings.filterwarnings("ignore")

sos_id=2
eos_id=3
pad_id=0
def make_mask(src_input, trg_input):
        e_mask = (src_input != pad_id).unsqueeze(1)
        d_mask = (trg_input != pad_id).unsqueeze(1)

        nopeak_mask = torch.ones([1, seq_len, seq_len], dtype=torch.bool)
        nopeak_mask = torch.tril(nopeak_mask).to(device)
        d_mask = d_mask & nopeak_mask

        return e_mask, d_mask
embedding_file = '/content/glove.6B.100d.txt'
pretrained_embeddings = load_pretrained_embeddings(embedding_file, eng_word_index)

model = Transformer(eng_vocab_size, ukr_vocab_size, pretrained_embeddings).to(device)
optim = torch.optim.Adam(model.parameters(), lr=learning_rate)
best_loss = sys.float_info.max

print("Loading loss function...")
criterion = nn.NLLLoss()
train_loader = train_dataloader
val_loader = val_dataloader

for epoch in range(1, num_epochs + 1):
    model.train()

    train_losses = []
    bleu_scores = []
    start_time = datetime.datetime.now()

    for i, batch in tqdm(enumerate(train_loader)):
        src_input, trg_input, trg_output = batch
        src_input, trg_input, trg_output = src_input.to(device), trg_input.to(device), trg_output.to(device)

        e_mask, d_mask = make_mask(src_input, trg_input)
        output = model(src_input, trg_input, e_mask, d_mask)
        optim.zero_grad()

        loss = criterion(output.view(-1, output.size(-1)), trg_output.view(-1))
        loss.backward()
        optim.step()

        train_losses.append(loss.item())

        # Calculate BLEU score
        predicted = torch.argmax(output, dim=-1).cpu().numpy()
        reference = trg_output.cpu().numpy()
        for p, r in zip(predicted, reference):
            bleu_score = sentence_bleu([r.tolist()], p.tolist())
            bleu_scores.append(bleu_score)

        del src_input, trg_input, trg_output, e_mask, d_mask, output
        torch.cuda.empty_cache()

    # Валідаційна частина
    model.eval()
    val_losses = []
    val_bleu_scores = []

    with torch.no_grad():
        for i, batch in tqdm(enumerate(val_loader)):
            src_input, trg_input, trg_output = batch
            src_input, trg_input, trg_output = src_input.to(device), trg_input.to(device), trg_output.to(device)

            e_mask, d_mask = make_mask(src_input, trg_input)
            output = model(src_input, trg_input, e_mask, d_mask)

            loss = criterion(output.view(-1, output.size(-1)), trg_output.view(-1))
            val_losses.append(loss.item())

            predicted = torch.argmax(output, dim=-1).cpu().numpy()
            reference = trg_output.cpu().numpy()
            for p, r in zip(predicted, reference):
                bleu_score = sentence_bleu([r.tolist()], p.tolist())
                val_bleu_scores.append(bleu_score)

    end_time = datetime.datetime.now()
    training_time = end_time - start_time
    seconds = training_time.seconds
    hours = seconds // 3600
    minutes = (seconds % 3600) // 60
    seconds = seconds % 60

    mean_train_loss = np.mean(train_losses)
    mean_bleu_score = np.mean(bleu_scores)
    mean_val_loss = np.mean(val_losses)
    mean_val_bleu_score = np.mean(val_bleu_scores)
    print(f"#################### Epoch: {epoch} ####################")
    print(f"Train loss: {mean_train_loss} || Mean BLEU score: {mean_bleu_score} || One epoch training time: {hours}hrs {minutes}mins {seconds}secs")
    print(f"Validation loss: {mean_val_loss} || Mean Validation BLEU score: {mean_val_bleu_score}")

    if mean_val_loss < best_loss:
        best_loss = mean_val_loss
        torch.save(model.state_dict(), 'best_model.pth')


Loading loss function...


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
3500it [10:15,  5.69it/s]
750it [00:35, 21.30it/s]


#################### Epoch: 1 ####################
Train loss: 3.4842790113857816 || Mean BLEU score: 0.37630488418809716 || One epoch training time: 0hrs 10mins 50secs
Validation loss: 2.9148073374430337 || Mean Validation BLEU score: 0.39404411675366674


3500it [10:34,  5.52it/s]
750it [00:33, 22.08it/s]


#################### Epoch: 2 ####################
Train loss: 2.745517986706325 || Mean BLEU score: 0.39463161382612316 || One epoch training time: 0hrs 11mins 8secs
Validation loss: 2.674203867594401 || Mean Validation BLEU score: 0.404041627705153


3500it [10:29,  5.56it/s]
750it [00:34, 21.92it/s]


#################### Epoch: 3 ####################
Train loss: 2.499940338747842 || Mean BLEU score: 0.40102193568698696 || One epoch training time: 0hrs 11mins 3secs
Validation loss: 2.5248763915697734 || Mean Validation BLEU score: 0.41117803049928714


3500it [10:31,  5.55it/s]
750it [00:33, 22.36it/s]


#################### Epoch: 4 ####################
Train loss: 2.3172517173290252 || Mean BLEU score: 0.4068810364193691 || One epoch training time: 0hrs 11mins 4secs
Validation loss: 2.4004576665560404 || Mean Validation BLEU score: 0.4178626927207603


3500it [11:58,  4.87it/s]
750it [00:38, 19.48it/s]


#################### Epoch: 5 ####################
Train loss: 2.1635126913615634 || Mean BLEU score: 0.4130770414300886 || One epoch training time: 0hrs 12mins 37secs
Validation loss: 2.2912527311642963 || Mean Validation BLEU score: 0.4249349384895645


In [None]:
sos_id=2
eos_id=3
pad_id=0
def greedy_search( e_output, e_mask):
        last_words = torch.LongTensor([pad_id] * seq_len).to(device)
        last_words[0] = sos_id # (L)
        cur_len = 1

        for i in range(seq_len):
            d_mask = (last_words.unsqueeze(0) != pad_id).unsqueeze(1).to(device)
            nopeak_mask = torch.ones([1, seq_len, seq_len], dtype=torch.bool).to(device)
            nopeak_mask = torch.tril(nopeak_mask)
            d_mask = d_mask & nopeak_mask

            trg_embedded = model.trg_embedding(last_words.unsqueeze(0))
            trg_positional_encoded = model.positional_encoder(trg_embedded)
            decoder_output = model.decoder(
                trg_positional_encoded,
                e_output,
                e_mask,
                d_mask)

            output = model.softmax(
                model.output_linear(decoder_output))

            output = torch.argmax(output, dim=-1)
            last_word_id = output[0][i].item()

            if i < seq_len-1:
                last_words[i+1] = last_word_id
                cur_len += 1

            if last_word_id == eos_id:
                break

        if last_words[-1].item() == pad_id:
            decoded_output = last_words[1:cur_len].tolist()
        else:
            decoded_output = last_words[1:].tolist()

        decoded_output = ' '.join([list(ukr_word_index.keys())[list(ukr_word_index.values()).index(i)] for i in decoded_output])

        return decoded_output



def translate_text(input_text):
    model.eval()
    input_seq = sentence_src_to_tokens(input_text, eng_word_index)

    input_seq_pad = pad_sequences_src([input_seq], max_target_len)

    src_data = torch.LongTensor(input_seq_pad[0]).unsqueeze(0).to(device)

    e_mask = (src_data != pad_id).unsqueeze(1).to(device)

    src_data = model.src_embedding(src_data)
    src_data = model.positional_encoder(src_data)
    e_output = model.encoder(src_data, e_mask)

    result = greedy_search(e_output, e_mask)

    return result





for test_pair in test_data:
    input_text, target_text = test_pair
    print(f'Input: {input_text}')
    print(f'Target: {target_text}')
    predicted_text = translate_text(input_text)
    print(f'Predicted: {predicted_text}')
    print('---')


Input: youre aggressive
Target: ти агресивний
Predicted: ти дуже зайнятий <eos>
---
Input: tom is buying beer
Target: том купує пива
Predicted: том дуже любить <eos>
---
Input: there was something else
Target: там було щось ще
Predicted: це було дуже добре <eos>
---
Input: weve done that before
Target: ми вже це робили раніше
Predicted: ми це було <eos>
---
Input: toms french is perfect
Target: у тома бездоганна французька
Predicted: це було скасовано <eos>
---
Input: tom is giddy
Target: у тома паморочиться в голові
Predicted: том дуже пяний <eos>
---
Input: please do it quickly
Target: будь ласка зроби це швидко
Predicted: це було б це зробити <eos>
---
Input: i play in a band
Target: я граю в групі
Predicted: я в бостоні <eos>
---
Input: we love your books
Target: ми любимо твої книжки
Predicted: ми маємо подобається <eos>
---
Input: i read the old documents
Target: я читав старі документи
Predicted: я люблю грати в бостоні <eos>
---
Input: tom stole marys lunch
Target: том поцупив 

KeyboardInterrupt: ignored

In [None]:
import matplotlib.pyplot as plt


# Витягуємо значення втрати та метрики accuracy з історії
loss = history.history['loss']
accuracy = history.history['accuracy']

# Побудова графіка для втрати
plt.plot(range(1, len(loss) + 1), loss, label='Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Побудова графіка для accuracy
plt.plot(range(1, len(accuracy) + 1), accuracy, label='Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()