In [None]:
# Mount Drive
from google.colab import drive
import os

# Mount Google Drive at /content/drive.
drive.mount('/content/drive', force_remount=True)

# Root folder for everything this notebook writes. Change this path if you
# want the results stored in a different Drive folder.
path_to_root_dir = "/content/drive/MyDrive/deep learning folder/ensembleLSTM-CNN hybrid model/seq2seq"

# makedirs also creates any missing parent folders (os.mkdir raises
# FileNotFoundError when e.g. "deep learning folder" does not exist yet), and
# exist_ok=True keeps the cell safe to re-run.
os.makedirs(path_to_root_dir, exist_ok=True)

# Change directory to where the results should be saved
os.chdir(path_to_root_dir)

In [None]:
import torch
import spacy
from torchtext.data.metrics import bleu_score
import sys


# Lazily-created spaCy pipeline shared by every translate_sentence call.
_GERMAN_NLP = None


def _german_nlp():
    """Return the German spaCy pipeline, loading it only on first use."""
    global _GERMAN_NLP
    if _GERMAN_NLP is None:
        _GERMAN_NLP = spacy.load("de_core_news_sm")
    return _GERMAN_NLP


def translate_sentence(model, sentence, german, english, device, max_length=50):
    """Greedily decode a translation for a single source sentence.

    Args:
        model: Module called as ``model(src, trg)``; its output is indexed as
            ``(trg_len, batch, vocab)``.
        sentence: Either a raw string (tokenized here with spaCy) or an
            already-tokenized sequence of strings.
        german: Source field; ``init_token``, ``eos_token`` and ``vocab.stoi``
            are read from it.
        english: Target field; ``vocab.stoi`` and ``vocab.itos`` are read.
        device: Device the index tensors are created on.
        max_length: Maximum number of decoding steps.

    Returns:
        The predicted target tokens without the leading ``<sos>``. The list
        ends with ``<eos>`` only if the model produced it within
        ``max_length`` steps.
    """
    # Lower-case everything because the vocabulary was built lower-cased.
    if isinstance(sentence, str):
        # Only raw strings need spaCy; pre-tokenized input skips the load.
        tokens = [token.text.lower() for token in _german_nlp()(sentence)]
    else:
        tokens = [token.lower() for token in sentence]

    # Add <SOS> and <EOS> at the beginning and end respectively
    tokens = [german.init_token] + tokens + [german.eos_token]

    # Convert every source token to its vocabulary index, shape (src_len, 1)
    text_to_indices = [german.vocab.stoi[token] for token in tokens]
    sentence_tensor = torch.tensor(
        text_to_indices, dtype=torch.long, device=device
    ).unsqueeze(1)

    eos_idx = english.vocab.stoi["<eos>"]
    outputs = [english.vocab.stoi["<sos>"]]

    # Decode in eval mode so dropout cannot perturb the prediction, then put
    # the model back into whatever mode the caller had it in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for _ in range(max_length):
                trg_tensor = torch.tensor(
                    outputs, dtype=torch.long, device=device
                ).unsqueeze(1)
                output = model(sentence_tensor, trg_tensor)

                # Greedy choice: most likely token at the last decoded position
                best_guess = output.argmax(2)[-1, :].item()
                outputs.append(best_guess)

                if best_guess == eos_idx:
                    break
    finally:
        model.train(was_training)

    # Map indices back to tokens and drop the start token
    return [english.vocab.itos[idx] for idx in outputs[1:]]


def bleu(data, model, german, english, device):
    """Compute the corpus BLEU score of ``model`` over ``data``.

    Args:
        data: Iterable of examples exposing ``src`` and ``trg`` attributes
            (token lists).
        model: Translation model passed through to ``translate_sentence``.
        german: Source field passed through to ``translate_sentence``.
        english: Target field passed through to ``translate_sentence``.
        device: Device passed through to ``translate_sentence``.

    Returns:
        The value returned by ``bleu_score`` for all predictions against
        their single reference translation.
    """
    targets = []
    outputs = []

    # Score in eval mode (no dropout) and restore the caller's mode afterwards.
    was_training = model.training
    model.eval()
    try:
        for example in data:
            src = vars(example)["src"]
            trg = vars(example)["trg"]

            prediction = translate_sentence(model, src, german, english, device)
            # Strip the trailing <eos> only when it is really there: a
            # prediction cut off at max_length ends in a real token that must
            # not be discarded.
            if prediction and prediction[-1] == "<eos>":
                prediction = prediction[:-1]

            # bleu_score expects a list of references per candidate
            targets.append([trg])
            outputs.append(prediction)
    finally:
        model.train(was_training)

    return bleu_score(outputs, targets)


def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Announce the save on stdout, then serialize ``state`` to ``filename`` via ``torch.save``."""
    print("=> Saving checkpoint")
    torch.save(obj=state, f=filename)


def load_checkpoint(checkpoint, model, optimizer=None):
    """Restore model (and optionally optimizer) state from a checkpoint dict.

    Args:
        checkpoint: Mapping with a ``"state_dict"`` entry and, when an
            optimizer is given, an ``"optimizer"`` entry.
        model: Module whose ``load_state_dict`` receives ``"state_dict"``.
        optimizer: Optimizer whose ``load_state_dict`` receives
            ``"optimizer"``. Pass ``None`` (the default) to restore only the
            model weights, e.g. for inference.
    """
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint["optimizer"])

In [None]:
# !pip install torchtext==0.6.0
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter # to print to tensorboard
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator
import numpy as np
import spacy
import random

def _load_spacy_model(name):
  """Return the spaCy pipeline `name`, downloading it only when it is not installed yet."""
  try:
    return spacy.load(name)
  except OSError:
    # spacy.load raises OSError when the model package cannot be found;
    # fetch it once and retry instead of re-downloading on every run.
    spacy.cli.download(name)
    return spacy.load(name)

spacy_ger = _load_spacy_model('de_core_news_sm')
spacy_eng = _load_spacy_model('en_core_web_sm')

def tokenizer_ger(text):
  """Split German `text` into a list of token strings with the `spacy_ger` tokenizer."""
  pieces = []
  for piece in spacy_ger.tokenizer(text):
    pieces.append(piece.text)
  return pieces

def tokenizer_eng(text):
  """Split English `text` into a list of token strings with the `spacy_eng` tokenizer."""
  pieces = []
  for piece in spacy_eng.tokenizer(text):
    pieces.append(piece.text)
  return pieces

# Source (German) and target (English) fields: spaCy tokenization,
# lower-casing, and <sos>/<eos> markers around every sequence.
german = Field(
    tokenize=tokenizer_ger,
    lower=True,
    init_token='<sos>',
    eos_token='<eos>',
)
english = Field(
    tokenize=tokenizer_eng,
    lower=True,
    init_token='<sos>',
    eos_token='<eos>',
)

# Multi30k German -> English splits, processed with the fields above.
train_data, validation_data, test_data = Multi30k.splits(
    exts=('.de', '.en'),
    fields=(german, english),
)

# Vocabularies are built from the training split only.
german.build_vocab(train_data, max_size=10000, min_freq=2)
english.build_vocab(train_data, max_size=10000, min_freq=2)

In [None]:
class Transformer(nn.Module):
  """Seq2seq model: learned word + position embeddings around ``nn.Transformer``.

  Inputs are sequence-first index tensors: ``src`` is ``(src_len, N)`` and
  ``trg`` is ``(trg_len, N)``. The output is ``(trg_len, N, trg_vocab_size)``.
  """

  def __init__(self, embedding_size, src_vocab_size, trg_vocab_size, src_pad_idx,
               num_heads, num_encoder_layers, num_decoder_layers, forward_expansion,
               dropout, max_len, device
               ):
    """
    Args:
      embedding_size: Width of the embeddings and of the transformer model.
      src_vocab_size: Number of rows in the source word embedding.
      trg_vocab_size: Number of rows in the target word embedding and size of
        the output projection.
      src_pad_idx: Source index treated as padding when building the mask.
      num_heads: Number of attention heads.
      num_encoder_layers: Number of encoder layers.
      num_decoder_layers: Number of decoder layers.
      forward_expansion: Passed to ``nn.Transformer`` as ``dim_feedforward``.
      dropout: Dropout probability (embeddings and transformer).
      max_len: Number of rows in the position embeddings; sequences must not
        be longer than this.
      device: Stored on the instance as ``self.device``. ``forward`` derives
        devices from its inputs and does not read it.
    """
    super().__init__()
    self.src_word_embedding = nn.Embedding(src_vocab_size, embedding_size)
    # NOTE: the attribute name (with its triple "d") is kept as-is because it
    # is part of the state_dict keys of already-saved checkpoints.
    self.src_position_embeddding = nn.Embedding(max_len, embedding_size)
    self.trg_word_embedding = nn.Embedding(trg_vocab_size, embedding_size)
    self.trg_position_embedding = nn.Embedding(max_len, embedding_size)
    self.device = device
    self.transformer = nn.Transformer(embedding_size, num_heads,
                                      num_encoder_layers=num_encoder_layers,
                                      num_decoder_layers=num_decoder_layers,
                                      dim_feedforward=forward_expansion,
                                      dropout=dropout,
                                      )
    self.fc_out = nn.Linear(embedding_size, trg_vocab_size)
    self.dropout = nn.Dropout(dropout)
    self.src_pad_idx = src_pad_idx

  def make_src_mask(self, src):
    """Return a boolean ``(N, src_len)`` mask that is True at padding positions."""
    # src shape = (src_len, N)
    src_mask = src.transpose(0, 1) == self.src_pad_idx
    # src_mask shape = (N, src_len)
    return src_mask

  def forward(self, src, trg):
    """Run the model on ``src`` ``(src_len, N)`` and ``trg`` ``(trg_len, N)``.

    Returns:
      Tensor of shape ``(trg_len, N, trg_vocab_size)`` with unnormalized scores.
    """
    src_seq_length, N1 = src.shape
    trg_seq_length, N2 = trg.shape

    # Position indices 0..len-1, broadcast over the batch. They are created
    # directly on the input's device, so no CPU -> device copy happens per
    # step and the model keeps working after being moved with .to().
    src_position = (
        torch.arange(src_seq_length, device=src.device)
        .unsqueeze(1)
        .expand(src_seq_length, N1)
    )
    trg_position = (
        torch.arange(trg_seq_length, device=trg.device)
        .unsqueeze(1)
        .expand(trg_seq_length, N2)
    )

    # Word embedding + position embedding, followed by dropout
    embed_src = self.dropout(
        self.src_word_embedding(src) + self.src_position_embeddding(src_position)
    )
    embed_trg = self.dropout(
        self.trg_word_embedding(trg) + self.trg_position_embedding(trg_position)
    )

    src_padding_mask = self.make_src_mask(src)
    # Causal mask: each target position may only attend to earlier positions.
    trg_mask = self.transformer.generate_square_subsequent_mask(trg_seq_length).to(
        trg.device
    )
    out = self.transformer(
        embed_src,
        embed_trg,
        src_key_padding_mask=src_padding_mask,
        tgt_mask=trg_mask,
        # Also hide padded source positions from the decoder's
        # cross-attention; otherwise the output of a sentence depends on how
        # much padding its batch happened to need.
        memory_key_padding_mask=src_padding_mask,
    )
    return self.fc_out(out)


In [None]:
# Setup the training phase
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
load_model = False
save_model = True

# Training hyperparameters
num_epochs = 5
learning_rate = 3e-4
batch_size = 32

# Model hyperparameters
src_vocab_size = len(german.vocab)
trg_vocab_size = len(english.vocab)
embedding_size = 512
num_heads = 8
num_encoder_layers = 3
num_decoder_layers = 3
dropout = 0.1
max_len = 100
forward_expansion = 2048
# The source sentences are German, so the padding index used to mask the
# source must come from the German vocabulary, not the English one.
src_pad_idx = german.vocab.stoi["<pad>"]

# Tensorboard for nice plots
writer = SummaryWriter("runs/loss_plot")
step = 0

In [None]:
# Batch iterators for the three splits; examples are sorted by source length
# inside each batch.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, validation_data, test_data),
    batch_size=batch_size,
    sort_within_batch=True,
    sort_key=lambda example: len(example.src),
    device=device,
)

# Build the model from the hyperparameters defined above and move it to `device`.
model = Transformer(
    embedding_size=embedding_size,
    src_vocab_size=src_vocab_size,
    trg_vocab_size=trg_vocab_size,
    src_pad_idx=src_pad_idx,
    num_heads=num_heads,
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
    forward_expansion=forward_expansion,
    dropout=dropout,
    max_len=max_len,
    device=device,
).to(device)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Target padding positions are excluded from the loss.
pad_idx = english.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

In [None]:
num_epochs = 150
checkpoint_filename="my_checkpoint_trans.pth.tar"

if load_model:
    # map_location lets a checkpoint written on a GPU runtime be restored on
    # whatever device this session is using.
    load_checkpoint(
        torch.load(checkpoint_filename, map_location=device), model, optimizer
    )

# Fixed example sentence translated once per epoch to eyeball progress
sentence = 'Ein Boot mit mehreren Männern wird von einem großen Pferdegespann ans Ufer gezogen..'

for epoch in range(num_epochs):
    print(f"Epoch [{epoch} / {num_epochs}]")

    model.eval()
    translated_sentence = translate_sentence(model, sentence, german, english, device, max_length=100)
    print(f"Translated example sentence \n {translated_sentence}")
    model.train()

    for batch in train_iterator:
        inp_data = batch.src.to(device)
        target = batch.trg.to(device)

        # Teacher forcing: feed the target without its last token ...
        output = model(inp_data, target[:-1])
        # output shape: (trg_len, batch_size, output_dim)

        # ... and score against the target shifted left by one position.
        output = output.reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)

        loss.backward()

        # Clip the gradient norm to keep updates bounded
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

        # Log a plain float rather than a tensor attached to the graph
        writer.add_scalar('Training loss', loss.item(), global_step=step)
        step += 1

    # Save after the epoch's updates so the checkpoint always holds the latest
    # weights (saving at the start of the epoch never stored the final
    # epoch), and honor the save_model switch.
    if save_model:
        checkpoint = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
        save_checkpoint(checkpoint, filename=checkpoint_filename)

# Make sure every logged scalar reaches disk
writer.flush()

In [None]:
# The training loop leaves the model in train mode; switch to eval mode so
# dropout is disabled while the test-set BLEU score is computed.
model.eval()
score = bleu(test_data, model, german, english, device)
print(f"Bleu score {score*100:.2f}")