# Demo

# To run this notebook in Google Colab, first upload `Dict.zip`, `BaseModel.zip` and `NewModel.zip` to the session's working folder; the cells below load them from `./`.

In [1]:
from google.colab import files
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext.data.utils import get_tokenizer
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchtext.vocab import build_vocab_from_iterator
import numpy as np
import pickle

In [3]:
# Tokenizer and vocabulary used to turn a text description into token ids.
# NOTE(review): torch.load unpickles the file, so only load a Dict.zip you trust.
tokenizer = get_tokenizer('basic_english')
vocab = torch.load('./Dict.zip')

# Class index -> emotion name; the list position is the class index
# that the inference cells look up after argmax.
label_dict = dict(enumerate([
    "fear",
    "sadness",
    "anger",
    "disgust",
    "contentment",
    "awe",
    "something else",
    "amusement",
    "excitement",
]))

# Model definitions (needed before the saved models can be loaded)

In [4]:
class FeedForwardNetwork(nn.Module):
    """Two-layer MLP applied to the last dimension: Linear -> LeakyReLU -> Dropout -> Linear.

    The feature size goes ``input_dim -> ff_dim -> input_dim``, so the output
    has the same shape as the input.

    Args:
        input_dim: size of the last dimension of both input and output.
        ff_dim: width of the hidden layer.
        dropout: dropout probability applied after the activation.
    """

    def __init__(self, input_dim, ff_dim, dropout):
        super().__init__()

        # Attribute names and their registration order are the state-dict
        # layout of saved models, so they are kept exactly as they were.
        self.linear_layer_1 = nn.Linear(input_dim, ff_dim)
        self.activation_layer = nn.LeakyReLU()
        self.dropout_layer = nn.Dropout(p=dropout)
        self.linear_layer_2 = nn.Linear(ff_dim, input_dim)

    def forward(self, x: torch.Tensor):
        """Run ``x`` through expand -> activate -> dropout -> project back."""
        hidden = self.activation_layer(self.linear_layer_1(x))
        return self.linear_layer_2(self.dropout_layer(hidden))

class PositionalEncoding(nn.Module):
    """Adds a fixed sine/cosine position table to its input, then applies dropout.

    The table is stored in the buffer ``pe`` with shape ``(max_len, 1, d_model)``:
    even feature columns hold ``sin(pos * freq)`` and odd columns ``cos(pos * freq)``.

    Args:
        d_model: size of the feature (last) dimension.
        dropout: dropout probability applied after the addition.
        max_len: number of positions pre-computed in the table.
        batch_first: layout of the input passed to ``forward``.
            ``False`` (default, the original behaviour): the table is sliced by
            ``x.size(0)``, i.e. dim 0 is treated as the position axis.
            ``True``: input is ``(batch, seq, d_model)`` and the table is sliced
            by ``x.size(1)`` so that every sequence gets the same per-token offsets.

    NOTE(review): with the default layout and a batch-first input, the row of the
    table is selected by the *batch* index and broadcast over all tokens of that
    sample. The models in this notebook appear to call the layer that way; they
    were trained like this, so the default is deliberately left unchanged.
    """

    def __init__(self, d_model, dropout=0.0, max_len=5000, batch_first=False):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.batch_first = batch_first

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one cosine column fewer than sine columns;
        # without the slice the assignment fails with a shape mismatch.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0).transpose(0, 1)  # -> (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + positional table)`` using the configured layout."""
        # getattr: instances restored from a pickled model skip __init__ and
        # therefore may not carry the attribute; they keep the original layout.
        if getattr(self, 'batch_first', False):
            # (seq, 1, d_model) -> (1, seq, d_model), broadcast over the batch.
            x = x + self.pe[:x.size(1)].transpose(0, 1)
        else:
            x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

class TransformerCell(nn.Module):
    """One encoder block: self-attention, then a feed-forward network.

    Each sub-layer is followed by dropout, a residual addition and a LayerNorm.
    The ``batchnorm_layer_*`` attributes are ``nn.LayerNorm`` modules despite
    their names; the names are kept because they are state-dict keys.

    Args:
        input_dim: feature size of the input and output.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward network.
        dropout: dropout probability used after both sub-layers and inside
            the feed-forward network.
    """

    def __init__(self, input_dim: int, num_heads: int, ff_dim: int, dropout: float):
        super().__init__()

        # Self-attention sub-layer (inputs are batch-first).
        self.multi_attention = nn.MultiheadAttention(input_dim, num_heads, batch_first=True)
        self.dropout_layer_1 = nn.Dropout(dropout)
        self.batchnorm_layer_1 = nn.LayerNorm(input_dim)

        # Feed-forward sub-layer.
        self.feedfoward_layer = FeedForwardNetwork(input_dim, ff_dim, dropout)
        self.dropout_layer_2 = nn.Dropout(dropout)
        self.batchnorm_layer_2 = nn.LayerNorm(input_dim)

    def forward(self, x: torch.Tensor, mask: torch.Tensor=None):
        """Apply the block to ``x``.

        ``mask`` is handed to the attention module as its ``key_padding_mask``
        (the fourth positional argument of ``nn.MultiheadAttention.forward``).
        """
        attended, _ = self.multi_attention(x, x, x, key_padding_mask=mask)
        attended = self.batchnorm_layer_1(x + self.dropout_layer_1(attended))

        transformed = self.dropout_layer_2(self.feedfoward_layer(attended))
        return self.batchnorm_layer_2(transformed + attended)

class TransformerBaseline(nn.Module):
    """Text classifier: embedding -> positional layer -> TransformerCell stack -> mean pool -> linear.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: embedding / model feature size.
        num_heads: attention heads per cell.
        trx_ff_dim: hidden width of each cell's feed-forward network.
        num_trx_cells: number of stacked TransformerCell blocks.
        num_class: number of output logits.
        dropout: dropout probability passed to every cell.
        pad_token: index used as ``padding_idx`` of the embedding.

    NOTE(review): ``positional_layer`` slices its table by ``x.size(0)``, while
    the tensors fed to it here look batch-first (the cells use
    ``batch_first=True`` and pooling averages dim 1). The positional offset
    therefore appears to be chosen by batch index, not token position. Confirm
    before retraining; changing it would alter what saved checkpoints see.
    """

    def __init__(self,
            vocab_size: int, embed_dim: int, num_heads: int, trx_ff_dim: int,
            num_trx_cells: int, num_class: int, dropout: float=0.1, pad_token: int=1
        ):
        super().__init__()

        self.embed_dim = embed_dim

        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_token)
        self.positional_layer = PositionalEncoding(embed_dim)
        self.transform_layer = nn.ModuleList(
            [TransformerCell(embed_dim, num_heads, trx_ff_dim, dropout) for _ in range(num_trx_cells)]
        )
        self.output_layer = nn.Linear(embed_dim, num_class)

    def forward(self, text, mask=None):
        """Return class logits for a batch of token-id sequences.

        ``mask`` is forwarded unchanged to every cell.
        """
        # Embeddings are scaled by sqrt(embed_dim) before the positional layer.
        hidden = self.positional_layer(self.embedding(text) * math.sqrt(self.embed_dim))
        for cell in self.transform_layer:
            hidden = cell(hidden, mask)

        # Average over dim 1 (every position contributes equally), then classify.
        pooled = torch.mean(hidden, 1)
        return self.output_layer(pooled)

class TransformerLSTMCell(nn.Module):
    """Encoder block with a self-attention branch and a parallel bidirectional-LSTM branch.

    Both branches read the same input ``x``:

    * attention branch: self-attention -> dropout -> residual add -> LayerNorm,
      then a feed-forward network followed by dropout;
    * LSTM branch: bidirectional LSTM whose two directions are averaged, then
      its own feed-forward network.

    The output is ``LayerNorm(ffn(attn) + (attn + lstm_ffn))``.

    The ``batchnorm_layer_*`` attributes are ``nn.LayerNorm`` modules despite
    their names; the names are kept because they are state-dict keys.

    Args:
        input_dim: feature size of the input and output (also the LSTM hidden size).
        num_heads: number of attention heads.
        ff_dim: hidden width of both feed-forward networks.
        dropout: dropout probability for the dropout layers and the
            feed-forward networks.
    """

    def __init__(self, input_dim: int, num_heads: int, ff_dim: int, dropout: float):

        super(TransformerLSTMCell, self).__init__()

        # nn.LSTM only applies its ``dropout`` between stacked layers. With the
        # default single layer a non-zero value does nothing except raise a
        # UserWarning on construction, so it is no longer passed.
        self.lstm = nn.LSTM(input_dim, input_dim, batch_first=True, bidirectional=True)
        self.feedfoward_layer_lstm = FeedForwardNetwork(input_dim, ff_dim, dropout)

        self.multi_attention = nn.MultiheadAttention(input_dim, num_heads, batch_first=True)
        self.dropout_layer_1 = nn.Dropout(dropout)
        self.batchnorm_layer_1 = nn.LayerNorm(input_dim)

        self.feedfoward_layer_multi = FeedForwardNetwork(input_dim, ff_dim, dropout)
        self.dropout_layer_2 = nn.Dropout(dropout)
        self.batchnorm_layer_2 = nn.LayerNorm(input_dim)

    def forward(self, x: torch.Tensor, mask: torch.Tensor=None):
        """Apply the block to ``x``.

        ``mask`` is handed to the attention module as its fourth positional
        argument (``key_padding_mask``); the LSTM branch does not use it.
        """
        # Attention branch.
        attn_out, _ = self.multi_attention(x, x, x, mask)
        attn_out = self.batchnorm_layer_1(x + self.dropout_layer_1(attn_out))

        # LSTM branch: the last dimension holds the forward half followed by
        # the backward half, each as wide as x; average the two halves.
        lstm_out, _ = self.lstm(x)
        width = x.shape[-1]
        lstm_out = (lstm_out[:, :, :width] + lstm_out[:, :, width:]) / 2.0
        lstm_out = self.feedfoward_layer_lstm(lstm_out)

        # Feed-forward on the attention branch, then merge both branches.
        ff_out = self.dropout_layer_2(self.feedfoward_layer_multi(attn_out))
        return self.batchnorm_layer_2(ff_out + (attn_out + lstm_out))
    
class TransformerLSTM(nn.Module):
    """Text classifier: embedding -> positional layer -> TransformerLSTMCell stack -> mean pool -> linear.

    Same pipeline as the baseline classifier, but each block is a
    TransformerLSTMCell.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: embedding / model feature size.
        num_heads: attention heads per cell.
        trx_ff_dim: hidden width of each cell's feed-forward networks.
        num_trx_cells: number of stacked TransformerLSTMCell blocks.
        num_class: number of output logits.
        dropout: dropout probability passed to every cell.
        pad_token: index used as ``padding_idx`` of the embedding.

    NOTE(review): ``positional_layer`` slices its table by ``x.size(0)``, while
    the tensors fed to it here look batch-first (the cells use
    ``batch_first=True`` and pooling averages dim 1). The positional offset
    therefore appears to be chosen by batch index, not token position. Confirm
    before retraining; changing it would alter what saved checkpoints see.
    """

    def __init__(self,
            vocab_size: int, embed_dim: int, num_heads: int, trx_ff_dim: int,
            num_trx_cells: int, num_class: int, dropout: float=0.1, pad_token: int=1
        ):
        super().__init__()

        self.embed_dim = embed_dim

        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_token)
        self.positional_layer = PositionalEncoding(embed_dim)
        self.transform_layer = nn.ModuleList(
            [TransformerLSTMCell(embed_dim, num_heads, trx_ff_dim, dropout) for _ in range(num_trx_cells)]
        )
        self.output_layer = nn.Linear(embed_dim, num_class)

    def forward(self, text, mask=None):
        """Return class logits for a batch of token-id sequences.

        ``mask`` is forwarded unchanged to every cell.
        """
        # Embeddings are scaled by sqrt(embed_dim) before the positional layer.
        hidden = self.positional_layer(self.embedding(text) * math.sqrt(self.embed_dim))
        for cell in self.transform_layer:
            hidden = cell(hidden, mask)

        # Average over dim 1 (every position contributes equally), then classify.
        pooled = torch.mean(hidden, 1)
        return self.output_layer(pooled)

# Importing model

In [5]:
# Both files are loaded as complete model objects onto the CPU and switched to
# evaluation mode (eval() returns the module itself).
# NOTE(review): torch.load unpickles the files, which can execute arbitrary
# code; only load model files you trust.
base_model = torch.load('./BaseModel.zip', map_location=torch.device('cpu')).eval()

new_model = torch.load('./NewModel.zip', map_location=torch.device('cpu'))
# Kept as the last expression so the cell displays the model structure.
new_model.eval()

TransformerLSTM(
  (embedding): Embedding(53558, 120, padding_idx=1)
  (positional_layer): PositionalEncoding(
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (transform_layer): ModuleList(
    (0): TransformerLSTMCell(
      (lstm): LSTM(120, 120, batch_first=True, dropout=0.1, bidirectional=True)
      (feedfoward_layer_lstm): FeedForwardNetwork(
        (linear_layer_1): Linear(in_features=120, out_features=28, bias=True)
        (activation_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (linear_layer_2): Linear(in_features=28, out_features=120, bias=True)
      )
      (multi_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=120, out_features=120, bias=True)
      )
      (dropout_layer_1): Dropout(p=0.1, inplace=False)
      (batchnorm_layer_1): LayerNorm((120,), eps=1e-05, elementwise_affine=True)
      (feedfoward_layer_multi): FeedForwardNetwork(
        (linear_layer_1): Lin

# Running Inference

In [29]:
# Input painting description: tokenize, map tokens to vocabulary ids and
# shape the ids as a single-row batch.
text = torch.IntTensor(
    vocab(tokenizer("the student can finally rest after finishing the work"))
).reshape(1, -1)

# Prediction on the description: print the highest-scoring label of each model.
for caption, model in (
    ("base model's prediction: ", base_model),
    ("new model's prediction: ", new_model),
):
    prediction = model(text).argmax(1)
    print(caption + label_dict[prediction.item()])

base model's prediction: contentment
new model's prediction: awe


In [28]:
# Input painting description: tokenize, map tokens to vocabulary ids and
# shape the ids as a single-row batch.
text = torch.IntTensor(
    vocab(tokenizer("the student is hopeful that he can get a passing grade"))
).reshape(1, -1)

# Prediction on the description: print the highest-scoring label of each model.
for caption, model in (
    ("base model's prediction: ", base_model),
    ("new model's prediction: ", new_model),
):
    prediction = model(text).argmax(1)
    print(caption + label_dict[prediction.item()])

base model's prediction: something else
new model's prediction: contentment


In [27]:
# Input painting description: tokenize, map tokens to vocabulary ids and
# shape the ids as a single-row batch.
text = torch.IntTensor(
    vocab(tokenizer("the street is empty and lonely because of the pandemic"))
).reshape(1, -1)

# Prediction on the description: print the highest-scoring label of each model.
for caption, model in (
    ("base model's prediction: ", base_model),
    ("new model's prediction: ", new_model),
):
    prediction = model(text).argmax(1)
    print(caption + label_dict[prediction.item()])

base model's prediction: fear
new model's prediction: sadness
