In [2]:
import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchtext
from torchtext import data


In [3]:
 # baseline model
 # It first converts each of the word tokens into a vector using the GloVe word embeddings that were downloaded. It then computes 
 # the average of those word embeddings in a given sentence. The idea is that this becomes the ‘average’ meaning of the entire 
 # sentence. This is fed to a fully connected layer which produces a scalar output with sigmoid activation (which should be computed 
 # inside the BCEWithLogitsLoss loss function) to represent the probability that the sentence is in the subjective class.
class Baseline(torch.nn.Module):
	"""Averaged-word-embedding baseline classifier.

	Each token index is looked up in a frozen embedding table built from
	pretrained vectors, the embeddings are averaged over dimension 0, and
	the average is passed through a single linear layer that emits one
	logit per column of the input.

	Args:
		glove: object exposing a 2-D float tensor ``vectors`` of shape
			[vocab size, embedding dim] (e.g. a torchtext GloVe instance).
	"""

	def __init__(self, glove):
		super(Baseline, self).__init__()
		self.embedding = torch.nn.Embedding.from_pretrained(glove.vectors)
		# Size the classifier from the embedding table instead of hard-coding
		# 100, so the model also works with other vector widths. For 100-d
		# vectors this yields the same Linear(100, 1) as before, so existing
		# checkpoints still load.
		self.fc = torch.nn.Linear(self.embedding.embedding_dim, 1)

	def forward(self, x):
		"""Return raw logits (no sigmoid applied).

		Args:
			x: LongTensor of token indices; dimension 0 is averaged away
				(callers in this file pass [sent len, 1]).

		Returns:
			Tensor of logits with all size-1 dimensions squeezed out, so a
			single-sentence input yields a 0-d tensor.
		"""
		x = self.embedding(x)
		# Average the token embeddings along dimension 0.
		x = torch.mean(x, dim=0)
		x = self.fc(x).squeeze()
		return x

# cnn model
class CNN(nn.Module):
	"""Convolutional sentence classifier.

	Token indices are embedded, convolved with one Conv2d per entry of
	``filter_sizes`` (each kernel spans ``fs`` tokens by the full embedding
	width), ReLU-activated, max-pooled over the sequence axis, concatenated,
	passed through dropout and finally projected to ``output_dim`` logits.

	Args:
		vocab_size: number of rows in the embedding table.
		embedding_dim: width of each embedding vector.
		n_filters: output channels of every convolution.
		filter_sizes: iterable of kernel heights (in tokens).
		output_dim: number of output logits.
		dropout: dropout probability applied before the final layer.
	"""

	def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, dropout):
		super().__init__()
		self.embedding = nn.Embedding(vocab_size, embedding_dim)
		conv_layers = []
		for height in filter_sizes:
			conv_layers.append(
				nn.Conv2d(in_channels=1, out_channels=n_filters, kernel_size=(height, embedding_dim))
			)
		self.convs = nn.ModuleList(conv_layers)
		self.fc = nn.Linear(n_filters * len(filter_sizes), output_dim)
		self.dropout = nn.Dropout(dropout)

	def forward(self, text):
		"""Map token indices of shape [sent len, batch size] to logits of shape [batch size, output_dim]."""
		# [sent len, batch size] -> [batch size, sent len]
		batch_first = text.permute(1, 0)
		# [batch size, sent len, emb dim] -> [batch size, 1, sent len, emb dim]
		feature_map = self.embedding(batch_first).unsqueeze(1)

		pooled = []
		for conv in self.convs:
			# [batch size, n_filters, sent len - kernel height + 1]
			activated = F.relu(conv(feature_map)).squeeze(3)
			# Max over the whole remaining sequence axis -> [batch size, n_filters]
			pooled.append(F.max_pool1d(activated, activated.shape[2]).squeeze(2))

		# [batch size, n_filters * len(filter_sizes)]
		features = self.dropout(torch.cat(pooled, dim=1))
		return self.fc(features)

# Hyper-parameters of the saved CNN checkpoint.
EMBEDDING_DIM = 100
N_FILTERS = 50
FILTER_SIZES = [2, 4]
OUTPUT_DIM = 1
DROPOUT = 0.5

# Pretrained GloVe vectors; the vocabulary size fixes the embedding table height.
glove = torchtext.vocab.GloVe(name="6B", dim=EMBEDDING_DIM)
INPUT_DIM = len(glove.vectors)

In [4]:
# Load the baseline model from its saved checkpoint.
# map_location pins the tensors to the CPU so the load also works on a
# CPU-only machine when the checkpoint was written from a GPU run (the CNN
# load below already did this).
# NOTE(review): torch.load unpickles the file - only load checkpoints that
# come from a trusted source.
baseline_model = Baseline(glove)
baseline_model.load_state_dict(torch.load('baseline.pt', map_location=torch.device('cpu')))
baseline_model.eval()

# Load the CNN model from its saved checkpoint.
cnn_model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT)
cnn_model.load_state_dict(torch.load('cnn.pt', map_location=torch.device('cpu')))
cnn_model.eval()



CNN(
  (embedding): Embedding(400000, 100)
  (convs): ModuleList(
    (0): Conv2d(1, 50, kernel_size=(2, 100), stride=(1, 1))
    (1): Conv2d(1, 50, kernel_size=(4, 100), stride=(1, 1))
  )
  (fc): Linear(in_features=100, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [5]:
# baseline prediction function with output probability and label
def baseline_predict(sentence):
	"""Classify a sentence with the baseline model.

	Args:
		sentence: raw text; it is split on whitespace into tokens.

	Returns:
		A ``(probability, label)`` tuple where ``probability`` is the sigmoid
		of the model logit and ``label`` is "Subjective" when the probability
		exceeds 0.5, otherwise "Objective". For empty or whitespace-only
		input there is nothing to average, so ``(nan, "No input provided")``
		is returned instead of a meaningless prediction.
	"""
	tokens = (sentence or "").split()
	if not tokens:
		# Averaging zero embeddings would produce NaN and silently be
		# labelled "Objective"; report the missing input explicitly.
		return float("nan"), "No input provided"

	# Tokens missing from the vocabulary fall back to the last index.
	unk_index = len(glove.stoi) - 1
	# Try the token as typed first, then its lowercase form, so that
	# capitalised words are not all collapsed onto the fallback index.
	token_ints = [
		glove.stoi.get(tok, glove.stoi.get(tok.lower(), unk_index))
		for tok in tokens
	]
	# Shape [sent len, 1]: a batch containing a single sentence.
	token_tensor = torch.LongTensor(token_ints).view(-1, 1)

	# Inference only - no autograd graph needed.
	with torch.no_grad():
		prediction = torch.sigmoid(baseline_model(token_tensor)).item()

	label = "Subjective" if prediction > 0.5 else "Objective"
	return prediction, label

# cnn prediction function with output probability and label
def cnn_predict(sentence):
	"""Classify a sentence with the CNN model.

	Args:
		sentence: raw text; it is split on whitespace into tokens.

	Returns:
		A ``(probability, label)`` tuple where ``probability`` is the sigmoid
		of the model logit and ``label`` is "Subjective" when the probability
		exceeds 0.5, otherwise "Objective". For empty or whitespace-only
		input ``(nan, "No input provided")`` is returned.
	"""
	tokens = (sentence or "").split()
	if not tokens:
		# Previously an empty sentence was padded to only three "." tokens,
		# fewer than the 4-token convolution kernel, which made the model
		# raise. Report the missing input explicitly instead.
		return float("nan"), "No input provided"

	# Pad sentences shorter than 4 words with three "." tokens so that the
	# largest convolution kernel (4 tokens) always fits.
	if len(tokens) < 4:
		tokens += [".", ".", "."]

	# Tokens missing from the vocabulary fall back to the last index.
	unk_index = len(glove.stoi) - 1
	# Try the token as typed first, then its lowercase form, so that
	# capitalised words are not all collapsed onto the fallback index.
	token_ints = [
		glove.stoi.get(tok, glove.stoi.get(tok.lower(), unk_index))
		for tok in tokens
	]
	# Shape [sent len, 1]: a batch containing a single sentence.
	token_tensor = torch.LongTensor(token_ints).view(-1, 1)

	# Inference only - no autograd graph needed.
	with torch.no_grad():
		prediction = torch.sigmoid(cnn_model(token_tensor)).item()

	label = "Subjective" if prediction > 0.5 else "Objective"
	return prediction, label

# Gradio interface for the baseline model: text in, probability and label out.
baseline_interface = gr.Interface(
	fn=baseline_predict,
	inputs="text",
	outputs=[
		gr.outputs.Textbox(label="Baseline Probability"),
		gr.outputs.Textbox(label="Baseline Label"),
	],
	title="Baseline Model",
	description="Enter a sentence to predict whether it is subjective or objective",
	allow_flagging=False,
)

# Gradio interface for the CNN model: text in, probability and label out.
cnn_interface = gr.Interface(
	fn=cnn_predict,
	inputs="text",
	outputs=[
		gr.outputs.Textbox(label="CNN Probability"),
		gr.outputs.Textbox(label="CNN Label"),
	],
	title="CNN Model",
	description="Enter a sentence to predict whether it is subjective or objective.",
	allow_flagging=False,
)

# Feed the same input to both interfaces and show their outputs side by side.
gr.Parallel(baseline_interface, cnn_interface).launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x124cecc70>, 'http://127.0.0.1:7860/', None)