In [None]:
!pip3 install gradio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip3 install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from functools import lru_cache

import gradio as gr
import numpy as np
import torch
import torch.nn as nn
import torchtext
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup

In [None]:
class Net(nn.Module):
    """Wrap a BERT-style encoder with a single-unit linear head.

    The encoder passed in must expose ``config.hidden_size`` and, when called
    with ``return_dict=True``, return a mapping containing ``'pooler_output'``.

    ``forward`` returns the raw output of the linear layer. A ``sigmoid``
    module is created in the constructor but is not applied in ``forward``.
    """

    def __init__(self, bert_model):
        super().__init__()
        self.bert_model = bert_model
        hidden_dim = self.bert_model.config.hidden_size
        self.fcdense = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids, attention_mask):
        """Encode the batch and project the pooled representation to one value per row."""
        encoded = self.bert_model(input_ids, attention_mask, return_dict=True)
        # One pooled vector per example: (batch_size, hidden_size).
        pooled = encoded['pooler_output']
        # Linear head output: (batch_size, 1).
        return self.fcdense(pooled)

In [None]:
class Baseline(nn.Module):
  """Bag-of-embeddings classifier.

  Looks up pretrained vectors (``vocab.vectors``) for every token, averages
  them over the sentence dimension and maps the average through a linear
  layer followed by a sigmoid.
  """

  def __init__(self, embedding_dim, vocab):
    super().__init__()
    self.embedding = nn.Embedding.from_pretrained(vocab.vectors)
    self.fc = nn.Linear(embedding_dim, 1)

  def forward(self, x, lengths=None):
    """Return one sigmoid score per batch column of ``x``.

    ``x`` holds token indices shaped [sentence length, batch size].
    ``lengths`` is accepted but not used.
    """
    token_vectors = self.embedding(x)      # [sent len, batch size, emb dim]
    pooled = token_vectors.mean(dim=0)     # [batch size, emb dim]
    return torch.sigmoid(self.fc(pooled))  # [batch size, 1]

In [None]:
class ToxicCommentTagger(nn.Module):
  """Multi-label comment classifier: pretrained BERT plus a linear head.

  Args:
    n_classes: number of independent labels (size of the output vector).
    n_training_steps: stored on the instance; not used inside this class.
    n_warmup_steps: stored on the instance; not used inside this class.

  Attributes:
    bert: ``bert-base-cased`` encoder loaded with ``BertModel.from_pretrained``.
    classifier: linear layer from the encoder's hidden size to ``n_classes``.
    criterion: ``nn.BCELoss`` instance; stored but not applied in ``forward``.
  """

  def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
    super().__init__()
    # Load the pretrained encoder and attach a linear classification head.
    self.bert = BertModel.from_pretrained('bert-base-cased', return_dict=True)
    self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
    self.n_training_steps = n_training_steps
    self.n_warmup_steps = n_warmup_steps
    self.criterion = nn.BCELoss()

  def forward(self, input_ids, attention_mask, labels=None):
    """Return per-label probabilities for a batch.

    Args:
      input_ids: token ids passed to the encoder.
      attention_mask: attention mask passed to the encoder.
      labels: accepted for interface compatibility; not used here.

    Returns:
      Tensor of sigmoid outputs with ``n_classes`` values per example.
    """
    encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
    logits = self.classifier(encoded.pooler_output)
    # No debug printing here: forward runs on every request/batch and must
    # not write the full tensor to stdout each time.
    return torch.sigmoid(logits)

In [None]:
# Label for each position of the BERT model's output vector.
TOXICITY_LABELS = ("toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate")
NOT_TOXIC = "Not Toxic"
# The baseline emits a sigmoid probability; 0.5 is its decision boundary.
BASELINE_THRESHOLD = 0.5
# Minimum probability for the selected BERT label to be reported.
BERT_THRESHOLD = 0.1
# Sequence length the comment is padded / truncated to before BERT.
MAX_TOKENS = 128


@lru_cache(maxsize=1)
def _load_resources():
  """Load the tokenizer, BERT tagger, GloVe vectors and baseline model once."""
  tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

  bert_model = ToxicCommentTagger(len(TOXICITY_LABELS)).to('cpu')
  bert_model.load_state_dict(torch.load('saved_weights.pt', map_location='cpu'))
  bert_model.eval()

  glove = torchtext.vocab.GloVe(name="6B", dim=100)

  # NOTE: this unpickles a whole model object; torch.load executes pickle
  # code, so only load checkpoints from a trusted source.
  baseline_model = torch.load("baseline.pt", map_location='cpu')
  baseline_model.eval()

  return tokenizer, bert_model, glove, baseline_model


def _second_highest(probs):
  """Return the second-largest distinct value (the only value if all are equal)."""
  distinct = np.unique(probs)  # sorted ascending, duplicates removed
  return distinct[-2] if distinct.size > 1 else distinct[-1]


def classify(comment_text):
  """Classify a comment with the baseline model and with the BERT tagger.

  Args:
    comment_text: raw comment string entered by the user.

  Returns:
    Tuple ``(baseline_label, bert_label)``:
      * ``baseline_label`` is ``"toxic"`` when the baseline probability
        exceeds ``BASELINE_THRESHOLD``, otherwise ``"Not Toxic"``.
      * ``bert_label`` is the label whose probability is the second-highest
        distinct value of the BERT output, if that value exceeds
        ``BERT_THRESHOLD``; otherwise ``"Not Toxic"``.
    Empty or whitespace-only input yields ``("Not Toxic", "Not Toxic")``.
  """
  # Nothing to score; also avoids averaging over zero tokens (NaN) below.
  if not comment_text or not comment_text.strip():
    return NOT_TOXIC, NOT_TOXIC

  tokenizer, bert_model, glove, baseline_model = _load_resources()

  # Baseline input: whitespace tokens mapped to GloVe indices, shaped
  # [sentence length, 1]. Out-of-vocabulary tokens use the last index.
  # NOTE(review): tokens are not lower-cased or stripped of punctuation -
  # confirm this matches how the baseline was trained.
  unknown_index = len(glove.stoi) - 1
  token_ids = [glove.stoi.get(tok, unknown_index) for tok in comment_text.split()]
  token_tensor = torch.LongTensor(token_ids).view(-1, 1)

  encoding = tokenizer.encode_plus(
      comment_text,
      max_length=MAX_TOKENS,
      padding="max_length",
      truncation=True,
      add_special_tokens=True,  # [CLS] & [SEP]
      return_token_type_ids=False,
      return_attention_mask=True,
      return_tensors='pt',
  )

  # Inference only: no autograd graph is needed.
  with torch.no_grad():
    baseline_prob = baseline_model(token_tensor).item()
    bert_output = bert_model(
        encoding["input_ids"].to('cpu'),
        encoding["attention_mask"].to('cpu'),
    )
  bert_probs = bert_output.flatten().numpy()

  baseline_label = "toxic" if baseline_prob > BASELINE_THRESHOLD else NOT_TOXIC

  # NOTE(review): the reported label is the one with the second-highest
  # probability, not the highest. Presumably this skips the generic "toxic"
  # class in favour of a more specific one - confirm this is intended, since
  # a comment that is only "toxic" is reported as "Not Toxic".
  runner_up = _second_highest(bert_probs)
  if runner_up > BERT_THRESHOLD:
    label_index = int(np.where(bert_probs == runner_up)[0][0])
    bert_label = TOXICITY_LABELS[label_index]
  else:
    bert_label = NOT_TOXIC

  return baseline_label, bert_label


# One read-only text field per model's verdict.
output1 = gr.Textbox(
    label="Baseline Toxicity classification",
)
output2 = gr.Textbox(
    label="Bert Toxicity classification",
)

# Wire the classifier to a single free-text input and the two result fields,
# then start the app; debug=True keeps the cell running so logs stay visible.
demo = gr.Interface(
    fn=classify,
    inputs="text",
    outputs=[output1, output2],
)
demo.launch(debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


**************
tensor([[0.9876, 0.0846, 0.8904, 0.0104, 0.8833, 0.1202]],
       grad_fn=<SigmoidBackward0>)
[0.9875963  0.08456862 0.890357   0.01036554 0.883297   0.12024701]
0.890357
Baseline Prob: 0.936
