In [1]:
#Importing dependencies

import nltk
import random
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
from google.colab import files
import pandas as pd
import re
from nltk.corpus import stopwords  
from nltk.tokenize import word_tokenize  

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [2]:

# Using the kaggle token for accessing the IMDB dataset
from google.colab import files
uploaded = files.upload()

Saving kaggle.json to kaggle.json


In [3]:
!ls -lha kaggle.json
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d lakshmi25npathi/imdb-dataset-of-50k-movie-reviews
!unzip imdb-dataset-of-50k-movie-reviews.zip

-rw-r--r-- 1 root root 67 Mar 25 06:26 kaggle.json
Downloading imdb-dataset-of-50k-movie-reviews.zip to /content
 86% 22.0M/25.7M [00:00<00:00, 111MB/s]
100% 25.7M/25.7M [00:00<00:00, 102MB/s]
Archive:  imdb-dataset-of-50k-movie-reviews.zip
  inflating: IMDB Dataset.csv        


In [4]:
# Reading the csv file and replacing positive and negative with 1 and 0
orig_data = pd.read_csv('IMDB Dataset.csv')
orig_data.replace({"positive":1,"negative":0},inplace=True)

In [5]:
# Removing 'not' from stopwords' list and adding the <br> token
stop_words = list(set(stopwords.words('english')))
stop_words.remove('not')
stop_words.append('br')

In [6]:
stop_words

['doesn',
 'about',
 'from',
 'during',
 'out',
 'you',
 "doesn't",
 'very',
 'our',
 'it',
 'do',
 'mustn',
 "it's",
 'o',
 'won',
 'myself',
 'which',
 'this',
 'him',
 'd',
 'on',
 'when',
 'did',
 'than',
 'm',
 'i',
 'its',
 "aren't",
 'had',
 'he',
 'because',
 'in',
 'should',
 "you've",
 'such',
 'her',
 'ain',
 "weren't",
 'his',
 'down',
 'each',
 'who',
 'no',
 'own',
 'yours',
 'so',
 'until',
 "shouldn't",
 'weren',
 'a',
 'don',
 'been',
 'as',
 "won't",
 'above',
 'any',
 "don't",
 'we',
 'ours',
 'just',
 'haven',
 'mightn',
 'only',
 'my',
 'being',
 'does',
 "that'll",
 'they',
 'has',
 'couldn',
 'through',
 'yourself',
 'she',
 "she's",
 'll',
 't',
 'over',
 "mustn't",
 'more',
 "you're",
 'doing',
 'why',
 'was',
 'other',
 'himself',
 "hasn't",
 'is',
 've',
 'themselves',
 'these',
 'now',
 'while',
 "isn't",
 'there',
 'isn',
 "you'll",
 'what',
 'if',
 'again',
 "you'd",
 'all',
 'an',
 'are',
 'under',
 'am',
 'with',
 'against',
 'into',
 'once',
 'how',
 's

In [7]:
# This function takes in data, removes special characters and stop words, returning the X and Y dataset
def preprocess_text(data):
  x = []
  y = []
  for p,q in zip(data['review'],data['sentiment']):
    cleaned = re.sub(r'[^(a-zA-Z)\s]','',p)
    stopped = '  '.join([w for w in cleaned.split(' ') if w not in stop_words])
    x.append(stopped)
    y.append(q)
  return x,y

X,Y = preprocess_text(orig_data)
print(len(X))
print(len(Y))

50000
50000


In [8]:
# Using the word tokeniser from the keras library's built in preprocessing module to create our bag of words
import keras
from keras.preprocessing.text import Tokenizer
tokens = keras.preprocessing.text.Tokenizer(
    num_words=None,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True, split=' ', char_level=False, oov_token='<UNK>'
)
tokens.fit_on_texts(X)

In [9]:
len(tokens.word_index)

171851

In [10]:
# Converting the text data to sequences utilizing the tokenizer
import numpy as np
X_fin = tokens.texts_to_sequences(X)

In [11]:
# Padding our data to get uniform length to fit into the model
X_fin = keras.preprocessing.sequence.pad_sequences(X_fin,padding = 'post')

In [12]:
# Checking the length of the padding for future reference
length = [len(k) for k in X_fin]
max_pad = max(length)
max_pad

1493

In [13]:
# Dividing test and train data into an 80-20 split
x_train = np.array(X_fin[:40000])
y_train = np.array(Y[:40000])
x_val = np.array(X_fin[40000:])
y_val = np.array(Y[40000:])

In [14]:
print(y_train)
print(x_train[0])

[1 1 1 ... 1 0 0]
[   7 1886  992 ...    0    0    0]


In [15]:
# Using a sequential model to predict the sentiment
from keras.layers import Dense,Bidirectional,LSTM,Input,Embedding,Dropout,BatchNormalization,TimeDistributed
from keras.models import Model
from keras.utils.vis_utils import plot_model

inputs = Input(shape=(None,), dtype="int32")

# Embedding each integer in a 128-dimensional vector (these word embeddings can either be learned by scratch or pre-learnt)
x = Embedding(len(tokens.word_index),200)(inputs)

# Addding 3 bidirectional LSTMs
x = Bidirectional(LSTM(128,return_sequences=True))(x)
x = Dropout(0.20)(x)
x = BatchNormalization()(x)

x = Bidirectional(LSTM(128,return_sequences=True))(x)
x = Dropout(0.10)(x)
x = BatchNormalization()(x)

x = Bidirectional(LSTM(128))(x)

# Getting the final output with the sigmoid function
outputs = Dense(1, activation="sigmoid")(x)
k_model = Model(inputs, outputs)
k_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
embedding (Embedding)        (None, None, 200)         34370200  
_________________________________________________________________
bidirectional (Bidirectional (None, None, 256)         336896    
_________________________________________________________________
dropout (Dropout)            (None, None, 256)         0         
_________________________________________________________________
batch_normalization (BatchNo (None, None, 256)         1024      
_________________________________________________________________
bidirectional_1 (Bidirection (None, None, 256)         394240    
_________________________________________________________________
dropout_1 (Dropout)          (None, None, 256)         0     

In [16]:
# Using the adam optimization method to train the model on 10 epochs
k_model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
history = k_model.fit(x_train, y_train,batch_size = 32,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
# Saving the model and downloading it
k_model.save('/content/drive/MyDrive/New_model_v2.h5')
files.download('New_model_v2.h5')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [39]:
# Saving in drive as backup
k_model.save('/content/drive/MyDrive/New_model_v2.h5')

In [18]:
# Evaluating the model to check accuracy
k_model.evaluate(x_val[:-1],y_val[:-1])



[0.5921744704246521, 0.8727872967720032]

In [32]:
# Defining a function that converts the text into the sequence of padded form
def predict(phrase):
    phrase = tokens.texts_to_sequences(phrase)
    phrase = keras.preprocessing.sequence.pad_sequences(phrase,maxlen=max_pad,padding='post')
    res = k_model.predict([phrase])
    return res

In [None]:
"""
Sample review for "Avengers: Endgame" used to check how the model is doing on positive responses: 
Overwhelming. It best describes the final chapter that culminates Marvel Cinematic Universe’s 21 iconic films into one. And that also describes the experience of watching your favourite superheroes come together for a singular goal, for one last time. Directors Anthony and Joe Russo ensure that the humongous build-up and the avalanche of expectations do not get the better of them. They deliver a largely wholesome product that is full of moments laced with action, emotion, comedy and drama. Writers Christopher Markus and Stephen McFeely take you along, even if you haven’t been following the franchise. They do an incredible job with the screenplay to balance emotions with visual spectacle. So if you’re not a fan yet, chances are, you might become one after watching this instalment.
While the screen time for each character is not equal, their significance in the story is. And there are enough surprises in store, as far as their fates are concerned. ‘Endgame’ delivers quite well on the emotional quotient, bringing out superpowers and vulnerabilities of its cinematic demigods through their measured performances. From an upright Captain America (Chris Evans) to a stoic Black Widow (Scarlett Johansson) and from a straight-faced Captain Marvel (Brie Larson) delivering the punches to the reassuring presence of Iron Man (Robert Downey Jr.), ‘Endgame’ has it all and a lot more. Thanks to the conviction in performances, you also might just find yourself rooting for the bad guy, Thanos (Josh Brolin) at some point. However, it’s the comic collective of Thor (Chris Hemsworth), Hulk (Mark Ruffalo) and Ant Man (Paul Rudd) that ensures there are enough lighter moments in this otherwise heartfelt finale.
The action becomes progressively intense, but never overbearing. In fact, it remains relevant and true to the narrative, such that it weaves in enough opportunities for major plot twists that even the diehard fans may not see coming. The extensive CGI work adds to the visual appeal, even in 2D.
For the non-fans, the film’s explanatory tone might come across as a speed breaker at times, but for the fans, the same invokes hope and excitement, leading to constant gasps and howls.
Overall, 'Avengers: Endgame' is a befitting tribute to the Cinematic Universe that has spawned larger-than-life superheroes and super fans. At three hours plus, ‘Endgame’ delivers on a lot of its hallmark promises, leaving its fans with a range of emotions and fond memories.


Sample review for "Disaster Movie" to see how the model is doing on negative responses:
This movie was on TV once so I decided to watch it since I wouldn't have to pay any money for it.The main character Will (played by Matt Lanter) has 
a dream where he meets a stone age Amy Winehouse (I think it's supposed to be a joke) who tells him that the world is going to end the day this 
movie premiered in the cinema (Coincidence?) and to stop it they must find a crystal skull. Matt later wakes up to celebrate his super-sweet 
sixteenth birthday (despite him being in his twenties) in a scene where we get one unfunny joke and celebrity impersonation after another. Then 
disaster strikes (it seems kinda redundant though since this movie already is one), hurricanes, earthquakes, meteorites and other classic disaster
movie ingredients hit planet earth one after another. Will, followed by his friends: Juney (Crista Flanagan), Calvin (Gary \"G Thang\" Johnson),
and Lisa (Kim Kardashian) go out into the city and tries to find his girlfriend and a safe place and later realizes that he has to find the 
crystal skull to set things right.The problem with this movie is, just like other movies by Jason Friedberg and Aaron Seltzer, that it doesn't
stay on the theme but goes all over the place and try to spoof almost every popular movie that was made that year. And I use the term \"spoof\"
lightly. Once again \"Seltzerberger\" show that they only grasp the most superficial concept of what humor is and never really bother to dig 
deeper and see what it is that makes things funny. Sometimes doing things outside the theme can work but not if it takes up a majority of the 
movie. And (for me) this movie is worse than Epic Movie. Yes you read right, Worse than Epic Movie. That movie at least had a story. Sure it was 
borrowed and \"crapified\" but at least it was a story. In this movie, everything that happens during the second act, when they try to find a 
safe place/figure out where they should go, just feels like a filler where the gang stumble into one reference after another. \"Seltzerberger\'s\" 
over-reliance on potty humor, movie/TV references, random musical numbers, deliberately obvious stunt-doubles and crappy special effects does not 
save them this time.Seltzer and Friedberg, your movie sucks horribly. If I may paraphrase a line from \'Billy Madison\' I\'d like to say: I 
award you only one star, and may God have mercy on your souls.Once again, if you want to see a GOOD movie made in the style that this train 
wreck was trying (and failing) to emulate, watch \"Hotshots\" \"Airplane!\", \"The naked gun\" movies, \"Top Secret\" instead."
"""

In [43]:
predict(["Overwhelming. It best describes the final chapter that culminates Marvel Cinematic Universe’s 21 iconic films into one. And that also describes the experience of watching your favourite superheroes come together for a singular goal, for one last time. Directors Anthony and Joe Russo ensure that the humongous build-up and the avalanche of expectations do not get the better of them. They deliver a largely wholesome product that is full of moments laced with action, emotion, comedy and drama. Writers Christopher Markus and Stephen McFeely take you along, even if you haven’t been following the franchise. They do an incredible job with the screenplay to balance emotions with visual spectacle. So if you’re not a fan yet, chances are, you might become one after watching this instalment. While the screen time for each character is not equal, their significance in the story is. And there are enough surprises in store, as far as their fates are concerned. ‘Endgame’ delivers quite well on the emotional quotient, bringing out superpowers and vulnerabilities of its cinematic demigods through their measured performances. From an upright Captain America (Chris Evans) to a stoic Black Widow (Scarlett Johansson) and from a straight-faced Captain Marvel (Brie Larson) delivering the punches to the reassuring presence of Iron Man (Robert Downey Jr.), ‘Endgame’ has it all and a lot more. Thanks to the conviction in performances, you also might just find yourself rooting for the bad guy, Thanos (Josh Brolin) at some point. However, it’s the comic collective of Thor (Chris Hemsworth), Hulk (Mark Ruffalo) and Ant Man (Paul Rudd) that ensures there are enough lighter moments in this otherwise heartfelt finale. The action becomes progressively intense, but never overbearing. In fact, it remains relevant and true to the narrative, such that it weaves in enough opportunities for major plot twists that even the diehard fans may not see coming. The extensive CGI work adds to the visual appeal, even in 2D. For the non-fans, the film’s explanatory tone might come across as a speed breaker at times, but for the fans, the same invokes hope and excitement, leading to constant gasps and howls. Overall, 'Avengers: Endgame' is a befitting tribute to the Cinematic Universe that has spawned larger-than-life superheroes and super fans. At three hours plus, ‘Endgame’ delivers on a lot of its hallmark promises, leaving its fans with a range of emotions and fond memories."
])

array([[0.9998185]], dtype=float32)

In [52]:
predict([
    "This movie was on TV once so I decided to watch it since I wouldn't have to pay any money for it.The main character Will (played by Matt Lanter) has a dream where he meets a stone age Amy Winehouse (I think it's supposed to be a joke) who tells him that the world is going to end the day this movie premiered in the cinema (Coincidence?) and to stop it they must find a crystal skull. Matt later wakes up to celebrate his super-sweet sixteenth birthday (despite him being in his twenties) in a scene where we get one unfunny joke and celebrity impersonation after another. Then disaster strikes (it seems kinda redundant though since this movie already is one), hurricanes, earthquakes, meteorites and other classic disaster movie ingredients hit planet earth one after another. Will, followed by his friends: Juney (Crista Flanagan), Calvin (Gary \"G Thang\" Johnson), and Lisa (Kim Kardashian) go out into the city and tries to find his girlfriend and a safe place and later realizes that he has to find the crystal skull to set things right.The problem with this movie is, just like other movies by Jason Friedberg and Aaron Seltzer, that it doesn't stay on the theme but goes all over the place and try to spoof almost every popular movie that was made that year. And I use the term \"spoof\" lightly. Once again \"Seltzerberger\" show that they only grasp the most superficial concept of what humor is and never really bother to dig deeper and see what it is that makes things funny. Sometimes doing things outside the theme can work but not if it takes up a majority of the movie. And (for me) this movie is worse than Epic Movie. Yes you read right, Worse than Epic Movie. That movie at least had a story. Sure it was borrowed and \"crapified\" but at least it was a story. In this movie, everything that happens during the second act, when they try to find a safe place/figure out where they should go, just feels like a filler where the gang stumble into one reference after another. \"Seltzerberger\'s\" over-reliance on potty humor, movie/TV references, random musical numbers, deliberately obvious stunt-doubles and crappy special effects does not save them this time.Seltzer and Friedberg, your movie sucks horribly. If I may paraphrase a line from \'Billy Madison\' I\'d like to say: I award you only one star, and may God have mercy on your souls.Once again, if you want to see a GOOD movie made in the style that this train wreck was trying (and failing) to emulate, watch \"Hotshots\" \"Airplane!\", \"The naked gun\" movies, \"Top Secret\" instead."
])

array([[0.11181722]], dtype=float32)

In [None]:
"""
Now checking to see whether we were able to map the intuition to checking for positive/negative workshop reviews : 

Sample  positive review : 
"The workshop was hardly a pushover, it was an amazingly well constructed and well delivered workshop, with key points being covered well,
explanations being to the point yet understandable, and all in all was a thrill to be a part of. This was head and shoulders above 
any other ML workshop I have been on"

Sample negative review : 
""The workshop was a chore to deal with. Honestly it was boring, drab and much more basic than I expected it to be. I had great expectations 
but was sadly let down. Poor showing""

"""

In [45]:
predict(["The workshop was hardly a pushover, it was an amazingly well constructed and well delivered workshop, with key points being covered well, explanations being to the point yet understandable, and all in all was a thrill to be a part of. This was head and shoulders above any other ML workshop I have been on"])

array([[0.9999167]], dtype=float32)

In [46]:
predict(["The workshop was a chore to deal with. Honestly it was boring, drab and much more basic than I expected it to be. I had great expectations but was sadly let down. Poor showing"])

array([[2.7348706e-05]], dtype=float32)

In [49]:
# Checking to see if we were able to overcome the trap of "not good" = "bad"
predict(['The workshop was not good at all.'])

array([[0.28421098]], dtype=float32)