<h3>Import Libraries

In [1]:
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn import decomposition, ensemble

import pandas as pd
import xgboost, numpy, textblob, string
from keras.preprocessing import text, sequence
from keras import layers, models, optimizers

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Read Data
path = '/content/drive/MyDrive/NLP/Sentimental Analysis Project/Amazon_Customer_Reviews.csv'
data = pd.read_csv(path)
data.drop('Unnamed: 0',axis=1,inplace=True)
data.head()


Unnamed: 0,Ratings,Reviews
0,Good,"I use Amazon.com often, and 99% of the time, ..."
1,Good,With Amazon you can leisurely shop in the com...
2,Good,I am very happy with all my purchases since I...
3,Good,"I absolutely love Amazon. Their selections, pr..."
4,Good,We have shopped Amazon for years and always fi...


<h3>Pre-Processing

In [7]:
#Data cleaning and preprocessing
import re
from nltk.stem import WordNetLemmatizer
wordnet = WordNetLemmatizer()

# Function for preprocessing
def preprocessing(data):    
    
    reviews =  []
    for i in range(0, len(data)):
        # Replacing values in rows
        review = data['Reviews'][i]
        review = review.replace('$','dollars')
        review = review.lower()
        reviews.append(review)
    
    return reviews
        
reviews_1 = preprocessing(data)

In [8]:
df = pd.DataFrame(reviews_1)
df.head()

Unnamed: 0,0
0,"i use amazon.com often, and 99% of the time, ..."
1,with amazon you can leisurely shop in the com...
2,i am very happy with all my purchases since i...
3,"i absolutely love amazon. their selections, pr..."
4,we have shopped amazon for years and always fi...


In [9]:
df.columns=['reviews']

In [10]:
df.head()

Unnamed: 0,reviews
0,"i use amazon.com often, and 99% of the time, ..."
1,with amazon you can leisurely shop in the com...
2,i am very happy with all my purchases since i...
3,"i absolutely love amazon. their selections, pr..."
4,we have shopped amazon for years and always fi...


<h3> Word to vec Representation

In [11]:
path_vec = '/content/drive/MyDrive/NLP/Sentimental Analysis Project/wiki-news-300d-1M.vec'

In [None]:
# load the pre-trained word-embedding vectors 
embeddings_index = {}
for i, line in enumerate(open(path_vec)):
    values = line.split()
    embeddings_index[values[0]] = numpy.asarray(values[1:], dtype='float32')

In [None]:
embeddings_index

In [None]:
# create a tokenizer 
token = text.Tokenizer()
token.fit_on_texts(df['reviews'])
word_index = token.word_index

In [None]:
# create token-embedding mapping
embedding_matrix = numpy.zeros((len(word_index) + 1, 300))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [None]:
embedding_matrix

array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.0281    ,  0.2958    ,  0.0666    , ..., -0.0482    ,
         0.14300001,  0.0724    ],
       [ 0.0171    ,  0.0596    , -0.0121    , ...,  0.0741    ,
         0.004     , -0.0612    ],
       ...,
       [-0.0885    , -0.1832    , -0.084     , ...,  0.15350001,
        -0.0632    , -0.0021    ],
       [ 0.0506    , -0.17900001, -0.20730001, ...,  0.1452    ,
        -0.0499    ,  0.1106    ],
       [ 0.0481    , -0.02      , -0.0963    , ..., -0.0867    ,
         0.0005    , -0.35960001]])

<h3>Train-Test Split and Dummifying output variable 

In [None]:
#dummyfying output variable
y=pd.get_dummies(data['Ratings'],drop_first=True)

# split the dataset into training and validation datasets 
train_x, valid_x, train_y, valid_y = model_selection.train_test_split(df['reviews'], y)

In [None]:
len(max(df['reviews']))

165

In [None]:
# convert text to sequence of tokens and pad them to ensure equal length vectors 
train_seq_x = sequence.pad_sequences(token.texts_to_sequences(train_x), maxlen=165)
valid_seq_x = sequence.pad_sequences(token.texts_to_sequences(valid_x), maxlen=165)

In [None]:
train_seq_x

array([[   0,    0,    0, ...,    8, 4545,  207],
       [1684,   23,  909, ...,   78,   76,   56],
       [   0,    0,    0, ...,    3,   25, 1061],
       ...,
       [   0,    0,    0, ...,   81,   14,  630],
       [   0,    0,    0, ...,  151,   83,   21],
       [   2,  466,   73, ...,   78,   76,   56]], dtype=int32)

<h3>LSTM

In [None]:
## Creating model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense

model=Sequential()
model.add(Embedding(len(word_index) + 1, 300, weights=[embedding_matrix],input_length=165 ,trainable=False))
model.add(Dropout(0.3))
model.add(LSTM(100))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
print(model.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 165, 300)          5350800   
_________________________________________________________________
dropout_1 (Dropout)          (None, 165, 300)          0         
_________________________________________________________________
lstm (LSTM)                  (None, 100)               160400    
_________________________________________________________________
dense (Dense)                (None, 1)                 101       
Total params: 5,511,301
Trainable params: 160,501
Non-trainable params: 5,350,800
_________________________________________________________________
None


In [None]:
model.fit(train_seq_x,train_y,validation_data=(valid_seq_x,valid_y),epochs=10,batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f4636962198>

<h3>Gated Recurrent Network

In [None]:
## Creating model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import GRU
from tensorflow.keras.layers import Dense

model=Sequential()
model.add(Embedding(len(word_index) + 1, 300, weights=[embedding_matrix],input_length=165 ,trainable=False))
model.add(Dropout(0.3))
model.add(GRU(100))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
print(model.summary())

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 165, 300)          5350800   
_________________________________________________________________
dropout_2 (Dropout)          (None, 165, 300)          0         
_________________________________________________________________
gru (GRU)                    (None, 100)               120600    
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 5,471,501
Trainable params: 120,701
Non-trainable params: 5,350,800
_________________________________________________________________
None


In [None]:
model.fit(train_seq_x,train_y,validation_data=(valid_seq_x,valid_y),epochs=10,batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f4633e03240>

<h3>Bi-Directional</h3>

In [None]:
from tensorflow.keras.layers import Bidirectional
## Creating model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import GRU
from tensorflow.keras.layers import Dense

model=Sequential()
model.add(Embedding(len(word_index) + 1, 300, weights=[embedding_matrix],input_length=165 ,trainable=False))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(100)))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
print(model.summary())



Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 165, 300)          5350800   
_________________________________________________________________
dropout_4 (Dropout)          (None, 165, 300)          0         
_________________________________________________________________
bidirectional (Bidirectional (None, 200)               320800    
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 201       
Total params: 5,671,801
Trainable params: 321,001
Non-trainable params: 5,350,800
_________________________________________________________________
None


In [None]:
model.fit(train_seq_x,train_y,validation_data=(valid_seq_x,valid_y),epochs=10,batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f4631e63630>