## Importing libraries

In [96]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import numpy as np

## Loading data and splitting into train and test sets

In [97]:
# new_process_data.csv is a preprocessed file in which the text data columns
# have been combined into a single column.
df = pd.read_csv('new_process_data.csv')

In [98]:
# Inputs are the combined text column; targets are the 'Rating' column.
x = df['text'].values
y = df['Rating'].values

# Hold out 33% of the rows for testing. The split is stratified on the target
# and uses a fixed random_state so it is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    stratify=y,
    random_state=42,
)

## Checking different features for best test accuracy

In [81]:
def training_model(x_train , x_test , y_train , y_test , verbose , mode):
    """Fit a small dense classifier on one text encoding and print its test accuracy.

    Parameters
    ----------
    x_train, x_test
        Texts handed to the Keras ``Tokenizer``; the vocabulary is fitted on
        ``x_train`` only and then used to encode both sets.
    y_train, y_test
        Targets for the single sigmoid output unit (the model is compiled
        with binary cross-entropy).
    verbose : int
        Forwarded unchanged to ``fit`` and ``evaluate``.
    mode : str
        Forwarded to ``Tokenizer.texts_to_matrix``. It is also concatenated
        into the printed message, so it must be a ``str``.

    Returns
    -------
    None
        The test accuracy is only printed, not returned.
    """
    # Learn the vocabulary from the training texts, then encode both splits
    # with that same vocabulary.
    vectorizer = Tokenizer()
    vectorizer.fit_on_texts(x_train)
    train_matrix = vectorizer.texts_to_matrix(x_train, mode=mode)
    test_matrix = vectorizer.texts_to_matrix(x_test, mode=mode)

    # One hidden ReLU layer of 50 units feeding a single sigmoid unit; the
    # input width is the number of columns of the encoded matrix.
    n_features = train_matrix.shape[1]
    network = Sequential([
        Dense(50, input_shape=(n_features,), activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    network.fit(train_matrix, y_train, epochs=10, verbose=verbose)
    _, test_accuracy = network.evaluate(test_matrix, y_test, verbose=verbose)

    print('Test accuracy for ' + mode + ' is = ' , test_accuracy)

In [82]:
# Train one throw-away model per texts_to_matrix encoding (silently, verbose=0)
# so that the printed test accuracies can be compared.
modes = ['freq', 'binary', 'tfidf', 'count']
for mode in modes:
    training_model(x_train, x_test, y_train, y_test, 0, mode)

Test accuracy for freq is =  0.9397357106208801
Test accuracy for binary is =  0.9318061470985413
Test accuracy for tfidf is =  0.9318061470985413
Test accuracy for count is =  0.932511031627655


### As we now know, `freq` gives us the best test accuracy, so let's train our actual model. Before training it, don't forget to re-run the first three cells, because we don't want our tokenizer to learn the same words again and again.

## Running our final model

In [99]:
def final_training_model(x_train , x_test , y_train , y_test , verbose , mode):
    """Train the final dense classifier and return it with its fitted tokenizer.

    The tokenizer is fitted on ``x_train`` only; both text sets are then
    encoded with ``Tokenizer.texts_to_matrix`` using ``mode``. The network is
    a 50-unit ReLU layer followed by a single sigmoid unit, compiled with
    binary cross-entropy and the Adam optimizer, and trained for 10 epochs.

    Parameters
    ----------
    x_train, x_test
        Texts handed to the Keras ``Tokenizer``.
    y_train, y_test
        Targets for the single sigmoid output unit.
    verbose : int
        Forwarded unchanged to ``fit`` and ``evaluate``.
    mode : str
        Forwarded to ``Tokenizer.texts_to_matrix``. It is also concatenated
        into the printed message, so it must be a ``str``.

    Returns
    -------
    tuple
        ``(tokenizer, model)``: the fitted tokenizer and the trained model,
        so that new texts can later be encoded and scored the same way.
    """
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(x_train)

    X_train = tokenizer.texts_to_matrix(x_train,mode = mode)
    X_test = tokenizer.texts_to_matrix(x_test,mode = mode)

    model = Sequential()
    model.add(Dense(50 , input_shape = (X_train.shape[1],) , activation = 'relu'))
    model.add(Dense(1 , activation = 'sigmoid'))

    model.compile(loss='binary_crossentropy' , optimizer = 'adam' , metrics=['accuracy'])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() emitted a stray "None" line after the table.
    model.summary()

    model.fit(X_train , y_train , epochs=10 , verbose=verbose)

    loss , acc = model.evaluate(X_test , y_test , verbose=verbose)

    print('#'*50)

    print('Test accuracy for ' + mode + ' is = ' , acc)

    return tokenizer , model

In [100]:
# Train the final model with the 'freq' encoding and keep both the fitted
# tokenizer and the trained network for the prediction cell.
our_tokenizer, our_model = final_training_model(
    x_train, x_test, y_train, y_test, verbose=2, mode='freq'
)

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_33 (Dense)             (None, 50)                902250    
_________________________________________________________________
dense_34 (Dense)             (None, 1)                 51        
Total params: 902,301
Trainable params: 902,301
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
360/360 - 3s - loss: 0.4039 - accuracy: 0.8796
Epoch 2/10
360/360 - 2s - loss: 0.2870 - accuracy: 0.8815
Epoch 3/10
360/360 - 2s - loss: 0.2053 - accuracy: 0.9069
Epoch 4/10
360/360 - 2s - loss: 0.1524 - accuracy: 0.9391
Epoch 5/10
360/360 - 2s - loss: 0.1252 - accuracy: 0.9522
Epoch 6/10
360/360 - 2s - loss: 0.1070 - accuracy: 0.9610
Epoch 7/10
360/360 - 2s - loss: 0.0939 - accuracy: 0.9665
Epoch 8/10
360/360 - 2s - loss: 0.0829 - accuracy: 0.9732
Epoch 9/10
360/360 - 2s - loss: 0.0733 -

## Making predictions for future reviews

In [114]:
def predict_sentiment(r_text , tokenizer=None , model=None , mode='freq'):
    """Classify one review text as POSITIVE or NEGATIVE with a confidence.

    Parameters
    ----------
    r_text : str
        The review text to score.
    tokenizer : optional
        Object with a ``texts_to_matrix(texts, mode=...)`` method. Defaults to
        the notebook-level ``our_tokenizer``.
    model : optional
        Object with a ``predict(encoded, verbose=...)`` method whose result is
        indexed as ``[0, 0]`` to read the positive-class probability.
        Defaults to the notebook-level ``our_model``.
    mode : str
        Encoding mode passed to ``texts_to_matrix``. It must be the same mode
        the model was trained with; the default ``'freq'`` matches the final
        training cell.

    Returns
    -------
    tuple
        ``(confidence, label)`` where ``label`` is ``'NEGATIVE'`` or
        ``'POSITIVE'`` and ``confidence`` is the probability of that label.
    """
    # Fall back to the objects created by the final training cell, so that
    # existing one-argument calls keep working unchanged.
    if tokenizer is None:
        tokenizer = our_tokenizer
    if model is None:
        model = our_model

    encoded = tokenizer.texts_to_matrix([r_text] , mode = mode)
    yhat = model.predict(encoded , verbose = 0)
    percent_pos = yhat[0,0]

    # A probability of exactly 0.5 counts as NEGATIVE, matching the previous
    # round()-based check (round half to even gives 0 for 0.5).
    if percent_pos <= 0.5:
        return (1-percent_pos) , 'NEGATIVE'
    return percent_pos , 'POSITIVE'
    
    
# Spot-check the trained model on a negative-sounding review ...
text = 'it is worst product try to avoid it you can find other options'
percent, sentiment = predict_sentiment(text)
print(
    'Review : [%s]\nSentiment : %s (%.3f%%)\n'
    % (text, sentiment, percent * 100)
)

# ... and on a positive-sounding one.
text = 'great one go for it'
percent, sentiment = predict_sentiment(text)
print(
    'Review : [%s]\nSentiment : %s (%.3f%%)'
    % (text, sentiment, percent * 100)
)

Review : [it is worst product try to avoid it you can find other options]
Sentiment : NEGATIVE (80.180%)

Review : [great one go for it]
Sentiment : POSITIVE (99.983%)
