In [95]:
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier # Multi Layer Perceptron Classifier 
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score

In [96]:
movies_data = pd.read_csv("Data/movies.txt", sep="\t", header = None, names = ['Sentiment', 'Review'])

In [97]:
movies_data.head()

Unnamed: 0,Sentiment,Review
0,1,The Da Vinci Code book is just awesome.
1,1,this was the first clive cussler i've ever rea...
2,1,i liked the Da Vinci Code a lot.
3,1,i liked the Da Vinci Code a lot.
4,1,I liked the Da Vinci Code but it ultimatly did...


In [98]:
movies_data.shape

(6918, 2)

In [99]:
count_vect = CountVectorizer()
X_counts = count_vect.fit_transform(movies_data['Review'])
X_counts.shape

(6918, 2132)

### Analysing the behavior of Neural Network without removing the stop words

In [100]:
X_train, X_test, y_train, y_test = train_test_split(X_counts, movies_data.Sentiment, test_size= 0.2, random_state= 111)

In [101]:
model = MLPClassifier(activation='relu', alpha = 1e-05, hidden_layer_sizes=(5,2), random_state=1)
model.fit(X=X_train, y=y_train)

MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), random_state=1)

In [102]:
y_pred = model.predict(X_train)

In [103]:
print(accuracy_score(y_train,y_pred))

1.0


### As Training Set has a near perfect Accuracy Score, validate the model for overfitting

In [104]:
scores = cross_val_score (model, X_train, y_train, cv =5)


In [105]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.99 (+/- 0.01)


### Output of Neural Network after removing the Stop Words

In [106]:
count_vect = CountVectorizer(stop_words='english')
X_counts = count_vect.fit_transform(movies_data['Review'])
X_counts.shape

(6918, 1921)

In [107]:
X_train, X_test, y_train, y_test = train_test_split(X_counts, movies_data.Sentiment, test_size= 0.2, random_state= 1234)

In [108]:
model = MLPClassifier(activation='relu', max_iter=1000, alpha = 1e-05, hidden_layer_sizes=(5,2), random_state=1)
model.fit(X=X_train, y=y_train)

MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), max_iter=1000,
              random_state=1)

In [109]:
y_pred = model.predict(X_train)

In [110]:
print(accuracy_score(y_train,y_pred))

1.0


In [111]:
scores = cross_val_score (model, X_train, y_train, cv =5)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.99 (+/- 0.00)


In [112]:
y_pred = model.predict(X_test)
print(accuracy_score(y_test,y_pred))

0.990606936416185


### Confusion Matrix and Precision Recall Values

In [113]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

array([[577,  13],
       [  0, 794]], dtype=int64)

In [114]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99       590
           1       0.98      1.00      0.99       794

    accuracy                           0.99      1384
   macro avg       0.99      0.99      0.99      1384
weighted avg       0.99      0.99      0.99      1384

