# Sentiment Classification & How To "Frame Problems" for a Neural Network

# Curate a Dataset

In [74]:
g = open('training_text.txt','r') # What we know!
train_reviews = list(map(lambda x:x[:-1],g.readlines()))
g.close()

g = open('training_category.txt','r') # What we WANT to know!
train_labels = list(map(lambda x:x[:-1].upper(),g.readlines()))
g.close()

g = open('test_text.txt','r') # What we know!
test_reviews = list(map(lambda x:x[:-1],g.readlines()))
g.close()

In [75]:
len(train_reviews) #3464
#len(train_labels)   #3464
#len(test_reviews)    #1360

3464

In [76]:
train_reviews[0]
#test_reviews[0]

'5573 1189 4017 1207 4768 8542 17 1189 5085 5773'

In [77]:
train_labels[3463]

'NEGATIVE'

# Develop a predictive hypothesis

In [78]:
def pretty_print_review_and_label(i):
    print(train_labels[i] + "\t:\t" + train_reviews[i][:80] + "...")
    
print("labels.txt \t : \t reviews.txt\n")
pretty_print_review_and_label(2137)
#pretty_print_review_and_label(12816)
#pretty_print_review_and_label(6267)
#pretty_print_review_and_label(21934)
#pretty_print_review_and_label(5297)
#pretty_print_review_and_label(4998)

labels.txt 	 : 	 reviews.txt

POSITIVE	:	3048 4752 8542 5800 5063 203 6257 5773 6338 5773...


# Quick Validation

In [79]:
from collections import Counter
import numpy as np

In [80]:
positive_counts = Counter()
negative_counts = Counter()
total_counts = Counter()

In [81]:
for i in range(len(train_reviews)):
    if(train_labels[i] == 'POSITIVE'):
        for word in train_reviews[i].split(" "):
            positive_counts[word] += 1
            total_counts[word] += 1
    else:
        for word in train_reviews[i].split(" "):
            negative_counts[word] += 1
            total_counts[word] += 1

In [82]:
positive_counts.most_common()

[('5773', 2938),
 ('2734', 1231),
 ('230', 1198),
 ('237', 1177),
 ('6084', 1108),
 ('3078', 1043),
 ('7526', 947),
 ('8542', 771),
 ('5922', 735),
 ('4017', 682),
 ('2698', 613),
 ('5071', 582),
 ('6300', 565),
 ('224', 561),
 ('5045', 546),
 ('6700', 523),
 ('753', 517),
 ('8406', 454),
 ('4211', 415),
 ('7924', 380),
 ('1994', 365),
 ('6517', 328),
 ('6461', 288),
 ('5672', 279),
 ('7945', 276),
 ('5107', 275),
 ('5658', 263),
 ('2963', 259),
 ('5795', 253),
 ('3283', 231),
 ('2230', 230),
 ('3647', 229),
 ('4608', 224),
 ('1444', 224),
 ('7860', 222),
 ('3968', 220),
 ('7152', 215),
 ('2269', 214),
 ('4442', 212),
 ('6527', 210),
 ('6193', 203),
 ('5085', 198),
 ('1543', 197),
 ('7669', 195),
 ('5819', 190),
 ('6794', 184),
 ('5174', 175),
 ('6349', 170),
 ('7343', 164),
 ('5326', 161),
 ('6254', 159),
 ('203', 159),
 ('3718', 158),
 ('5035', 157),
 ('1698', 156),
 ('254', 154),
 ('3468', 152),
 ('3581', 152),
 ('6589', 150),
 ('8075', 150),
 ('6980', 149),
 ('6619', 147),
 ('5551'

In [83]:
pos_neg_ratios = Counter()

for term,cnt in list(total_counts.most_common()):
    if(cnt > 100):
        pos_neg_ratio = positive_counts[term] / float(negative_counts[term]+1)
        pos_neg_ratios[term] = pos_neg_ratio

for word,ratio in pos_neg_ratios.most_common():
    if(ratio > 1):
        pos_neg_ratios[word] = np.log(ratio)
    else:
        pos_neg_ratios[word] = -np.log((1 / (ratio+0.01)))

In [84]:
# words most frequently seen in a review with a "POSITIVE" label
pos_neg_ratios.most_common()

[('6700', 6.259581464064923),
 ('7526', 5.754686804517968),
 ('5107', 5.616771097666572),
 ('5045', 5.60947179518496),
 ('8406', 5.424950017481403),
 ('4608', 5.4116460518550396),
 ('7860', 5.402677381872279),
 ('4442', 5.356586274672012),
 ('2698', 5.319752647268102),
 ('7669', 5.272999558563747),
 ('5819', 5.247024072160486),
 ('5174', 5.1647859739235145),
 ('203', 5.0689042022202315),
 ('3468', 5.0238805208462765),
 ('6980', 5.003946305945459),
 ('6084', 4.930870325627393),
 ('7945', 4.927253685157205),
 ('572', 4.912654885736052),
 ('6343', 4.852030263919617),
 ('7957', 4.844187086458591),
 ('3771', 4.844187086458591),
 ('7473', 4.795790545596741),
 ('4799', 4.795790545596741),
 ('3080', 4.770684624465665),
 ('3601', 4.7535901911063645),
 ('3283', 4.749270529961848),
 ('77', 4.74493212836325),
 ('5620', 4.718498871295094),
 ('6451', 4.718498871295094),
 ('2655', 4.709530201312334),
 ('6843', 4.700480365792417),
 ('4521', 4.6913478822291435),
 ('3730', 4.68213122712422),
 ('2641', 4

In [85]:
# words most frequently seen in a review with a "NEGATIVE" label
list(reversed(pos_neg_ratios.most_common()))[0:30]

[('1280', 0.8732733467904643),
 ('4032', 1.4781019103730135),
 ('6589', 1.5141277326297755),
 ('1556', 1.5769147207285403),
 ('5326', 1.5848968035179827),
 ('4801', 1.6094379124341003),
 ('154', 1.7692866133759964),
 ('6619', 1.8549383708495866),
 ('6461', 2.02537432040956),
 ('7685', 2.0343211063993665),
 ('5373', 2.1078794770003695),
 ('5658', 2.1709566505156093),
 ('4017', 2.181224235989778),
 ('1994', 2.3163784151263815),
 ('3718', 2.354544831924757),
 ('5672', 2.412335956953165),
 ('5922', 2.488996635039525),
 ('5085', 2.515678308454754),
 ('4580', 2.545531271604435),
 ('237', 2.5598646007434267),
 ('1543', 2.5751535276357784),
 ('254', 2.6390573296152584),
 ('5795', 2.6430177308313554),
 ('5773', 2.7176261976704943),
 ('4976', 2.8134107167600364),
 ('5035', 2.8590212280120886),
 ('2230', 2.953172659135195),
 ('755', 2.9618307218783095),
 ('3746', 3.0122615755052013),
 ('753', 3.0291670496402285)]

# Transforming Text into Numbers

In [86]:
#from IPython.display import Image

#review = "This was a horrible, terrible movie."

#Image(filename='assets/sentiment_network.png')

In [87]:
#review = "The movie was excellent"

#Image(filename='assets/sentiment_network_pos.png')

# Creating the Input/Output Data

In [88]:
vocab = set(total_counts.keys())
vocab_size = len(vocab)
print(vocab_size)

8493


In [89]:
list(vocab)

['4661',
 '7993',
 '6022',
 '8266',
 '1072',
 '8476',
 '137',
 '1650',
 '5129',
 '8393',
 '3256',
 '6',
 '5975',
 '5266',
 '6065',
 '4070',
 '256',
 '1023',
 '8232',
 '720',
 '6481',
 '2769',
 '2507',
 '273',
 '216',
 '5451',
 '210',
 '3837',
 '4982',
 '1680',
 '2051',
 '3556',
 '2502',
 '6928',
 '7440',
 '7990',
 '950',
 '8582',
 '1720',
 '7595',
 '6720',
 '4443',
 '1976',
 '1408',
 '7489',
 '7416',
 '519',
 '7136',
 '4512',
 '1902',
 '4134',
 '4367',
 '5659',
 '6437',
 '3728',
 '5642',
 '3772',
 '7155',
 '3932',
 '4688',
 '5618',
 '2172',
 '2722',
 '7299',
 '7387',
 '3735',
 '7691',
 '4232',
 '7685',
 '2788',
 '8572',
 '6384',
 '1724',
 '2600',
 '7594',
 '989',
 '5117',
 '4998',
 '2697',
 '4184',
 '6763',
 '6093',
 '876',
 '5213',
 '695',
 '5544',
 '6606',
 '6142',
 '2637',
 '1876',
 '2516',
 '7542',
 '3426',
 '4072',
 '8180',
 '930',
 '3209',
 '332',
 '2723',
 '5743',
 '6906',
 '2211',
 '5338',
 '8405',
 '6679',
 '7223',
 '7658',
 '8306',
 '2758',
 '8227',
 '8377',
 '6637',
 '5700',

In [90]:
import numpy as np

layer_0 = np.zeros((1,vocab_size))
layer_0

array([[0., 0., 0., ..., 0., 0., 0.]])

In [91]:
#from IPython.display import Image
#Image(filename='assets/sentiment_network.png')

In [92]:
word2index = {}

for i,word in enumerate(vocab):
    word2index[word] = i
word2index

{'4661': 0,
 '7993': 1,
 '6022': 2,
 '8266': 3,
 '1072': 4,
 '8476': 5,
 '137': 6,
 '1650': 7,
 '5129': 8,
 '8393': 9,
 '3256': 10,
 '6': 11,
 '5975': 12,
 '5266': 13,
 '6065': 14,
 '4070': 15,
 '256': 16,
 '1023': 17,
 '8232': 18,
 '720': 19,
 '6481': 20,
 '2769': 21,
 '2507': 22,
 '273': 23,
 '216': 24,
 '5451': 25,
 '210': 26,
 '3837': 27,
 '4982': 28,
 '1680': 29,
 '2051': 30,
 '3556': 31,
 '2502': 32,
 '6928': 33,
 '7440': 34,
 '7990': 35,
 '950': 36,
 '8582': 37,
 '1720': 38,
 '7595': 39,
 '6720': 40,
 '4443': 41,
 '1976': 42,
 '1408': 43,
 '7489': 44,
 '7416': 45,
 '519': 46,
 '7136': 47,
 '4512': 48,
 '1902': 49,
 '4134': 50,
 '4367': 51,
 '5659': 52,
 '6437': 53,
 '3728': 54,
 '5642': 55,
 '3772': 56,
 '7155': 57,
 '3932': 58,
 '4688': 59,
 '5618': 60,
 '2172': 61,
 '2722': 62,
 '7299': 63,
 '7387': 64,
 '3735': 65,
 '7691': 66,
 '4232': 67,
 '7685': 68,
 '2788': 69,
 '8572': 70,
 '6384': 71,
 '1724': 72,
 '2600': 73,
 '7594': 74,
 '989': 75,
 '5117': 76,
 '4998': 77,
 '2697':

In [93]:
def update_input_layer(review):
    
    global layer_0
    
    # clear out previous state, reset the layer to be all 0s
    layer_0 *= 0
    for word in review.split(" "):
        layer_0[0][word2index[word]] += 1

update_input_layer(train_reviews[0])

In [94]:
layer_0

array([[0., 0., 0., ..., 0., 0., 0.]])

In [95]:
def get_target_for_label(label):
    if(label == 'POSITIVE'):
        return 1
    else:
        return 0

In [96]:
train_labels[0]

'POSITIVE'

In [97]:
get_target_for_label(train_labels[0])

1

In [98]:
train_labels[3463]

'NEGATIVE'

In [99]:
get_target_for_label(train_labels[3463])

0

# Building a Neural Network

- 3 layer neural network
- no non-linearity in hidden layer
- create a "pre_process_data" function to create vocabulary for our training data generating functions
- modify "train" to train over the entire corpus

In [100]:
import time
import sys
import numpy as np

# Let's tweak our network from before to model these phenomena
class SentimentNetwork:
    def __init__(self, reviews,labels,hidden_nodes = 10, learning_rate = 0.1):
       
        # set our random number generator 
        np.random.seed(1)
    
        self.pre_process_data(reviews, labels)
        
        self.init_network(len(self.review_vocab),hidden_nodes, 1, learning_rate)
        
        
    def pre_process_data(self, reviews, labels):
        
        review_vocab = set()
        for review in reviews:
            for word in review.split(" "):
                review_vocab.add(word)
        self.review_vocab = list(review_vocab)
        
        label_vocab = set()
        for label in labels:
            label_vocab.add(label)
        
        self.label_vocab = list(label_vocab)
        
        self.review_vocab_size = len(self.review_vocab)
        self.label_vocab_size = len(self.label_vocab)
        
        self.word2index = {}
        for i, word in enumerate(self.review_vocab):
            self.word2index[word] = i
        
        self.label2index = {}
        for i, label in enumerate(self.label_vocab):
            self.label2index[label] = i
         
        
    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        # Set number of nodes in input, hidden and output layers.
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Initialize weights
        self.weights_0_1 = np.zeros((self.input_nodes,self.hidden_nodes))
    
        self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5, 
                                                (self.hidden_nodes, self.output_nodes))
        
        self.learning_rate = learning_rate
        
        self.layer_0 = np.zeros((1,input_nodes))
    
        
    def update_input_layer(self,review):

        # clear out previous state, reset the layer to be all 0s
        self.layer_0 *= 0
        for word in review.split(" "):
            if(word in self.word2index.keys()):
                self.layer_0[0][self.word2index[word]] += 1
                
    def get_target_for_label(self,label):
        if(label == 'POSITIVE'):
            return 1
        else:
            return 0
        
    def sigmoid(self,x):
        return 1 / (1 + np.exp(-x))
    
    
    def sigmoid_output_2_derivative(self,output):
        return output * (1 - output)
    
    def train(self, training_reviews, training_labels):
        
        assert(len(training_reviews) == len(training_labels))
        
        correct_so_far = 0
        
        start = time.time()
        
        for i in range(len(training_reviews)):
            
            review = training_reviews[i]
            label = training_labels[i]
            
            #### Implement the forward pass here ####
            ### Forward pass ###

            # Input Layer
            self.update_input_layer(review)

            # Hidden layer
            layer_1 = self.layer_0.dot(self.weights_0_1)

            # Output layer
            layer_2 = self.sigmoid(layer_1.dot(self.weights_1_2))

            #### Implement the backward pass here ####
            ### Backward pass ###

            # TODO: Output error
            layer_2_error = layer_2 - self.get_target_for_label(label) # Output layer error is the difference between desired target and actual output.
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)

            # TODO: Backpropagated error
            layer_1_error = layer_2_delta.dot(self.weights_1_2.T) # errors propagated to the hidden layer
            layer_1_delta = layer_1_error # hidden layer gradients - no nonlinearity so it's the same as the error

            # TODO: Update the weights
            self.weights_1_2 -= layer_1.T.dot(layer_2_delta) * self.learning_rate # update hidden-to-output weights with gradient descent step
            self.weights_0_1 -= self.layer_0.T.dot(layer_1_delta) * self.learning_rate # update input-to-hidden weights with gradient descent step

            if(np.abs(layer_2_error) < 0.5):
                correct_so_far += 1
            
            #reviews_per_second = i / float(time.time() - start)
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_reviews)))[:4] + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i+1) + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
            #if(i % 2500 == 0):
            #    print("")
    
    def test(self, testing_reviews):
        
        correct = 0
        
        start = time.time()
        
        for i in range(len(testing_reviews)):
            pred = self.run(testing_reviews[i])
            sys.stdout.write("\r\n" + pred)
            #if(pred == testing_labels[i]):
            #    correct += 1
            
            #reviews_per_second = i / float(time.time() - start)
            
            #sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_reviews)))[:4] \
             #                + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5] \
             #               + "% #Correct:" + str(correct) + " #Tested:" + str(i+1) + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")          
            
    def run(self, review):
        
        # Input Layer
        self.update_input_layer(review.lower())

        # Hidden layer
        layer_1 = self.layer_0.dot(self.weights_0_1)

        # Output layer
        layer_2 = self.sigmoid(layer_1.dot(self.weights_1_2))
        
        if(layer_2[0] > 0.5):
            return "POSITIVE"
        else:
            return "NEGATIVE"
        

In [120]:
mlp = SentimentNetwork(train_reviews[:],train_labels[:], learning_rate=0.1)

In [126]:
# train the network
mlp.train(train_reviews[:],train_labels[:])

Progress:0.0 #Correct:1 #Trained:1 Training Accuracy:100.%Progress:0.02 #Correct:2 #Trained:2 Training Accuracy:100.%Progress:0.05 #Correct:3 #Trained:3 Training Accuracy:100.%Progress:0.08 #Correct:4 #Trained:4 Training Accuracy:100.%Progress:0.11 #Correct:5 #Trained:5 Training Accuracy:100.%Progress:0.14 #Correct:6 #Trained:6 Training Accuracy:100.%Progress:0.17 #Correct:7 #Trained:7 Training Accuracy:100.%Progress:0.20 #Correct:8 #Trained:8 Training Accuracy:100.%Progress:0.23 #Correct:9 #Trained:9 Training Accuracy:100.%Progress:0.25 #Correct:10 #Trained:10 Training Accuracy:100.%Progress:0.28 #Correct:11 #Trained:11 Training Accuracy:100.%Progress:0.31 #Correct:12 #Trained:12 Training Accuracy:100.%Progress:0.34 #Correct:13 #Trained:13 Training Accuracy:100.%Progress:0.37 #Correct:14 #Trained:14 Training Accuracy:100.%Progress:0.40 #Correct:15 #Trained:15 Training Accuracy:100.%Progress:0.43 #Correct:16 #Trained:16 Training Accuracy:100.%Progress:0.46 #Correct:17 

Progress:6.92 #Correct:241 #Trained:241 Training Accuracy:100.%Progress:6.95 #Correct:242 #Trained:242 Training Accuracy:100.%Progress:6.98 #Correct:243 #Trained:243 Training Accuracy:100.%Progress:7.01 #Correct:244 #Trained:244 Training Accuracy:100.%Progress:7.04 #Correct:245 #Trained:245 Training Accuracy:100.%Progress:7.07 #Correct:246 #Trained:246 Training Accuracy:100.%Progress:7.10 #Correct:247 #Trained:247 Training Accuracy:100.%Progress:7.13 #Correct:248 #Trained:248 Training Accuracy:100.%Progress:7.15 #Correct:249 #Trained:249 Training Accuracy:100.%Progress:7.18 #Correct:250 #Trained:250 Training Accuracy:100.%Progress:7.21 #Correct:251 #Trained:251 Training Accuracy:100.%Progress:7.24 #Correct:252 #Trained:252 Training Accuracy:100.%Progress:7.27 #Correct:253 #Trained:253 Training Accuracy:100.%Progress:7.30 #Correct:254 #Trained:254 Training Accuracy:100.%Progress:7.33 #Correct:255 #Trained:255 Training Accuracy:100.%Progress:7.36 #Correct:256 #Trained:256

Progress:15.4 #Correct:536 #Trained:536 Training Accuracy:100.%Progress:15.4 #Correct:537 #Trained:537 Training Accuracy:100.%Progress:15.5 #Correct:538 #Trained:538 Training Accuracy:100.%Progress:15.5 #Correct:539 #Trained:539 Training Accuracy:100.%Progress:15.5 #Correct:540 #Trained:540 Training Accuracy:100.%Progress:15.5 #Correct:541 #Trained:541 Training Accuracy:100.%Progress:15.6 #Correct:542 #Trained:542 Training Accuracy:100.%Progress:15.6 #Correct:543 #Trained:543 Training Accuracy:100.%Progress:15.6 #Correct:544 #Trained:544 Training Accuracy:100.%Progress:15.7 #Correct:545 #Trained:545 Training Accuracy:100.%Progress:15.7 #Correct:546 #Trained:546 Training Accuracy:100.%Progress:15.7 #Correct:547 #Trained:547 Training Accuracy:100.%Progress:15.7 #Correct:548 #Trained:548 Training Accuracy:100.%Progress:15.8 #Correct:549 #Trained:549 Training Accuracy:100.%Progress:15.8 #Correct:550 #Trained:550 Training Accuracy:100.%Progress:15.8 #Correct:551 #Trained:551

Progress:23.7 #Correct:822 #Trained:822 Training Accuracy:100.%Progress:23.7 #Correct:823 #Trained:823 Training Accuracy:100.%Progress:23.7 #Correct:824 #Trained:824 Training Accuracy:100.%Progress:23.7 #Correct:825 #Trained:825 Training Accuracy:100.%Progress:23.8 #Correct:826 #Trained:826 Training Accuracy:100.%Progress:23.8 #Correct:827 #Trained:827 Training Accuracy:100.%Progress:23.8 #Correct:828 #Trained:828 Training Accuracy:100.%Progress:23.9 #Correct:829 #Trained:829 Training Accuracy:100.%Progress:23.9 #Correct:830 #Trained:830 Training Accuracy:100.%Progress:23.9 #Correct:831 #Trained:831 Training Accuracy:100.%Progress:23.9 #Correct:832 #Trained:832 Training Accuracy:100.%Progress:24.0 #Correct:833 #Trained:833 Training Accuracy:100.%Progress:24.0 #Correct:834 #Trained:834 Training Accuracy:100.%Progress:24.0 #Correct:835 #Trained:835 Training Accuracy:100.%Progress:24.1 #Correct:836 #Trained:836 Training Accuracy:100.%Progress:24.1 #Correct:837 #Trained:837

Progress:31.8 #Correct:1105 #Trained:1105 Training Accuracy:100.%Progress:31.8 #Correct:1106 #Trained:1106 Training Accuracy:100.%Progress:31.9 #Correct:1107 #Trained:1107 Training Accuracy:100.%Progress:31.9 #Correct:1108 #Trained:1108 Training Accuracy:100.%Progress:31.9 #Correct:1109 #Trained:1109 Training Accuracy:100.%Progress:32.0 #Correct:1110 #Trained:1110 Training Accuracy:100.%Progress:32.0 #Correct:1111 #Trained:1111 Training Accuracy:100.%Progress:32.0 #Correct:1112 #Trained:1112 Training Accuracy:100.%Progress:32.1 #Correct:1113 #Trained:1113 Training Accuracy:100.%Progress:32.1 #Correct:1114 #Trained:1114 Training Accuracy:100.%Progress:32.1 #Correct:1115 #Trained:1115 Training Accuracy:100.%Progress:32.1 #Correct:1116 #Trained:1116 Training Accuracy:100.%Progress:32.2 #Correct:1117 #Trained:1117 Training Accuracy:100.%Progress:32.2 #Correct:1118 #Trained:1118 Training Accuracy:100.%Progress:32.2 #Correct:1119 #Trained:1119 Training Accuracy:100.%Progress:

Progress:40.0 #Correct:1389 #Trained:1389 Training Accuracy:100.%Progress:40.0 #Correct:1390 #Trained:1390 Training Accuracy:100.%Progress:40.1 #Correct:1391 #Trained:1391 Training Accuracy:100.%Progress:40.1 #Correct:1392 #Trained:1392 Training Accuracy:100.%Progress:40.1 #Correct:1393 #Trained:1393 Training Accuracy:100.%Progress:40.2 #Correct:1394 #Trained:1394 Training Accuracy:100.%Progress:40.2 #Correct:1395 #Trained:1395 Training Accuracy:100.%Progress:40.2 #Correct:1396 #Trained:1396 Training Accuracy:100.%Progress:40.3 #Correct:1397 #Trained:1397 Training Accuracy:100.%Progress:40.3 #Correct:1398 #Trained:1398 Training Accuracy:100.%Progress:40.3 #Correct:1399 #Trained:1399 Training Accuracy:100.%Progress:40.3 #Correct:1400 #Trained:1400 Training Accuracy:100.%Progress:40.4 #Correct:1401 #Trained:1401 Training Accuracy:100.%Progress:40.4 #Correct:1402 #Trained:1402 Training Accuracy:100.%Progress:40.4 #Correct:1403 #Trained:1403 Training Accuracy:100.%Progress:

Progress:48.3 #Correct:1676 #Trained:1676 Training Accuracy:100.%Progress:48.3 #Correct:1677 #Trained:1677 Training Accuracy:100.%Progress:48.4 #Correct:1678 #Trained:1678 Training Accuracy:100.%Progress:48.4 #Correct:1679 #Trained:1679 Training Accuracy:100.%Progress:48.4 #Correct:1680 #Trained:1680 Training Accuracy:100.%Progress:48.4 #Correct:1681 #Trained:1681 Training Accuracy:100.%Progress:48.5 #Correct:1682 #Trained:1682 Training Accuracy:100.%Progress:48.5 #Correct:1683 #Trained:1683 Training Accuracy:100.%Progress:48.5 #Correct:1684 #Trained:1684 Training Accuracy:100.%Progress:48.6 #Correct:1685 #Trained:1685 Training Accuracy:100.%Progress:48.6 #Correct:1686 #Trained:1686 Training Accuracy:100.%Progress:48.6 #Correct:1687 #Trained:1687 Training Accuracy:100.%Progress:48.7 #Correct:1688 #Trained:1688 Training Accuracy:100.%Progress:48.7 #Correct:1689 #Trained:1689 Training Accuracy:100.%Progress:48.7 #Correct:1690 #Trained:1690 Training Accuracy:100.%Progress:

Progress:55.2 #Correct:1916 #Trained:1916 Training Accuracy:100.%Progress:55.3 #Correct:1917 #Trained:1917 Training Accuracy:100.%Progress:55.3 #Correct:1918 #Trained:1918 Training Accuracy:100.%Progress:55.3 #Correct:1919 #Trained:1919 Training Accuracy:100.%Progress:55.3 #Correct:1920 #Trained:1920 Training Accuracy:100.%Progress:55.4 #Correct:1921 #Trained:1921 Training Accuracy:100.%Progress:55.4 #Correct:1922 #Trained:1922 Training Accuracy:100.%Progress:55.4 #Correct:1923 #Trained:1923 Training Accuracy:100.%Progress:55.5 #Correct:1924 #Trained:1924 Training Accuracy:100.%Progress:55.5 #Correct:1925 #Trained:1925 Training Accuracy:100.%Progress:55.5 #Correct:1926 #Trained:1926 Training Accuracy:100.%Progress:55.6 #Correct:1927 #Trained:1927 Training Accuracy:100.%Progress:55.6 #Correct:1928 #Trained:1928 Training Accuracy:100.%Progress:55.6 #Correct:1929 #Trained:1929 Training Accuracy:100.%Progress:55.6 #Correct:1930 #Trained:1930 Training Accuracy:100.%Progress:

Progress:62.1 #Correct:2153 #Trained:2153 Training Accuracy:100.%Progress:62.1 #Correct:2154 #Trained:2154 Training Accuracy:100.%Progress:62.1 #Correct:2155 #Trained:2155 Training Accuracy:100.%Progress:62.2 #Correct:2156 #Trained:2156 Training Accuracy:100.%Progress:62.2 #Correct:2157 #Trained:2157 Training Accuracy:100.%Progress:62.2 #Correct:2158 #Trained:2158 Training Accuracy:100.%Progress:62.2 #Correct:2159 #Trained:2159 Training Accuracy:100.%Progress:62.3 #Correct:2160 #Trained:2160 Training Accuracy:100.%Progress:62.3 #Correct:2161 #Trained:2161 Training Accuracy:100.%Progress:62.3 #Correct:2162 #Trained:2162 Training Accuracy:100.%Progress:62.4 #Correct:2163 #Trained:2163 Training Accuracy:100.%Progress:62.4 #Correct:2164 #Trained:2164 Training Accuracy:100.%Progress:62.4 #Correct:2165 #Trained:2165 Training Accuracy:100.%Progress:62.5 #Correct:2166 #Trained:2166 Training Accuracy:100.%Progress:62.5 #Correct:2167 #Trained:2167 Training Accuracy:100.%Progress:

Progress:69.9 #Correct:2425 #Trained:2425 Training Accuracy:100.%Progress:70.0 #Correct:2426 #Trained:2426 Training Accuracy:100.%Progress:70.0 #Correct:2427 #Trained:2427 Training Accuracy:100.%Progress:70.0 #Correct:2428 #Trained:2428 Training Accuracy:100.%Progress:70.0 #Correct:2429 #Trained:2429 Training Accuracy:100.%Progress:70.1 #Correct:2430 #Trained:2430 Training Accuracy:100.%Progress:70.1 #Correct:2431 #Trained:2431 Training Accuracy:100.%Progress:70.1 #Correct:2432 #Trained:2432 Training Accuracy:100.%Progress:70.2 #Correct:2433 #Trained:2433 Training Accuracy:100.%Progress:70.2 #Correct:2434 #Trained:2434 Training Accuracy:100.%Progress:70.2 #Correct:2435 #Trained:2435 Training Accuracy:100.%Progress:70.2 #Correct:2436 #Trained:2436 Training Accuracy:100.%Progress:70.3 #Correct:2437 #Trained:2437 Training Accuracy:100.%Progress:70.3 #Correct:2438 #Trained:2438 Training Accuracy:100.%Progress:70.3 #Correct:2439 #Trained:2439 Training Accuracy:100.%Progress:

Progress:78.4 #Correct:2718 #Trained:2718 Training Accuracy:100.%Progress:78.4 #Correct:2719 #Trained:2719 Training Accuracy:100.%Progress:78.4 #Correct:2720 #Trained:2720 Training Accuracy:100.%Progress:78.5 #Correct:2721 #Trained:2721 Training Accuracy:100.%Progress:78.5 #Correct:2722 #Trained:2722 Training Accuracy:100.%Progress:78.5 #Correct:2723 #Trained:2723 Training Accuracy:100.%Progress:78.6 #Correct:2724 #Trained:2724 Training Accuracy:100.%Progress:78.6 #Correct:2725 #Trained:2725 Training Accuracy:100.%Progress:78.6 #Correct:2726 #Trained:2726 Training Accuracy:100.%Progress:78.6 #Correct:2727 #Trained:2727 Training Accuracy:100.%Progress:78.7 #Correct:2728 #Trained:2728 Training Accuracy:100.%Progress:78.7 #Correct:2729 #Trained:2729 Training Accuracy:100.%Progress:78.7 #Correct:2730 #Trained:2730 Training Accuracy:100.%Progress:78.8 #Correct:2731 #Trained:2731 Training Accuracy:100.%Progress:78.8 #Correct:2732 #Trained:2732 Training Accuracy:100.%Progress:

Progress:85.8 #Correct:2976 #Trained:2976 Training Accuracy:100.%Progress:85.9 #Correct:2977 #Trained:2977 Training Accuracy:100.%Progress:85.9 #Correct:2978 #Trained:2978 Training Accuracy:100.%Progress:85.9 #Correct:2979 #Trained:2979 Training Accuracy:100.%Progress:85.9 #Correct:2980 #Trained:2980 Training Accuracy:100.%Progress:86.0 #Correct:2981 #Trained:2981 Training Accuracy:100.%Progress:86.0 #Correct:2982 #Trained:2982 Training Accuracy:100.%Progress:86.0 #Correct:2983 #Trained:2983 Training Accuracy:100.%Progress:86.1 #Correct:2984 #Trained:2984 Training Accuracy:100.%Progress:86.1 #Correct:2985 #Trained:2985 Training Accuracy:100.%Progress:86.1 #Correct:2986 #Trained:2986 Training Accuracy:100.%Progress:86.2 #Correct:2987 #Trained:2987 Training Accuracy:100.%Progress:86.2 #Correct:2988 #Trained:2988 Training Accuracy:100.%Progress:86.2 #Correct:2989 #Trained:2989 Training Accuracy:100.%Progress:86.2 #Correct:2990 #Trained:2990 Training Accuracy:100.%Progress:

Progress:93.4 #Correct:3239 #Trained:3239 Training Accuracy:100.%Progress:93.5 #Correct:3240 #Trained:3240 Training Accuracy:100.%Progress:93.5 #Correct:3241 #Trained:3241 Training Accuracy:100.%Progress:93.5 #Correct:3242 #Trained:3242 Training Accuracy:100.%Progress:93.5 #Correct:3243 #Trained:3243 Training Accuracy:100.%Progress:93.6 #Correct:3244 #Trained:3244 Training Accuracy:100.%Progress:93.6 #Correct:3245 #Trained:3245 Training Accuracy:100.%Progress:93.6 #Correct:3246 #Trained:3246 Training Accuracy:100.%Progress:93.7 #Correct:3247 #Trained:3247 Training Accuracy:100.%Progress:93.7 #Correct:3248 #Trained:3248 Training Accuracy:100.%Progress:93.7 #Correct:3249 #Trained:3249 Training Accuracy:100.%Progress:93.7 #Correct:3250 #Trained:3250 Training Accuracy:100.%Progress:93.8 #Correct:3251 #Trained:3251 Training Accuracy:100.%Progress:93.8 #Correct:3252 #Trained:3252 Training Accuracy:100.%Progress:93.8 #Correct:3253 #Trained:3253 Training Accuracy:100.%Progress:

In [127]:
# evaluate our model before training (just to show how horrible it is)
len(test_reviews)
mlp.test(test_reviews)


POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE


POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
P

In [128]:
mlp = SentimentNetwork(train_reviews[:],train_labels[:], learning_rate=0.01)

In [129]:
# train the network
mlp.train(train_reviews[:],train_labels[:])

Progress:0.0 #Correct:0 #Trained:1 Training Accuracy:0.0%Progress:0.02 #Correct:0 #Trained:2 Training Accuracy:0.0%Progress:0.05 #Correct:1 #Trained:3 Training Accuracy:33.3%Progress:0.08 #Correct:2 #Trained:4 Training Accuracy:50.0%Progress:0.11 #Correct:3 #Trained:5 Training Accuracy:60.0%Progress:0.14 #Correct:4 #Trained:6 Training Accuracy:66.6%Progress:0.17 #Correct:4 #Trained:7 Training Accuracy:57.1%Progress:0.20 #Correct:5 #Trained:8 Training Accuracy:62.5%Progress:0.23 #Correct:6 #Trained:9 Training Accuracy:66.6%Progress:0.25 #Correct:7 #Trained:10 Training Accuracy:70.0%Progress:0.28 #Correct:8 #Trained:11 Training Accuracy:72.7%Progress:0.31 #Correct:8 #Trained:12 Training Accuracy:66.6%Progress:0.34 #Correct:9 #Trained:13 Training Accuracy:69.2%Progress:0.37 #Correct:10 #Trained:14 Training Accuracy:71.4%Progress:0.40 #Correct:11 #Trained:15 Training Accuracy:73.3%Progress:0.43 #Correct:12 #Trained:16 Training Accuracy:75.0%Progress:0.46 #Correct:13 #Train

Progress:8.11 #Correct:274 #Trained:282 Training Accuracy:97.1%Progress:8.14 #Correct:275 #Trained:283 Training Accuracy:97.1%Progress:8.16 #Correct:276 #Trained:284 Training Accuracy:97.1%Progress:8.19 #Correct:277 #Trained:285 Training Accuracy:97.1%Progress:8.22 #Correct:278 #Trained:286 Training Accuracy:97.2%Progress:8.25 #Correct:279 #Trained:287 Training Accuracy:97.2%Progress:8.28 #Correct:280 #Trained:288 Training Accuracy:97.2%Progress:8.31 #Correct:281 #Trained:289 Training Accuracy:97.2%Progress:8.34 #Correct:282 #Trained:290 Training Accuracy:97.2%Progress:8.37 #Correct:283 #Trained:291 Training Accuracy:97.2%Progress:8.40 #Correct:284 #Trained:292 Training Accuracy:97.2%Progress:8.42 #Correct:285 #Trained:293 Training Accuracy:97.2%Progress:8.45 #Correct:286 #Trained:294 Training Accuracy:97.2%Progress:8.48 #Correct:287 #Trained:295 Training Accuracy:97.2%Progress:8.51 #Correct:288 #Trained:296 Training Accuracy:97.2%Progress:8.54 #Correct:289 #Trained:297

Progress:16.4 #Correct:564 #Trained:572 Training Accuracy:98.6%Progress:16.5 #Correct:565 #Trained:573 Training Accuracy:98.6%Progress:16.5 #Correct:566 #Trained:574 Training Accuracy:98.6%Progress:16.5 #Correct:567 #Trained:575 Training Accuracy:98.6%Progress:16.5 #Correct:568 #Trained:576 Training Accuracy:98.6%Progress:16.6 #Correct:569 #Trained:577 Training Accuracy:98.6%Progress:16.6 #Correct:570 #Trained:578 Training Accuracy:98.6%Progress:16.6 #Correct:571 #Trained:579 Training Accuracy:98.6%Progress:16.7 #Correct:572 #Trained:580 Training Accuracy:98.6%Progress:16.7 #Correct:573 #Trained:581 Training Accuracy:98.6%Progress:16.7 #Correct:574 #Trained:582 Training Accuracy:98.6%Progress:16.8 #Correct:575 #Trained:583 Training Accuracy:98.6%Progress:16.8 #Correct:576 #Trained:584 Training Accuracy:98.6%Progress:16.8 #Correct:577 #Trained:585 Training Accuracy:98.6%Progress:16.8 #Correct:578 #Trained:586 Training Accuracy:98.6%Progress:16.9 #Correct:579 #Trained:587

Progress:23.4 #Correct:806 #Trained:814 Training Accuracy:99.0%Progress:23.4 #Correct:807 #Trained:815 Training Accuracy:99.0%Progress:23.5 #Correct:808 #Trained:816 Training Accuracy:99.0%Progress:23.5 #Correct:809 #Trained:817 Training Accuracy:99.0%Progress:23.5 #Correct:810 #Trained:818 Training Accuracy:99.0%Progress:23.6 #Correct:811 #Trained:819 Training Accuracy:99.0%Progress:23.6 #Correct:812 #Trained:820 Training Accuracy:99.0%Progress:23.6 #Correct:813 #Trained:821 Training Accuracy:99.0%Progress:23.7 #Correct:814 #Trained:822 Training Accuracy:99.0%Progress:23.7 #Correct:815 #Trained:823 Training Accuracy:99.0%Progress:23.7 #Correct:816 #Trained:824 Training Accuracy:99.0%Progress:23.7 #Correct:817 #Trained:825 Training Accuracy:99.0%Progress:23.8 #Correct:818 #Trained:826 Training Accuracy:99.0%Progress:23.8 #Correct:819 #Trained:827 Training Accuracy:99.0%Progress:23.8 #Correct:820 #Trained:828 Training Accuracy:99.0%Progress:23.9 #Correct:821 #Trained:829

Progress:31.6 #Correct:1091 #Trained:1099 Training Accuracy:99.2%Progress:31.7 #Correct:1092 #Trained:1100 Training Accuracy:99.2%Progress:31.7 #Correct:1093 #Trained:1101 Training Accuracy:99.2%Progress:31.7 #Correct:1094 #Trained:1102 Training Accuracy:99.2%Progress:31.8 #Correct:1095 #Trained:1103 Training Accuracy:99.2%Progress:31.8 #Correct:1096 #Trained:1104 Training Accuracy:99.2%Progress:31.8 #Correct:1097 #Trained:1105 Training Accuracy:99.2%Progress:31.8 #Correct:1098 #Trained:1106 Training Accuracy:99.2%Progress:31.9 #Correct:1099 #Trained:1107 Training Accuracy:99.2%Progress:31.9 #Correct:1100 #Trained:1108 Training Accuracy:99.2%Progress:31.9 #Correct:1101 #Trained:1109 Training Accuracy:99.2%Progress:32.0 #Correct:1102 #Trained:1110 Training Accuracy:99.2%Progress:32.0 #Correct:1103 #Trained:1111 Training Accuracy:99.2%Progress:32.0 #Correct:1104 #Trained:1112 Training Accuracy:99.2%Progress:32.1 #Correct:1105 #Trained:1113 Training Accuracy:99.2%Progress:

Progress:39.9 #Correct:1376 #Trained:1384 Training Accuracy:99.4%Progress:39.9 #Correct:1377 #Trained:1385 Training Accuracy:99.4%Progress:39.9 #Correct:1378 #Trained:1386 Training Accuracy:99.4%Progress:40.0 #Correct:1379 #Trained:1387 Training Accuracy:99.4%Progress:40.0 #Correct:1380 #Trained:1388 Training Accuracy:99.4%Progress:40.0 #Correct:1381 #Trained:1389 Training Accuracy:99.4%Progress:40.0 #Correct:1382 #Trained:1390 Training Accuracy:99.4%Progress:40.1 #Correct:1383 #Trained:1391 Training Accuracy:99.4%Progress:40.1 #Correct:1384 #Trained:1392 Training Accuracy:99.4%Progress:40.1 #Correct:1385 #Trained:1393 Training Accuracy:99.4%Progress:40.2 #Correct:1386 #Trained:1394 Training Accuracy:99.4%Progress:40.2 #Correct:1387 #Trained:1395 Training Accuracy:99.4%Progress:40.2 #Correct:1388 #Trained:1396 Training Accuracy:99.4%Progress:40.3 #Correct:1389 #Trained:1397 Training Accuracy:99.4%Progress:40.3 #Correct:1390 #Trained:1398 Training Accuracy:99.4%Progress:

Progress:48.5 #Correct:1672 #Trained:1682 Training Accuracy:99.4%Progress:48.5 #Correct:1673 #Trained:1683 Training Accuracy:99.4%Progress:48.5 #Correct:1674 #Trained:1684 Training Accuracy:99.4%Progress:48.6 #Correct:1675 #Trained:1685 Training Accuracy:99.4%Progress:48.6 #Correct:1676 #Trained:1686 Training Accuracy:99.4%Progress:48.6 #Correct:1677 #Trained:1687 Training Accuracy:99.4%Progress:48.7 #Correct:1678 #Trained:1688 Training Accuracy:99.4%Progress:48.7 #Correct:1679 #Trained:1689 Training Accuracy:99.4%Progress:48.7 #Correct:1680 #Trained:1690 Training Accuracy:99.4%Progress:48.7 #Correct:1681 #Trained:1691 Training Accuracy:99.4%Progress:48.8 #Correct:1682 #Trained:1692 Training Accuracy:99.4%Progress:48.8 #Correct:1683 #Trained:1693 Training Accuracy:99.4%Progress:48.8 #Correct:1684 #Trained:1694 Training Accuracy:99.4%Progress:48.9 #Correct:1685 #Trained:1695 Training Accuracy:99.4%Progress:48.9 #Correct:1686 #Trained:1696 Training Accuracy:99.4%Progress:

Progress:56.4 #Correct:1948 #Trained:1958 Training Accuracy:99.4%Progress:56.5 #Correct:1949 #Trained:1959 Training Accuracy:99.4%Progress:56.5 #Correct:1950 #Trained:1960 Training Accuracy:99.4%Progress:56.5 #Correct:1951 #Trained:1961 Training Accuracy:99.4%Progress:56.6 #Correct:1952 #Trained:1962 Training Accuracy:99.4%Progress:56.6 #Correct:1953 #Trained:1963 Training Accuracy:99.4%Progress:56.6 #Correct:1954 #Trained:1964 Training Accuracy:99.4%Progress:56.6 #Correct:1955 #Trained:1965 Training Accuracy:99.4%Progress:56.7 #Correct:1956 #Trained:1966 Training Accuracy:99.4%Progress:56.7 #Correct:1957 #Trained:1967 Training Accuracy:99.4%Progress:56.7 #Correct:1958 #Trained:1968 Training Accuracy:99.4%Progress:56.8 #Correct:1959 #Trained:1969 Training Accuracy:99.4%Progress:56.8 #Correct:1960 #Trained:1970 Training Accuracy:99.4%Progress:56.8 #Correct:1961 #Trained:1971 Training Accuracy:99.4%Progress:56.8 #Correct:1962 #Trained:1972 Training Accuracy:99.4%Progress:

Progress:64.9 #Correct:2241 #Trained:2251 Training Accuracy:99.5%Progress:64.9 #Correct:2242 #Trained:2252 Training Accuracy:99.5%Progress:65.0 #Correct:2243 #Trained:2253 Training Accuracy:99.5%Progress:65.0 #Correct:2244 #Trained:2254 Training Accuracy:99.5%Progress:65.0 #Correct:2245 #Trained:2255 Training Accuracy:99.5%Progress:65.0 #Correct:2246 #Trained:2256 Training Accuracy:99.5%Progress:65.1 #Correct:2247 #Trained:2257 Training Accuracy:99.5%Progress:65.1 #Correct:2248 #Trained:2258 Training Accuracy:99.5%Progress:65.1 #Correct:2249 #Trained:2259 Training Accuracy:99.5%Progress:65.2 #Correct:2250 #Trained:2260 Training Accuracy:99.5%Progress:65.2 #Correct:2251 #Trained:2261 Training Accuracy:99.5%Progress:65.2 #Correct:2252 #Trained:2262 Training Accuracy:99.5%Progress:65.3 #Correct:2253 #Trained:2263 Training Accuracy:99.5%Progress:65.3 #Correct:2254 #Trained:2264 Training Accuracy:99.5%Progress:65.3 #Correct:2255 #Trained:2265 Training Accuracy:99.5%Progress:

Progress:72.9 #Correct:2518 #Trained:2529 Training Accuracy:99.5%Progress:73.0 #Correct:2519 #Trained:2530 Training Accuracy:99.5%Progress:73.0 #Correct:2520 #Trained:2531 Training Accuracy:99.5%Progress:73.0 #Correct:2521 #Trained:2532 Training Accuracy:99.5%Progress:73.0 #Correct:2522 #Trained:2533 Training Accuracy:99.5%Progress:73.1 #Correct:2523 #Trained:2534 Training Accuracy:99.5%Progress:73.1 #Correct:2524 #Trained:2535 Training Accuracy:99.5%Progress:73.1 #Correct:2525 #Trained:2536 Training Accuracy:99.5%Progress:73.2 #Correct:2526 #Trained:2537 Training Accuracy:99.5%Progress:73.2 #Correct:2527 #Trained:2538 Training Accuracy:99.5%Progress:73.2 #Correct:2528 #Trained:2539 Training Accuracy:99.5%Progress:73.2 #Correct:2529 #Trained:2540 Training Accuracy:99.5%Progress:73.3 #Correct:2530 #Trained:2541 Training Accuracy:99.5%Progress:73.3 #Correct:2531 #Trained:2542 Training Accuracy:99.5%Progress:73.3 #Correct:2532 #Trained:2543 Training Accuracy:99.5%Progress:

Progress:81.6 #Correct:2819 #Trained:2830 Training Accuracy:99.6%Progress:81.6 #Correct:2820 #Trained:2831 Training Accuracy:99.6%Progress:81.7 #Correct:2821 #Trained:2832 Training Accuracy:99.6%Progress:81.7 #Correct:2822 #Trained:2833 Training Accuracy:99.6%Progress:81.7 #Correct:2823 #Trained:2834 Training Accuracy:99.6%Progress:81.8 #Correct:2824 #Trained:2835 Training Accuracy:99.6%Progress:81.8 #Correct:2825 #Trained:2836 Training Accuracy:99.6%Progress:81.8 #Correct:2826 #Trained:2837 Training Accuracy:99.6%Progress:81.8 #Correct:2827 #Trained:2838 Training Accuracy:99.6%Progress:81.9 #Correct:2828 #Trained:2839 Training Accuracy:99.6%Progress:81.9 #Correct:2829 #Trained:2840 Training Accuracy:99.6%Progress:81.9 #Correct:2830 #Trained:2841 Training Accuracy:99.6%Progress:82.0 #Correct:2831 #Trained:2842 Training Accuracy:99.6%Progress:82.0 #Correct:2832 #Trained:2843 Training Accuracy:99.6%Progress:82.0 #Correct:2833 #Trained:2844 Training Accuracy:99.6%Progress:

Progress:90.1 #Correct:3112 #Trained:3123 Training Accuracy:99.6%Progress:90.1 #Correct:3113 #Trained:3124 Training Accuracy:99.6%Progress:90.1 #Correct:3114 #Trained:3125 Training Accuracy:99.6%Progress:90.2 #Correct:3115 #Trained:3126 Training Accuracy:99.6%Progress:90.2 #Correct:3116 #Trained:3127 Training Accuracy:99.6%Progress:90.2 #Correct:3117 #Trained:3128 Training Accuracy:99.6%Progress:90.3 #Correct:3118 #Trained:3129 Training Accuracy:99.6%Progress:90.3 #Correct:3119 #Trained:3130 Training Accuracy:99.6%Progress:90.3 #Correct:3120 #Trained:3131 Training Accuracy:99.6%Progress:90.3 #Correct:3121 #Trained:3132 Training Accuracy:99.6%Progress:90.4 #Correct:3122 #Trained:3133 Training Accuracy:99.6%Progress:90.4 #Correct:3123 #Trained:3134 Training Accuracy:99.6%Progress:90.4 #Correct:3124 #Trained:3135 Training Accuracy:99.6%Progress:90.5 #Correct:3125 #Trained:3136 Training Accuracy:99.6%Progress:90.5 #Correct:3126 #Trained:3137 Training Accuracy:99.6%Progress:

Progress:96.4 #Correct:3332 #Trained:3343 Training Accuracy:99.6%Progress:96.5 #Correct:3333 #Trained:3344 Training Accuracy:99.6%Progress:96.5 #Correct:3334 #Trained:3345 Training Accuracy:99.6%Progress:96.5 #Correct:3335 #Trained:3346 Training Accuracy:99.6%Progress:96.5 #Correct:3336 #Trained:3347 Training Accuracy:99.6%Progress:96.6 #Correct:3337 #Trained:3348 Training Accuracy:99.6%Progress:96.6 #Correct:3337 #Trained:3349 Training Accuracy:99.6%Progress:96.6 #Correct:3337 #Trained:3350 Training Accuracy:99.6%Progress:96.7 #Correct:3337 #Trained:3351 Training Accuracy:99.5%Progress:96.7 #Correct:3337 #Trained:3352 Training Accuracy:99.5%Progress:96.7 #Correct:3337 #Trained:3353 Training Accuracy:99.5%Progress:96.7 #Correct:3337 #Trained:3354 Training Accuracy:99.4%Progress:96.8 #Correct:3337 #Trained:3355 Training Accuracy:99.4%Progress:96.8 #Correct:3337 #Trained:3356 Training Accuracy:99.4%Progress:96.8 #Correct:3337 #Trained:3357 Training Accuracy:99.4%Progress:

In [130]:
mlp = SentimentNetwork(train_reviews[:],train_labels[:], learning_rate=0.001)

In [131]:
# train the network
mlp.train(train_reviews[:],train_labels[:])
len(test_reviews)
mlp.test(test_reviews)

Progress:0.0 #Correct:0 #Trained:1 Training Accuracy:0.0%Progress:0.02 #Correct:0 #Trained:2 Training Accuracy:0.0%Progress:0.05 #Correct:1 #Trained:3 Training Accuracy:33.3%Progress:0.08 #Correct:2 #Trained:4 Training Accuracy:50.0%Progress:0.11 #Correct:3 #Trained:5 Training Accuracy:60.0%Progress:0.14 #Correct:4 #Trained:6 Training Accuracy:66.6%Progress:0.17 #Correct:4 #Trained:7 Training Accuracy:57.1%Progress:0.20 #Correct:5 #Trained:8 Training Accuracy:62.5%Progress:0.23 #Correct:6 #Trained:9 Training Accuracy:66.6%Progress:0.25 #Correct:7 #Trained:10 Training Accuracy:70.0%Progress:0.28 #Correct:8 #Trained:11 Training Accuracy:72.7%Progress:0.31 #Correct:8 #Trained:12 Training Accuracy:66.6%Progress:0.34 #Correct:9 #Trained:13 Training Accuracy:69.2%Progress:0.37 #Correct:10 #Trained:14 Training Accuracy:71.4%Progress:0.40 #Correct:11 #Trained:15 Training Accuracy:73.3%Progress:0.43 #Correct:12 #Trained:16 Training Accuracy:75.0%Progress:0.46 #Correct:13 #Train

Progress:7.99 #Correct:270 #Trained:278 Training Accuracy:97.1%Progress:8.02 #Correct:271 #Trained:279 Training Accuracy:97.1%Progress:8.05 #Correct:272 #Trained:280 Training Accuracy:97.1%Progress:8.08 #Correct:273 #Trained:281 Training Accuracy:97.1%Progress:8.11 #Correct:274 #Trained:282 Training Accuracy:97.1%Progress:8.14 #Correct:275 #Trained:283 Training Accuracy:97.1%Progress:8.16 #Correct:276 #Trained:284 Training Accuracy:97.1%Progress:8.19 #Correct:277 #Trained:285 Training Accuracy:97.1%Progress:8.22 #Correct:278 #Trained:286 Training Accuracy:97.2%Progress:8.25 #Correct:279 #Trained:287 Training Accuracy:97.2%Progress:8.28 #Correct:280 #Trained:288 Training Accuracy:97.2%Progress:8.31 #Correct:281 #Trained:289 Training Accuracy:97.2%Progress:8.34 #Correct:282 #Trained:290 Training Accuracy:97.2%Progress:8.37 #Correct:283 #Trained:291 Training Accuracy:97.2%Progress:8.40 #Correct:284 #Trained:292 Training Accuracy:97.2%Progress:8.42 #Correct:285 #Trained:293

Progress:16.8 #Correct:577 #Trained:585 Training Accuracy:98.6%Progress:16.8 #Correct:578 #Trained:586 Training Accuracy:98.6%Progress:16.9 #Correct:579 #Trained:587 Training Accuracy:98.6%Progress:16.9 #Correct:580 #Trained:588 Training Accuracy:98.6%Progress:16.9 #Correct:581 #Trained:589 Training Accuracy:98.6%Progress:17.0 #Correct:582 #Trained:590 Training Accuracy:98.6%Progress:17.0 #Correct:583 #Trained:591 Training Accuracy:98.6%Progress:17.0 #Correct:584 #Trained:592 Training Accuracy:98.6%Progress:17.0 #Correct:585 #Trained:593 Training Accuracy:98.6%Progress:17.1 #Correct:586 #Trained:594 Training Accuracy:98.6%Progress:17.1 #Correct:587 #Trained:595 Training Accuracy:98.6%Progress:17.1 #Correct:588 #Trained:596 Training Accuracy:98.6%Progress:17.2 #Correct:589 #Trained:597 Training Accuracy:98.6%Progress:17.2 #Correct:590 #Trained:598 Training Accuracy:98.6%Progress:17.2 #Correct:591 #Trained:599 Training Accuracy:98.6%Progress:17.2 #Correct:592 #Trained:600

Progress:25.7 #Correct:884 #Trained:892 Training Accuracy:99.1%Progress:25.7 #Correct:885 #Trained:893 Training Accuracy:99.1%Progress:25.7 #Correct:886 #Trained:894 Training Accuracy:99.1%Progress:25.8 #Correct:887 #Trained:895 Training Accuracy:99.1%Progress:25.8 #Correct:888 #Trained:896 Training Accuracy:99.1%Progress:25.8 #Correct:889 #Trained:897 Training Accuracy:99.1%Progress:25.8 #Correct:890 #Trained:898 Training Accuracy:99.1%Progress:25.9 #Correct:891 #Trained:899 Training Accuracy:99.1%Progress:25.9 #Correct:892 #Trained:900 Training Accuracy:99.1%Progress:25.9 #Correct:893 #Trained:901 Training Accuracy:99.1%Progress:26.0 #Correct:894 #Trained:902 Training Accuracy:99.1%Progress:26.0 #Correct:895 #Trained:903 Training Accuracy:99.1%Progress:26.0 #Correct:896 #Trained:904 Training Accuracy:99.1%Progress:26.0 #Correct:897 #Trained:905 Training Accuracy:99.1%Progress:26.1 #Correct:898 #Trained:906 Training Accuracy:99.1%Progress:26.1 #Correct:899 #Trained:907

Progress:34.4 #Correct:1187 #Trained:1195 Training Accuracy:99.3%Progress:34.4 #Correct:1188 #Trained:1196 Training Accuracy:99.3%Progress:34.5 #Correct:1189 #Trained:1197 Training Accuracy:99.3%Progress:34.5 #Correct:1190 #Trained:1198 Training Accuracy:99.3%Progress:34.5 #Correct:1191 #Trained:1199 Training Accuracy:99.3%Progress:34.6 #Correct:1192 #Trained:1200 Training Accuracy:99.3%Progress:34.6 #Correct:1193 #Trained:1201 Training Accuracy:99.3%Progress:34.6 #Correct:1194 #Trained:1202 Training Accuracy:99.3%Progress:34.6 #Correct:1195 #Trained:1203 Training Accuracy:99.3%Progress:34.7 #Correct:1196 #Trained:1204 Training Accuracy:99.3%Progress:34.7 #Correct:1197 #Trained:1205 Training Accuracy:99.3%Progress:34.7 #Correct:1198 #Trained:1206 Training Accuracy:99.3%Progress:34.8 #Correct:1199 #Trained:1207 Training Accuracy:99.3%Progress:34.8 #Correct:1200 #Trained:1208 Training Accuracy:99.3%Progress:34.8 #Correct:1201 #Trained:1209 Training Accuracy:99.3%Progress:

Progress:43.0 #Correct:1485 #Trained:1493 Training Accuracy:99.4%Progress:43.1 #Correct:1486 #Trained:1494 Training Accuracy:99.4%Progress:43.1 #Correct:1487 #Trained:1495 Training Accuracy:99.4%Progress:43.1 #Correct:1488 #Trained:1496 Training Accuracy:99.4%Progress:43.1 #Correct:1489 #Trained:1497 Training Accuracy:99.4%Progress:43.2 #Correct:1490 #Trained:1498 Training Accuracy:99.4%Progress:43.2 #Correct:1491 #Trained:1499 Training Accuracy:99.4%Progress:43.2 #Correct:1492 #Trained:1500 Training Accuracy:99.4%Progress:43.3 #Correct:1493 #Trained:1501 Training Accuracy:99.4%Progress:43.3 #Correct:1494 #Trained:1502 Training Accuracy:99.4%Progress:43.3 #Correct:1495 #Trained:1503 Training Accuracy:99.4%Progress:43.3 #Correct:1496 #Trained:1504 Training Accuracy:99.4%Progress:43.4 #Correct:1497 #Trained:1505 Training Accuracy:99.4%Progress:43.4 #Correct:1498 #Trained:1506 Training Accuracy:99.4%Progress:43.4 #Correct:1499 #Trained:1507 Training Accuracy:99.4%Progress:

Progress:50.1 #Correct:1727 #Trained:1737 Training Accuracy:99.4%Progress:50.1 #Correct:1728 #Trained:1738 Training Accuracy:99.4%Progress:50.1 #Correct:1729 #Trained:1739 Training Accuracy:99.4%Progress:50.2 #Correct:1730 #Trained:1740 Training Accuracy:99.4%Progress:50.2 #Correct:1731 #Trained:1741 Training Accuracy:99.4%Progress:50.2 #Correct:1732 #Trained:1742 Training Accuracy:99.4%Progress:50.2 #Correct:1733 #Trained:1743 Training Accuracy:99.4%Progress:50.3 #Correct:1734 #Trained:1744 Training Accuracy:99.4%Progress:50.3 #Correct:1735 #Trained:1745 Training Accuracy:99.4%Progress:50.3 #Correct:1736 #Trained:1746 Training Accuracy:99.4%Progress:50.4 #Correct:1737 #Trained:1747 Training Accuracy:99.4%Progress:50.4 #Correct:1738 #Trained:1748 Training Accuracy:99.4%Progress:50.4 #Correct:1739 #Trained:1749 Training Accuracy:99.4%Progress:50.4 #Correct:1740 #Trained:1750 Training Accuracy:99.4%Progress:50.5 #Correct:1741 #Trained:1751 Training Accuracy:99.4%Progress:

Progress:57.2 #Correct:1975 #Trained:1985 Training Accuracy:99.4%Progress:57.3 #Correct:1976 #Trained:1986 Training Accuracy:99.4%Progress:57.3 #Correct:1977 #Trained:1987 Training Accuracy:99.4%Progress:57.3 #Correct:1978 #Trained:1988 Training Accuracy:99.4%Progress:57.3 #Correct:1979 #Trained:1989 Training Accuracy:99.4%Progress:57.4 #Correct:1980 #Trained:1990 Training Accuracy:99.4%Progress:57.4 #Correct:1981 #Trained:1991 Training Accuracy:99.4%Progress:57.4 #Correct:1982 #Trained:1992 Training Accuracy:99.4%Progress:57.5 #Correct:1983 #Trained:1993 Training Accuracy:99.4%Progress:57.5 #Correct:1984 #Trained:1994 Training Accuracy:99.4%Progress:57.5 #Correct:1985 #Trained:1995 Training Accuracy:99.4%Progress:57.5 #Correct:1986 #Trained:1996 Training Accuracy:99.4%Progress:57.6 #Correct:1987 #Trained:1997 Training Accuracy:99.4%Progress:57.6 #Correct:1988 #Trained:1998 Training Accuracy:99.4%Progress:57.6 #Correct:1989 #Trained:1999 Training Accuracy:99.4%Progress:

Progress:65.3 #Correct:2256 #Trained:2266 Training Accuracy:99.5%Progress:65.4 #Correct:2257 #Trained:2267 Training Accuracy:99.5%Progress:65.4 #Correct:2258 #Trained:2268 Training Accuracy:99.5%Progress:65.4 #Correct:2259 #Trained:2269 Training Accuracy:99.5%Progress:65.5 #Correct:2260 #Trained:2270 Training Accuracy:99.5%Progress:65.5 #Correct:2261 #Trained:2271 Training Accuracy:99.5%Progress:65.5 #Correct:2262 #Trained:2272 Training Accuracy:99.5%Progress:65.5 #Correct:2263 #Trained:2273 Training Accuracy:99.5%Progress:65.6 #Correct:2264 #Trained:2274 Training Accuracy:99.5%Progress:65.6 #Correct:2265 #Trained:2275 Training Accuracy:99.5%Progress:65.6 #Correct:2266 #Trained:2276 Training Accuracy:99.5%Progress:65.7 #Correct:2267 #Trained:2277 Training Accuracy:99.5%Progress:65.7 #Correct:2268 #Trained:2278 Training Accuracy:99.5%Progress:65.7 #Correct:2269 #Trained:2279 Training Accuracy:99.5%Progress:65.7 #Correct:2270 #Trained:2280 Training Accuracy:99.5%Progress:

Progress:73.6 #Correct:2542 #Trained:2553 Training Accuracy:99.5%Progress:73.7 #Correct:2543 #Trained:2554 Training Accuracy:99.5%Progress:73.7 #Correct:2544 #Trained:2555 Training Accuracy:99.5%Progress:73.7 #Correct:2545 #Trained:2556 Training Accuracy:99.5%Progress:73.7 #Correct:2546 #Trained:2557 Training Accuracy:99.5%Progress:73.8 #Correct:2547 #Trained:2558 Training Accuracy:99.5%Progress:73.8 #Correct:2548 #Trained:2559 Training Accuracy:99.5%Progress:73.8 #Correct:2549 #Trained:2560 Training Accuracy:99.5%Progress:73.9 #Correct:2550 #Trained:2561 Training Accuracy:99.5%Progress:73.9 #Correct:2551 #Trained:2562 Training Accuracy:99.5%Progress:73.9 #Correct:2552 #Trained:2563 Training Accuracy:99.5%Progress:73.9 #Correct:2553 #Trained:2564 Training Accuracy:99.5%Progress:74.0 #Correct:2554 #Trained:2565 Training Accuracy:99.5%Progress:74.0 #Correct:2555 #Trained:2566 Training Accuracy:99.5%Progress:74.0 #Correct:2556 #Trained:2567 Training Accuracy:99.5%Progress:

Progress:82.2 #Correct:2840 #Trained:2851 Training Accuracy:99.6%Progress:82.3 #Correct:2841 #Trained:2852 Training Accuracy:99.6%Progress:82.3 #Correct:2842 #Trained:2853 Training Accuracy:99.6%Progress:82.3 #Correct:2843 #Trained:2854 Training Accuracy:99.6%Progress:82.3 #Correct:2844 #Trained:2855 Training Accuracy:99.6%Progress:82.4 #Correct:2845 #Trained:2856 Training Accuracy:99.6%Progress:82.4 #Correct:2846 #Trained:2857 Training Accuracy:99.6%Progress:82.4 #Correct:2847 #Trained:2858 Training Accuracy:99.6%Progress:82.5 #Correct:2848 #Trained:2859 Training Accuracy:99.6%Progress:82.5 #Correct:2849 #Trained:2860 Training Accuracy:99.6%Progress:82.5 #Correct:2850 #Trained:2861 Training Accuracy:99.6%Progress:82.5 #Correct:2851 #Trained:2862 Training Accuracy:99.6%Progress:82.6 #Correct:2852 #Trained:2863 Training Accuracy:99.6%Progress:82.6 #Correct:2853 #Trained:2864 Training Accuracy:99.6%Progress:82.6 #Correct:2854 #Trained:2865 Training Accuracy:99.6%Progress:

Progress:90.6 #Correct:3130 #Trained:3141 Training Accuracy:99.6%Progress:90.6 #Correct:3131 #Trained:3142 Training Accuracy:99.6%Progress:90.7 #Correct:3132 #Trained:3143 Training Accuracy:99.6%Progress:90.7 #Correct:3133 #Trained:3144 Training Accuracy:99.6%Progress:90.7 #Correct:3134 #Trained:3145 Training Accuracy:99.6%Progress:90.7 #Correct:3135 #Trained:3146 Training Accuracy:99.6%Progress:90.8 #Correct:3136 #Trained:3147 Training Accuracy:99.6%Progress:90.8 #Correct:3137 #Trained:3148 Training Accuracy:99.6%Progress:90.8 #Correct:3138 #Trained:3149 Training Accuracy:99.6%Progress:90.9 #Correct:3139 #Trained:3150 Training Accuracy:99.6%Progress:90.9 #Correct:3140 #Trained:3151 Training Accuracy:99.6%Progress:90.9 #Correct:3141 #Trained:3152 Training Accuracy:99.6%Progress:90.9 #Correct:3142 #Trained:3153 Training Accuracy:99.6%Progress:91.0 #Correct:3143 #Trained:3154 Training Accuracy:99.6%Progress:91.0 #Correct:3144 #Trained:3155 Training Accuracy:99.6%Progress:

Progress:99.9 #Correct:3338 #Trained:3464 Training Accuracy:96.3%
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIV

POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE

# Understanding Neural Noise

In [132]:
#from IPython.display import Image
#Image(filename='assets/sentiment_network.png')

In [36]:
def update_input_layer(review):
    
    global layer_0
    
    # clear out previous state, reset the layer to be all 0s
    layer_0 *= 0
    for word in review.split(" "):
        layer_0[0][word2index[word]] += 1

update_input_layer(train_reviews[0])

In [37]:
layer_0

array([[0., 0., 0., ..., 0., 0., 0.]])

In [38]:
review_counter = Counter()

In [39]:
for word in train_reviews[0].split(" "):
    review_counter[word] += 1

In [40]:
review_counter.most_common()

[('1189', 2),
 ('5573', 1),
 ('4017', 1),
 ('1207', 1),
 ('4768', 1),
 ('8542', 1),
 ('17', 1),
 ('5085', 1),
 ('5773', 1)]

# Project 4: Reducing Noise in our Input Data

In [41]:
import time
import sys
import numpy as np

# Let's tweak our network from before to model these phenomena
class SentimentNetwork:
    def __init__(self, reviews,labels,hidden_nodes = 10, learning_rate = 0.1):
       
        # set our random number generator 
        np.random.seed(1)
    
        self.pre_process_data(reviews, labels)
        
        self.init_network(len(self.review_vocab),hidden_nodes, 1, learning_rate)
        
        
    def pre_process_data(self, reviews, labels):
        
        review_vocab = set()
        for review in reviews:
            for word in review.split(" "):
                review_vocab.add(word)
        self.review_vocab = list(review_vocab)
        
        label_vocab = set()
        for label in labels:
            label_vocab.add(label)
        
        self.label_vocab = list(label_vocab)
        
        self.review_vocab_size = len(self.review_vocab)
        self.label_vocab_size = len(self.label_vocab)
        
        self.word2index = {}
        for i, word in enumerate(self.review_vocab):
            self.word2index[word] = i
        
        self.label2index = {}
        for i, label in enumerate(self.label_vocab):
            self.label2index[label] = i
         
        
    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        # Set number of nodes in input, hidden and output layers.
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Initialize weights
        self.weights_0_1 = np.zeros((self.input_nodes,self.hidden_nodes))
    
        self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5, 
                                                (self.hidden_nodes, self.output_nodes))
        
        self.learning_rate = learning_rate
        
        self.layer_0 = np.zeros((1,input_nodes))
    
        
    def update_input_layer(self,review):

        # clear out previous state, reset the layer to be all 0s
        self.layer_0 *= 0
        for word in review.split(" "):
            if(word in self.word2index.keys()):
                self.layer_0[0][self.word2index[word]] = 1
                
    def get_target_for_label(self,label):
        if(label == 'POSITIVE'):
            return 1
        else:
            return 0
        
    def sigmoid(self,x):
        return 1 / (1 + np.exp(-x))
    
    
    def sigmoid_output_2_derivative(self,output):
        return output * (1 - output)
    
    def train(self, training_reviews, training_labels):
        
        assert(len(training_reviews) == len(training_labels))
        
        correct_so_far = 0
        
        start = time.time()
        
        for i in range(len(training_reviews)):
            
            review = training_reviews[i]
            label = training_labels[i]
            
            #### Implement the forward pass here ####
            ### Forward pass ###

            # Input Layer
            self.update_input_layer(review)

            # Hidden layer
            layer_1 = self.layer_0.dot(self.weights_0_1)

            # Output layer
            layer_2 = self.sigmoid(layer_1.dot(self.weights_1_2))

            #### Implement the backward pass here ####
            ### Backward pass ###

            # TODO: Output error
            layer_2_error = layer_2 - self.get_target_for_label(label) # Output layer error is the difference between desired target and actual output.
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)

            # TODO: Backpropagated error
            layer_1_error = layer_2_delta.dot(self.weights_1_2.T) # errors propagated to the hidden layer
            layer_1_delta = layer_1_error # hidden layer gradients - no nonlinearity so it's the same as the error

            # TODO: Update the weights
            self.weights_1_2 -= layer_1.T.dot(layer_2_delta) * self.learning_rate # update hidden-to-output weights with gradient descent step
            self.weights_0_1 -= self.layer_0.T.dot(layer_1_delta) * self.learning_rate # update input-to-hidden weights with gradient descent step

            if(np.abs(layer_2_error) < 0.5):
                correct_so_far += 1
            
            #reviews_per_second = i / float(time.time() - start)
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_reviews)))[:4] + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i+1) + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
            if(i % 2500 == 0):
                print("")
    
    def test(self, testing_reviews):
        
        correct = 0
        
        start = time.time()
        
        for i in range(len(testing_reviews)):
            pred = self.run(testing_reviews[i])
            sys.stdout.write("\r\n" + pred)
            #if(pred == testing_labels[i]):
            #    correct += 1
            
            #reviews_per_second = i / float(time.time() - start)
            
            #sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_reviews)))[:4] \
            #                 + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5] \
            #                + "% #Correct:" + str(correct) + " #Tested:" + str(i+1) + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")
    
    def run(self, review):
        
        # Input Layer
        self.update_input_layer(review.lower())

        # Hidden layer
        layer_1 = self.layer_0.dot(self.weights_0_1)

        # Output layer
        layer_2 = self.sigmoid(layer_1.dot(self.weights_1_2))
        
        if(layer_2[0] > 0.5):
            return "POSITIVE"
        else:
            return "NEGATIVE"
        

In [42]:
mlp = SentimentNetwork(train_reviews[:],train_labels[:], learning_rate=0.1)

In [43]:
mlp.train(train_reviews[:],train_labels[:])

Progress:0.0 #Correct:0 #Trained:1 Training Accuracy:0.0%


Progress:0.02 #Correct:0 #Trained:2 Training Accuracy:0.0%Progress:0.05 #Correct:1 #Trained:3 Training Accuracy:33.3%Progress:0.08 #Correct:2 #Trained:4 Training Accuracy:50.0%Progress:0.11 #Correct:3 #Trained:5 Training Accuracy:60.0%Progress:0.14 #Correct:4 #Trained:6 Training Accuracy:66.6%Progress:0.17 #Correct:4 #Trained:7 Training Accuracy:57.1%Progress:0.20 #Correct:5 #Trained:8 Training Accuracy:62.5%Progress:0.23 #Correct:6 #Trained:9 Training Accuracy:66.6%Progress:0.25 #Correct:7 #Trained:10 Training Accuracy:70.0%Progress:0.28 #Correct:8 #Trained:11 Training Accuracy:72.7%Progress:0.31 #Correct:8 #Trained:12 Training Accuracy:66.6%Progress:0.34 #Correct:9 #Trained:13 Training Accuracy:69.2%Progress:0.37 #Correct:10 #Trained:14 Training Accuracy:71.4%Progress:0.40 #Correct:11 #Trained:15 Training Accuracy:73.3%Progress:0.43 #Correct:12 #Trained:16 Training Accuracy:75.0%Progress:0.46 #Correct:13 #Trained:17 Training Accuracy:76.4%Progress:0.49 #Correct:14 #T

Progress:13.8 #Correct:472 #Trained:480 Training Accuracy:98.3%Progress:13.8 #Correct:473 #Trained:481 Training Accuracy:98.3%Progress:13.8 #Correct:474 #Trained:482 Training Accuracy:98.3%Progress:13.9 #Correct:475 #Trained:483 Training Accuracy:98.3%Progress:13.9 #Correct:476 #Trained:484 Training Accuracy:98.3%Progress:13.9 #Correct:477 #Trained:485 Training Accuracy:98.3%Progress:14.0 #Correct:478 #Trained:486 Training Accuracy:98.3%Progress:14.0 #Correct:479 #Trained:487 Training Accuracy:98.3%Progress:14.0 #Correct:480 #Trained:488 Training Accuracy:98.3%Progress:14.0 #Correct:481 #Trained:489 Training Accuracy:98.3%Progress:14.1 #Correct:482 #Trained:490 Training Accuracy:98.3%Progress:14.1 #Correct:483 #Trained:491 Training Accuracy:98.3%Progress:14.1 #Correct:484 #Trained:492 Training Accuracy:98.3%Progress:14.2 #Correct:485 #Trained:493 Training Accuracy:98.3%Progress:14.2 #Correct:486 #Trained:494 Training Accuracy:98.3%Progress:14.2 #Correct:487 #Trained:495

Progress:21.2 #Correct:730 #Trained:738 Training Accuracy:98.9%Progress:21.3 #Correct:731 #Trained:739 Training Accuracy:98.9%Progress:21.3 #Correct:732 #Trained:740 Training Accuracy:98.9%Progress:21.3 #Correct:733 #Trained:741 Training Accuracy:98.9%Progress:21.3 #Correct:734 #Trained:742 Training Accuracy:98.9%Progress:21.4 #Correct:735 #Trained:743 Training Accuracy:98.9%Progress:21.4 #Correct:736 #Trained:744 Training Accuracy:98.9%Progress:21.4 #Correct:737 #Trained:745 Training Accuracy:98.9%Progress:21.5 #Correct:738 #Trained:746 Training Accuracy:98.9%Progress:21.5 #Correct:739 #Trained:747 Training Accuracy:98.9%Progress:21.5 #Correct:740 #Trained:748 Training Accuracy:98.9%Progress:21.5 #Correct:741 #Trained:749 Training Accuracy:98.9%Progress:21.6 #Correct:742 #Trained:750 Training Accuracy:98.9%Progress:21.6 #Correct:743 #Trained:751 Training Accuracy:98.9%Progress:21.6 #Correct:744 #Trained:752 Training Accuracy:98.9%Progress:21.7 #Correct:745 #Trained:753

Progress:28.8 #Correct:991 #Trained:999 Training Accuracy:99.1%Progress:28.8 #Correct:992 #Trained:1000 Training Accuracy:99.2%Progress:28.8 #Correct:993 #Trained:1001 Training Accuracy:99.2%Progress:28.8 #Correct:994 #Trained:1002 Training Accuracy:99.2%Progress:28.9 #Correct:995 #Trained:1003 Training Accuracy:99.2%Progress:28.9 #Correct:996 #Trained:1004 Training Accuracy:99.2%Progress:28.9 #Correct:997 #Trained:1005 Training Accuracy:99.2%Progress:29.0 #Correct:998 #Trained:1006 Training Accuracy:99.2%Progress:29.0 #Correct:999 #Trained:1007 Training Accuracy:99.2%Progress:29.0 #Correct:1000 #Trained:1008 Training Accuracy:99.2%Progress:29.0 #Correct:1001 #Trained:1009 Training Accuracy:99.2%Progress:29.1 #Correct:1002 #Trained:1010 Training Accuracy:99.2%Progress:29.1 #Correct:1003 #Trained:1011 Training Accuracy:99.2%Progress:29.1 #Correct:1004 #Trained:1012 Training Accuracy:99.2%Progress:29.2 #Correct:1005 #Trained:1013 Training Accuracy:99.2%Progress:29.2 #Corr

Progress:36.3 #Correct:1251 #Trained:1259 Training Accuracy:99.3%Progress:36.3 #Correct:1252 #Trained:1260 Training Accuracy:99.3%Progress:36.3 #Correct:1253 #Trained:1261 Training Accuracy:99.3%Progress:36.4 #Correct:1254 #Trained:1262 Training Accuracy:99.3%Progress:36.4 #Correct:1255 #Trained:1263 Training Accuracy:99.3%Progress:36.4 #Correct:1256 #Trained:1264 Training Accuracy:99.3%Progress:36.4 #Correct:1257 #Trained:1265 Training Accuracy:99.3%Progress:36.5 #Correct:1258 #Trained:1266 Training Accuracy:99.3%Progress:36.5 #Correct:1259 #Trained:1267 Training Accuracy:99.3%Progress:36.5 #Correct:1260 #Trained:1268 Training Accuracy:99.3%Progress:36.6 #Correct:1261 #Trained:1269 Training Accuracy:99.3%Progress:36.6 #Correct:1262 #Trained:1270 Training Accuracy:99.3%Progress:36.6 #Correct:1263 #Trained:1271 Training Accuracy:99.3%Progress:36.6 #Correct:1264 #Trained:1272 Training Accuracy:99.3%Progress:36.7 #Correct:1265 #Trained:1273 Training Accuracy:99.3%Progress:

Progress:43.3 #Correct:1494 #Trained:1502 Training Accuracy:99.4%Progress:43.3 #Correct:1495 #Trained:1503 Training Accuracy:99.4%Progress:43.3 #Correct:1496 #Trained:1504 Training Accuracy:99.4%Progress:43.4 #Correct:1497 #Trained:1505 Training Accuracy:99.4%Progress:43.4 #Correct:1498 #Trained:1506 Training Accuracy:99.4%Progress:43.4 #Correct:1499 #Trained:1507 Training Accuracy:99.4%Progress:43.5 #Correct:1500 #Trained:1508 Training Accuracy:99.4%Progress:43.5 #Correct:1501 #Trained:1509 Training Accuracy:99.4%Progress:43.5 #Correct:1502 #Trained:1510 Training Accuracy:99.4%Progress:43.5 #Correct:1503 #Trained:1511 Training Accuracy:99.4%Progress:43.6 #Correct:1504 #Trained:1512 Training Accuracy:99.4%Progress:43.6 #Correct:1505 #Trained:1513 Training Accuracy:99.4%Progress:43.6 #Correct:1506 #Trained:1514 Training Accuracy:99.4%Progress:43.7 #Correct:1507 #Trained:1515 Training Accuracy:99.4%Progress:43.7 #Correct:1508 #Trained:1516 Training Accuracy:99.4%Progress:

Progress:50.5 #Correct:1742 #Trained:1752 Training Accuracy:99.4%Progress:50.5 #Correct:1743 #Trained:1753 Training Accuracy:99.4%Progress:50.6 #Correct:1744 #Trained:1754 Training Accuracy:99.4%Progress:50.6 #Correct:1745 #Trained:1755 Training Accuracy:99.4%Progress:50.6 #Correct:1746 #Trained:1756 Training Accuracy:99.4%Progress:50.6 #Correct:1747 #Trained:1757 Training Accuracy:99.4%Progress:50.7 #Correct:1748 #Trained:1758 Training Accuracy:99.4%Progress:50.7 #Correct:1749 #Trained:1759 Training Accuracy:99.4%Progress:50.7 #Correct:1750 #Trained:1760 Training Accuracy:99.4%Progress:50.8 #Correct:1751 #Trained:1761 Training Accuracy:99.4%Progress:50.8 #Correct:1752 #Trained:1762 Training Accuracy:99.4%Progress:50.8 #Correct:1753 #Trained:1763 Training Accuracy:99.4%Progress:50.8 #Correct:1754 #Trained:1764 Training Accuracy:99.4%Progress:50.9 #Correct:1755 #Trained:1765 Training Accuracy:99.4%Progress:50.9 #Correct:1756 #Trained:1766 Training Accuracy:99.4%Progress:

Progress:58.0 #Correct:2001 #Trained:2011 Training Accuracy:99.5%Progress:58.0 #Correct:2002 #Trained:2012 Training Accuracy:99.5%Progress:58.0 #Correct:2003 #Trained:2013 Training Accuracy:99.5%Progress:58.1 #Correct:2004 #Trained:2014 Training Accuracy:99.5%Progress:58.1 #Correct:2005 #Trained:2015 Training Accuracy:99.5%Progress:58.1 #Correct:2006 #Trained:2016 Training Accuracy:99.5%Progress:58.1 #Correct:2007 #Trained:2017 Training Accuracy:99.5%Progress:58.2 #Correct:2008 #Trained:2018 Training Accuracy:99.5%Progress:58.2 #Correct:2009 #Trained:2019 Training Accuracy:99.5%Progress:58.2 #Correct:2010 #Trained:2020 Training Accuracy:99.5%Progress:58.3 #Correct:2011 #Trained:2021 Training Accuracy:99.5%Progress:58.3 #Correct:2012 #Trained:2022 Training Accuracy:99.5%Progress:58.3 #Correct:2013 #Trained:2023 Training Accuracy:99.5%Progress:58.4 #Correct:2014 #Trained:2024 Training Accuracy:99.5%Progress:58.4 #Correct:2015 #Trained:2025 Training Accuracy:99.5%Progress:

Progress:65.5 #Correct:2262 #Trained:2272 Training Accuracy:99.5%Progress:65.5 #Correct:2263 #Trained:2273 Training Accuracy:99.5%Progress:65.6 #Correct:2264 #Trained:2274 Training Accuracy:99.5%Progress:65.6 #Correct:2265 #Trained:2275 Training Accuracy:99.5%Progress:65.6 #Correct:2266 #Trained:2276 Training Accuracy:99.5%Progress:65.7 #Correct:2267 #Trained:2277 Training Accuracy:99.5%Progress:65.7 #Correct:2268 #Trained:2278 Training Accuracy:99.5%Progress:65.7 #Correct:2269 #Trained:2279 Training Accuracy:99.5%Progress:65.7 #Correct:2270 #Trained:2280 Training Accuracy:99.5%Progress:65.8 #Correct:2271 #Trained:2281 Training Accuracy:99.5%Progress:65.8 #Correct:2272 #Trained:2282 Training Accuracy:99.5%Progress:65.8 #Correct:2273 #Trained:2283 Training Accuracy:99.5%Progress:65.9 #Correct:2274 #Trained:2284 Training Accuracy:99.5%Progress:65.9 #Correct:2275 #Trained:2285 Training Accuracy:99.5%Progress:65.9 #Correct:2276 #Trained:2286 Training Accuracy:99.5%Progress:

Progress:72.1 #Correct:2491 #Trained:2502 Training Accuracy:99.5%Progress:72.2 #Correct:2492 #Trained:2503 Training Accuracy:99.5%Progress:72.2 #Correct:2493 #Trained:2504 Training Accuracy:99.5%Progress:72.2 #Correct:2494 #Trained:2505 Training Accuracy:99.5%Progress:72.3 #Correct:2495 #Trained:2506 Training Accuracy:99.5%Progress:72.3 #Correct:2496 #Trained:2507 Training Accuracy:99.5%Progress:72.3 #Correct:2497 #Trained:2508 Training Accuracy:99.5%Progress:72.4 #Correct:2498 #Trained:2509 Training Accuracy:99.5%Progress:72.4 #Correct:2499 #Trained:2510 Training Accuracy:99.5%Progress:72.4 #Correct:2500 #Trained:2511 Training Accuracy:99.5%Progress:72.4 #Correct:2501 #Trained:2512 Training Accuracy:99.5%Progress:72.5 #Correct:2502 #Trained:2513 Training Accuracy:99.5%Progress:72.5 #Correct:2503 #Trained:2514 Training Accuracy:99.5%Progress:72.5 #Correct:2504 #Trained:2515 Training Accuracy:99.5%Progress:72.6 #Correct:2505 #Trained:2516 Training Accuracy:99.5%Progress:

Progress:80.5 #Correct:2779 #Trained:2790 Training Accuracy:99.6%Progress:80.5 #Correct:2780 #Trained:2791 Training Accuracy:99.6%Progress:80.5 #Correct:2781 #Trained:2792 Training Accuracy:99.6%Progress:80.6 #Correct:2782 #Trained:2793 Training Accuracy:99.6%Progress:80.6 #Correct:2783 #Trained:2794 Training Accuracy:99.6%Progress:80.6 #Correct:2784 #Trained:2795 Training Accuracy:99.6%Progress:80.6 #Correct:2785 #Trained:2796 Training Accuracy:99.6%Progress:80.7 #Correct:2786 #Trained:2797 Training Accuracy:99.6%Progress:80.7 #Correct:2787 #Trained:2798 Training Accuracy:99.6%Progress:80.7 #Correct:2788 #Trained:2799 Training Accuracy:99.6%Progress:80.8 #Correct:2789 #Trained:2800 Training Accuracy:99.6%Progress:80.8 #Correct:2790 #Trained:2801 Training Accuracy:99.6%Progress:80.8 #Correct:2791 #Trained:2802 Training Accuracy:99.6%Progress:80.8 #Correct:2792 #Trained:2803 Training Accuracy:99.6%Progress:80.9 #Correct:2793 #Trained:2804 Training Accuracy:99.6%Progress:

Progress:87.8 #Correct:3034 #Trained:3045 Training Accuracy:99.6%Progress:87.9 #Correct:3035 #Trained:3046 Training Accuracy:99.6%Progress:87.9 #Correct:3036 #Trained:3047 Training Accuracy:99.6%Progress:87.9 #Correct:3037 #Trained:3048 Training Accuracy:99.6%Progress:87.9 #Correct:3038 #Trained:3049 Training Accuracy:99.6%Progress:88.0 #Correct:3039 #Trained:3050 Training Accuracy:99.6%Progress:88.0 #Correct:3040 #Trained:3051 Training Accuracy:99.6%Progress:88.0 #Correct:3041 #Trained:3052 Training Accuracy:99.6%Progress:88.1 #Correct:3042 #Trained:3053 Training Accuracy:99.6%Progress:88.1 #Correct:3043 #Trained:3054 Training Accuracy:99.6%Progress:88.1 #Correct:3044 #Trained:3055 Training Accuracy:99.6%Progress:88.1 #Correct:3045 #Trained:3056 Training Accuracy:99.6%Progress:88.2 #Correct:3046 #Trained:3057 Training Accuracy:99.6%Progress:88.2 #Correct:3047 #Trained:3058 Training Accuracy:99.6%Progress:88.2 #Correct:3048 #Trained:3059 Training Accuracy:99.6%Progress:

Progress:96.1 #Correct:3320 #Trained:3331 Training Accuracy:99.6%Progress:96.1 #Correct:3321 #Trained:3332 Training Accuracy:99.6%Progress:96.1 #Correct:3322 #Trained:3333 Training Accuracy:99.6%Progress:96.2 #Correct:3323 #Trained:3334 Training Accuracy:99.6%Progress:96.2 #Correct:3324 #Trained:3335 Training Accuracy:99.6%Progress:96.2 #Correct:3325 #Trained:3336 Training Accuracy:99.6%Progress:96.3 #Correct:3326 #Trained:3337 Training Accuracy:99.6%Progress:96.3 #Correct:3327 #Trained:3338 Training Accuracy:99.6%Progress:96.3 #Correct:3328 #Trained:3339 Training Accuracy:99.6%Progress:96.3 #Correct:3329 #Trained:3340 Training Accuracy:99.6%Progress:96.4 #Correct:3330 #Trained:3341 Training Accuracy:99.6%Progress:96.4 #Correct:3331 #Trained:3342 Training Accuracy:99.6%Progress:96.4 #Correct:3332 #Trained:3343 Training Accuracy:99.6%Progress:96.5 #Correct:3333 #Trained:3344 Training Accuracy:99.6%Progress:96.5 #Correct:3334 #Trained:3345 Training Accuracy:99.6%Progress:

In [44]:
# evaluate our model before training (just to show how horrible it is)
mlp.test(test_reviews[:])


POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE


POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE

# Analyzing Inefficiencies in our Network

In [45]:
#Image(filename='assets/sentiment_network_sparse.png')

In [46]:
layer_0 = np.zeros(10)

In [47]:
layer_0

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [48]:
layer_0[4] = 1
layer_0[9] = 1

In [49]:
layer_0

array([0., 0., 0., 0., 1., 0., 0., 0., 0., 1.])

In [50]:
weights_0_1 = np.random.randn(10,5)

In [51]:
layer_0.dot(weights_0_1)

array([-0.10503756,  0.44222989,  0.24392938, -0.55961832,  0.21389503])

In [52]:
indices = [4,9]

In [53]:
layer_1 = np.zeros(5)

In [54]:
for index in indices:
    layer_1 += (weights_0_1[index])

In [55]:
layer_1

array([-0.10503756,  0.44222989,  0.24392938, -0.55961832,  0.21389503])

In [56]:
#Image(filename='assets/sentiment_network_sparse_2.png')

# Making the Network More Efficient

In [57]:
import time
import sys

# Let's tweak our network from before to model these phenomena
class SentimentNetwork:
    def __init__(self, reviews,labels,hidden_nodes = 10, learning_rate = 0.1):
       
        np.random.seed(1)
    
        self.pre_process_data(reviews, labels)
        
        self.init_network(len(self.review_vocab),hidden_nodes, 1, learning_rate)
        
        
    def pre_process_data(self,reviews, labels):
        
        review_vocab = set()
        for review in reviews:
            for word in review.split(" "):
                review_vocab.add(word)
        self.review_vocab = list(review_vocab)
        
        label_vocab = set()
        for label in labels:
            label_vocab.add(label)
        
        self.label_vocab = list(label_vocab)
        
        self.review_vocab_size = len(self.review_vocab)
        self.label_vocab_size = len(self.label_vocab)
        
        self.word2index = {}
        for i, word in enumerate(self.review_vocab):
            self.word2index[word] = i
        
        self.label2index = {}
        for i, label in enumerate(self.label_vocab):
            self.label2index[label] = i
         
        
    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        # Set number of nodes in input, hidden and output layers.
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Initialize weights
        self.weights_0_1 = np.zeros((self.input_nodes,self.hidden_nodes))
    
        self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5, 
                                                (self.hidden_nodes, self.output_nodes))
        
        self.learning_rate = learning_rate
        
        self.layer_0 = np.zeros((1,input_nodes))
        self.layer_1 = np.zeros((1,hidden_nodes))
        
    def sigmoid(self,x):
        return 1 / (1 + np.exp(-x))
    
    
    def sigmoid_output_2_derivative(self,output):
        return output * (1 - output)
    
    def update_input_layer(self,review):

        # clear out previous state, reset the layer to be all 0s
        self.layer_0 *= 0
        for word in review.split(" "):
            self.layer_0[0][self.word2index[word]] = 1

    def get_target_for_label(self,label):
        if(label == 'POSITIVE'):
            return 1
        else:
            return 0
        
    def train(self, training_reviews_raw, training_labels):
        
        training_reviews = list()
        for review in training_reviews_raw:
            indices = set()
            for word in review.split(" "):
                if(word in self.word2index.keys()):
                    indices.add(self.word2index[word])
            training_reviews.append(list(indices))
        
        assert(len(training_reviews) == len(training_labels))
        
        correct_so_far = 0
        
        start = time.time()
        
        for i in range(len(training_reviews)):
            
            review = training_reviews[i]
            label = training_labels[i]
            
            #### Implement the forward pass here ####
            ### Forward pass ###

            # Input Layer

            # Hidden layer
#             layer_1 = self.layer_0.dot(self.weights_0_1)
            self.layer_1 *= 0
            for index in review:
                self.layer_1 += self.weights_0_1[index]
            
            # Output layer
            layer_2 = self.sigmoid(self.layer_1.dot(self.weights_1_2))

            #### Implement the backward pass here ####
            ### Backward pass ###

            # Output error
            layer_2_error = layer_2 - self.get_target_for_label(label) # Output layer error is the difference between desired target and actual output.
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)

            # Backpropagated error
            layer_1_error = layer_2_delta.dot(self.weights_1_2.T) # errors propagated to the hidden layer
            layer_1_delta = layer_1_error # hidden layer gradients - no nonlinearity so it's the same as the error

            # Update the weights
            self.weights_1_2 -= self.layer_1.T.dot(layer_2_delta) * self.learning_rate # update hidden-to-output weights with gradient descent step
            
            for index in review:
                self.weights_0_1[index] -= layer_1_delta[0] * self.learning_rate # update input-to-hidden weights with gradient descent step

            if(np.abs(layer_2_error) < 0.5):
                correct_so_far += 1
            
            #reviews_per_second = i / float(time.time() - start)
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_reviews)))[:4] + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i+1) + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
        
    
    def test(self, testing_reviews):
        
        correct = 0
        
        start = time.time()
        
        for i in range(len(testing_reviews)):
            pred = self.run(testing_reviews[i])
            sys.stdout.write("\r\n" + pred)
            #if(pred == testing_labels[i]):
            #    correct += 1
            
            #reviews_per_second = i / float(time.time() - start)
            
            #sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_reviews)))[:4] \
            #                 + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5] \
            #                + "% #Correct:" + str(correct) + " #Tested:" + str(i+1) + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")
    
    def run(self, review):
        
        # Input Layer


        # Hidden layer
        self.layer_1 *= 0
        unique_indices = set()
        for word in review.lower().split(" "):
            if word in self.word2index.keys():
                unique_indices.add(self.word2index[word])
        for index in unique_indices:
            self.layer_1 += self.weights_0_1[index]
        
        # Output layer
        layer_2 = self.sigmoid(self.layer_1.dot(self.weights_1_2))
        
        if(layer_2[0] > 0.5):
            return "POSITIVE"
        else:
            return "NEGATIVE"
        

In [58]:
mlp = SentimentNetwork(train_reviews[:],train_labels[:], learning_rate=0.1)

In [59]:
mlp.train(train_reviews[:],train_labels[:])

Progress:0.0 #Correct:0 #Trained:1 Training Accuracy:0.0%Progress:0.02 #Correct:0 #Trained:2 Training Accuracy:0.0%Progress:0.05 #Correct:1 #Trained:3 Training Accuracy:33.3%Progress:0.08 #Correct:2 #Trained:4 Training Accuracy:50.0%Progress:0.11 #Correct:3 #Trained:5 Training Accuracy:60.0%Progress:0.14 #Correct:4 #Trained:6 Training Accuracy:66.6%Progress:0.17 #Correct:4 #Trained:7 Training Accuracy:57.1%Progress:0.20 #Correct:5 #Trained:8 Training Accuracy:62.5%Progress:0.23 #Correct:6 #Trained:9 Training Accuracy:66.6%Progress:0.25 #Correct:7 #Trained:10 Training Accuracy:70.0%Progress:0.28 #Correct:8 #Trained:11 Training Accuracy:72.7%Progress:0.31 #Correct:8 #Trained:12 Training Accuracy:66.6%Progress:0.34 #Correct:9 #Trained:13 Training Accuracy:69.2%Progress:0.37 #Correct:10 #Trained:14 Training Accuracy:71.4%Progress:0.40 #Correct:11 #Trained:15 Training Accuracy:73.3%Progress:0.43 #Correct:12 #Trained:16 Training Accuracy:75.0%Progress:0.46 #Correct:13 #Train

Progress:46.1 #Correct:1589 #Trained:1598 Training Accuracy:99.4%Progress:46.1 #Correct:1590 #Trained:1599 Training Accuracy:99.4%Progress:46.1 #Correct:1591 #Trained:1600 Training Accuracy:99.4%Progress:46.1 #Correct:1592 #Trained:1601 Training Accuracy:99.4%Progress:46.2 #Correct:1593 #Trained:1602 Training Accuracy:99.4%Progress:46.2 #Correct:1594 #Trained:1603 Training Accuracy:99.4%Progress:46.2 #Correct:1595 #Trained:1604 Training Accuracy:99.4%Progress:46.3 #Correct:1596 #Trained:1605 Training Accuracy:99.4%Progress:46.3 #Correct:1597 #Trained:1606 Training Accuracy:99.4%Progress:46.3 #Correct:1598 #Trained:1607 Training Accuracy:99.4%Progress:46.3 #Correct:1599 #Trained:1608 Training Accuracy:99.4%Progress:46.4 #Correct:1600 #Trained:1609 Training Accuracy:99.4%Progress:46.4 #Correct:1601 #Trained:1610 Training Accuracy:99.4%Progress:46.4 #Correct:1602 #Trained:1611 Training Accuracy:99.4%Progress:46.5 #Correct:1603 #Trained:1612 Training Accuracy:99.4%Progress:

Progress:65.7 #Correct:2269 #Trained:2279 Training Accuracy:99.5%Progress:65.7 #Correct:2270 #Trained:2280 Training Accuracy:99.5%Progress:65.8 #Correct:2271 #Trained:2281 Training Accuracy:99.5%Progress:65.8 #Correct:2272 #Trained:2282 Training Accuracy:99.5%Progress:65.8 #Correct:2273 #Trained:2283 Training Accuracy:99.5%Progress:65.9 #Correct:2274 #Trained:2284 Training Accuracy:99.5%Progress:65.9 #Correct:2275 #Trained:2285 Training Accuracy:99.5%Progress:65.9 #Correct:2276 #Trained:2286 Training Accuracy:99.5%Progress:65.9 #Correct:2277 #Trained:2287 Training Accuracy:99.5%Progress:66.0 #Correct:2278 #Trained:2288 Training Accuracy:99.5%Progress:66.0 #Correct:2279 #Trained:2289 Training Accuracy:99.5%Progress:66.0 #Correct:2280 #Trained:2290 Training Accuracy:99.5%Progress:66.1 #Correct:2281 #Trained:2291 Training Accuracy:99.5%Progress:66.1 #Correct:2282 #Trained:2292 Training Accuracy:99.5%Progress:66.1 #Correct:2283 #Trained:2293 Training Accuracy:99.5%Progress:

Progress:92.6 #Correct:3199 #Trained:3210 Training Accuracy:99.6%Progress:92.6 #Correct:3200 #Trained:3211 Training Accuracy:99.6%Progress:92.6 #Correct:3201 #Trained:3212 Training Accuracy:99.6%Progress:92.7 #Correct:3202 #Trained:3213 Training Accuracy:99.6%Progress:92.7 #Correct:3203 #Trained:3214 Training Accuracy:99.6%Progress:92.7 #Correct:3204 #Trained:3215 Training Accuracy:99.6%Progress:92.8 #Correct:3205 #Trained:3216 Training Accuracy:99.6%Progress:92.8 #Correct:3206 #Trained:3217 Training Accuracy:99.6%Progress:92.8 #Correct:3207 #Trained:3218 Training Accuracy:99.6%Progress:92.8 #Correct:3208 #Trained:3219 Training Accuracy:99.6%Progress:92.9 #Correct:3209 #Trained:3220 Training Accuracy:99.6%Progress:92.9 #Correct:3210 #Trained:3221 Training Accuracy:99.6%Progress:92.9 #Correct:3211 #Trained:3222 Training Accuracy:99.6%Progress:93.0 #Correct:3212 #Trained:3223 Training Accuracy:99.6%Progress:93.0 #Correct:3213 #Trained:3224 Training Accuracy:99.6%Progress:

In [60]:
# evaluate our model before training (just to show how horrible it is)
mlp.test(test_reviews[:])


POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE


# Further Noise Reduction

In [61]:
#Image(filename='assets/sentiment_network_sparse_2.png')

In [62]:
# words most frequently seen in a review with a "POSITIVE" label
pos_neg_ratios.most_common()

[('6700', 6.259581464064923),
 ('7526', 5.754686804517968),
 ('5107', 5.616771097666572),
 ('5045', 5.60947179518496),
 ('8406', 5.424950017481403),
 ('4608', 5.4116460518550396),
 ('7860', 5.402677381872279),
 ('4442', 5.356586274672012),
 ('2698', 5.319752647268102),
 ('7669', 5.272999558563747),
 ('5819', 5.247024072160486),
 ('5174', 5.1647859739235145),
 ('203', 5.0689042022202315),
 ('3468', 5.0238805208462765),
 ('6980', 5.003946305945459),
 ('6084', 4.930870325627393),
 ('7945', 4.927253685157205),
 ('572', 4.912654885736052),
 ('6343', 4.852030263919617),
 ('7957', 4.844187086458591),
 ('3771', 4.844187086458591),
 ('7473', 4.795790545596741),
 ('4799', 4.795790545596741),
 ('3080', 4.770684624465665),
 ('3601', 4.7535901911063645),
 ('3283', 4.749270529961848),
 ('77', 4.74493212836325),
 ('5620', 4.718498871295094),
 ('6451', 4.718498871295094),
 ('2655', 4.709530201312334),
 ('6843', 4.700480365792417),
 ('4521', 4.6913478822291435),
 ('3730', 4.68213122712422),
 ('2641', 4

In [63]:
# words most frequently seen in a review with a "NEGATIVE" label
list(reversed(pos_neg_ratios.most_common()))[0:30]

[('1280', 0.8732733467904643),
 ('4032', 1.4781019103730135),
 ('6589', 1.5141277326297755),
 ('1556', 1.5769147207285403),
 ('5326', 1.5848968035179827),
 ('4801', 1.6094379124341003),
 ('154', 1.7692866133759964),
 ('6619', 1.8549383708495866),
 ('6461', 2.02537432040956),
 ('7685', 2.0343211063993665),
 ('5373', 2.1078794770003695),
 ('5658', 2.1709566505156093),
 ('4017', 2.181224235989778),
 ('1994', 2.3163784151263815),
 ('3718', 2.354544831924757),
 ('5672', 2.412335956953165),
 ('5922', 2.488996635039525),
 ('5085', 2.515678308454754),
 ('4580', 2.545531271604435),
 ('237', 2.5598646007434267),
 ('1543', 2.5751535276357784),
 ('254', 2.6390573296152584),
 ('5795', 2.6430177308313554),
 ('5773', 2.7176261976704943),
 ('4976', 2.8134107167600364),
 ('5035', 2.8590212280120886),
 ('2230', 2.953172659135195),
 ('755', 2.9618307218783095),
 ('3746', 3.0122615755052013),
 ('753', 3.0291670496402285)]

In [64]:
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook
output_notebook()

In [65]:
hist, edges = np.histogram(list(map(lambda x:x[1],pos_neg_ratios.most_common())), density=True, bins=100, normed=True)

p = figure(tools="pan,wheel_zoom,reset,save",
           toolbar_location="above",
           title="Word Positive/Negative Affinity Distribution")
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], line_color="#555555")
show(p)

  """Entry point for launching an IPython kernel.


In [66]:
frequency_frequency = Counter()

for word, cnt in total_counts.most_common():
    frequency_frequency[cnt] += 1

In [67]:
hist, edges = np.histogram(list(map(lambda x:x[1],frequency_frequency.most_common())), density=True, bins=100, normed=True)

p = figure(tools="pan,wheel_zoom,reset,save",
           toolbar_location="above",
           title="The frequency distribution of the words in our corpus")
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], line_color="#555555")
show(p)

  """Entry point for launching an IPython kernel.


# Reducing Noise by Strategically Reducing the Vocabulary

In [68]:
import time
import sys
import numpy as np

# Let's tweak our network from before to model these phenomena
class SentimentNetwork:
    def __init__(self, reviews,labels,min_count = 10,polarity_cutoff = 0.1,hidden_nodes = 10, learning_rate = 0.1):
       
        np.random.seed(1)
    
        self.pre_process_data(reviews, labels, polarity_cutoff, min_count)
        
        self.init_network(len(self.review_vocab),hidden_nodes, 1, learning_rate)
        
        
    def pre_process_data(self,reviews, labels, polarity_cutoff,min_count):
        
        positive_counts = Counter()
        negative_counts = Counter()
        total_counts = Counter()

        for i in range(len(reviews)):
            if(labels[i] == 'POSITIVE'):
                for word in reviews[i].split(" "):
                    positive_counts[word] += 1
                    total_counts[word] += 1
            else:
                for word in reviews[i].split(" "):
                    negative_counts[word] += 1
                    total_counts[word] += 1

        pos_neg_ratios = Counter()

        for term,cnt in list(total_counts.most_common()):
            if(cnt >= 50):
                pos_neg_ratio = positive_counts[term] / float(negative_counts[term]+1)
                pos_neg_ratios[term] = pos_neg_ratio

        for word,ratio in pos_neg_ratios.most_common():
            if(ratio > 1):
                pos_neg_ratios[word] = np.log(ratio)
            else:
                pos_neg_ratios[word] = -np.log((1 / (ratio + 0.01)))
        
        review_vocab = set()
        for review in reviews:
            for word in review.split(" "):
                if(total_counts[word] > min_count):
                    if(word in pos_neg_ratios.keys()):
                        if((pos_neg_ratios[word] >= polarity_cutoff) or (pos_neg_ratios[word] <= -polarity_cutoff)):
                            review_vocab.add(word)
                    else:
                        review_vocab.add(word)
        self.review_vocab = list(review_vocab)
        
        label_vocab = set()
        for label in labels:
            label_vocab.add(label)
        
        self.label_vocab = list(label_vocab)
        
        self.review_vocab_size = len(self.review_vocab)
        self.label_vocab_size = len(self.label_vocab)
        
        self.word2index = {}
        for i, word in enumerate(self.review_vocab):
            self.word2index[word] = i
        
        self.label2index = {}
        for i, label in enumerate(self.label_vocab):
            self.label2index[label] = i
         
        
    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        # Set number of nodes in input, hidden and output layers.
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Initialize weights
        self.weights_0_1 = np.zeros((self.input_nodes,self.hidden_nodes))
    
        self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5, 
                                                (self.hidden_nodes, self.output_nodes))
        
        self.learning_rate = learning_rate
        
        self.layer_0 = np.zeros((1,input_nodes))
        self.layer_1 = np.zeros((1,hidden_nodes))
        
    def sigmoid(self,x):
        return 1 / (1 + np.exp(-x))
    
    
    def sigmoid_output_2_derivative(self,output):
        return output * (1 - output)
    
    def update_input_layer(self,review):

        # clear out previous state, reset the layer to be all 0s
        self.layer_0 *= 0
        for word in review.split(" "):
            self.layer_0[0][self.word2index[word]] = 1

    def get_target_for_label(self,label):
        if(label == 'POSITIVE'):
            return 1
        else:
            return 0
        
    def train(self, training_reviews_raw, training_labels):
        
        training_reviews = list()
        for review in training_reviews_raw:
            indices = set()
            for word in review.split(" "):
                if(word in self.word2index.keys()):
                    indices.add(self.word2index[word])
            training_reviews.append(list(indices))
        
        assert(len(training_reviews) == len(training_labels))
        
        correct_so_far = 0
        
        start = time.time()
        
        for i in range(len(training_reviews)):
            
            review = training_reviews[i]
            label = training_labels[i]
            
            #### Implement the forward pass here ####
            ### Forward pass ###

            # Input Layer

            # Hidden layer
#             layer_1 = self.layer_0.dot(self.weights_0_1)
            self.layer_1 *= 0
            for index in review:
                self.layer_1 += self.weights_0_1[index]
            
            # Output layer
            layer_2 = self.sigmoid(self.layer_1.dot(self.weights_1_2))

            #### Implement the backward pass here ####
            ### Backward pass ###

            # Output error
            layer_2_error = layer_2 - self.get_target_for_label(label) # Output layer error is the difference between desired target and actual output.
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)

            # Backpropagated error
            layer_1_error = layer_2_delta.dot(self.weights_1_2.T) # errors propagated to the hidden layer
            layer_1_delta = layer_1_error # hidden layer gradients - no nonlinearity so it's the same as the error

            # Update the weights
            self.weights_1_2 -= self.layer_1.T.dot(layer_2_delta) * self.learning_rate # update hidden-to-output weights with gradient descent step
            
            for index in review:
                self.weights_0_1[index] -= layer_1_delta[0] * self.learning_rate # update input-to-hidden weights with gradient descent step

            if(layer_2 >= 0.5 and label == 'POSITIVE'):
                correct_so_far += 1
            if(layer_2 < 0.5 and label == 'NEGATIVE'):
                correct_so_far += 1
            
            #reviews_per_second = i / float(time.time() - start)
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_reviews)))[:4] + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i+1) + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
        
    
    def test(self, testing_reviews):
        
        correct = 0
        
        start = time.time()
        
        for i in range(len(testing_reviews)):
            pred = self.run(testing_reviews[i])
            sys.stdout.write("\r\n" + pred)
            #if(pred == testing_labels[i]):
            #    correct += 1
            
            #reviews_per_second = i / float(time.time() - start)
            
            #sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_reviews)))[:4] \
            #                 + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5] \
             #               + "% #Correct:" + str(correct) + " #Tested:" + str(i+1) + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")
    
    def run(self, review):
        
        # Input Layer


        # Hidden layer
        self.layer_1 *= 0
        unique_indices = set()
        for word in review.lower().split(" "):
            if word in self.word2index.keys():
                unique_indices.add(self.word2index[word])
        for index in unique_indices:
            self.layer_1 += self.weights_0_1[index]
        
        # Output layer
        layer_2 = self.sigmoid(self.layer_1.dot(self.weights_1_2))
        
        if(layer_2[0] >= 0.5):
            return "POSITIVE"
        else:
            return "NEGATIVE"
        

In [69]:
mlp = SentimentNetwork(train_reviews[:],train_labels[:],min_count=20,polarity_cutoff=0.05,learning_rate=0.01)

In [70]:
mlp.train(train_reviews[:],train_labels[:])

Progress:0.0 #Correct:1 #Trained:1 Training Accuracy:100.%Progress:0.02 #Correct:2 #Trained:2 Training Accuracy:100.%Progress:0.05 #Correct:3 #Trained:3 Training Accuracy:100.%Progress:0.08 #Correct:4 #Trained:4 Training Accuracy:100.%Progress:0.11 #Correct:5 #Trained:5 Training Accuracy:100.%Progress:0.14 #Correct:6 #Trained:6 Training Accuracy:100.%Progress:0.17 #Correct:7 #Trained:7 Training Accuracy:100.%Progress:0.20 #Correct:8 #Trained:8 Training Accuracy:100.%Progress:0.23 #Correct:9 #Trained:9 Training Accuracy:100.%Progress:0.25 #Correct:10 #Trained:10 Training Accuracy:100.%Progress:0.28 #Correct:11 #Trained:11 Training Accuracy:100.%Progress:0.31 #Correct:12 #Trained:12 Training Accuracy:100.%Progress:0.34 #Correct:13 #Trained:13 Training Accuracy:100.%Progress:0.37 #Correct:14 #Trained:14 Training Accuracy:100.%Progress:0.40 #Correct:15 #Trained:15 Training Accuracy:100.%Progress:0.43 #Correct:16 #Trained:16 Training Accuracy:100.%Progress:0.46 #Correct:17 

Progress:45.5 #Correct:1578 #Trained:1578 Training Accuracy:100.%Progress:45.5 #Correct:1579 #Trained:1579 Training Accuracy:100.%Progress:45.5 #Correct:1580 #Trained:1580 Training Accuracy:100.%Progress:45.6 #Correct:1581 #Trained:1581 Training Accuracy:100.%Progress:45.6 #Correct:1582 #Trained:1582 Training Accuracy:100.%Progress:45.6 #Correct:1583 #Trained:1583 Training Accuracy:100.%Progress:45.6 #Correct:1584 #Trained:1584 Training Accuracy:100.%Progress:45.7 #Correct:1585 #Trained:1585 Training Accuracy:100.%Progress:45.7 #Correct:1586 #Trained:1586 Training Accuracy:100.%Progress:45.7 #Correct:1587 #Trained:1587 Training Accuracy:100.%Progress:45.8 #Correct:1588 #Trained:1588 Training Accuracy:100.%Progress:45.8 #Correct:1589 #Trained:1589 Training Accuracy:100.%Progress:45.8 #Correct:1590 #Trained:1590 Training Accuracy:100.%Progress:45.9 #Correct:1591 #Trained:1591 Training Accuracy:100.%Progress:45.9 #Correct:1592 #Trained:1592 Training Accuracy:100.%Progress:

Progress:69.7 #Correct:2416 #Trained:2416 Training Accuracy:100.%Progress:69.7 #Correct:2417 #Trained:2417 Training Accuracy:100.%Progress:69.7 #Correct:2418 #Trained:2418 Training Accuracy:100.%Progress:69.8 #Correct:2419 #Trained:2419 Training Accuracy:100.%Progress:69.8 #Correct:2420 #Trained:2420 Training Accuracy:100.%Progress:69.8 #Correct:2421 #Trained:2421 Training Accuracy:100.%Progress:69.8 #Correct:2422 #Trained:2422 Training Accuracy:100.%Progress:69.9 #Correct:2423 #Trained:2423 Training Accuracy:100.%Progress:69.9 #Correct:2424 #Trained:2424 Training Accuracy:100.%Progress:69.9 #Correct:2425 #Trained:2425 Training Accuracy:100.%Progress:70.0 #Correct:2426 #Trained:2426 Training Accuracy:100.%Progress:70.0 #Correct:2427 #Trained:2427 Training Accuracy:100.%Progress:70.0 #Correct:2428 #Trained:2428 Training Accuracy:100.%Progress:70.0 #Correct:2429 #Trained:2429 Training Accuracy:100.%Progress:70.1 #Correct:2430 #Trained:2430 Training Accuracy:100.%Progress:

In [71]:
mlp = SentimentNetwork(train_reviews[:],train_labels[:],min_count=20,polarity_cutoff=0.8,learning_rate=0.01)

In [72]:
mlp.train(train_reviews[:], train_labels[:])

Progress:0.0 #Correct:1 #Trained:1 Training Accuracy:100.%Progress:0.02 #Correct:2 #Trained:2 Training Accuracy:100.%Progress:0.05 #Correct:3 #Trained:3 Training Accuracy:100.%Progress:0.08 #Correct:4 #Trained:4 Training Accuracy:100.%Progress:0.11 #Correct:5 #Trained:5 Training Accuracy:100.%Progress:0.14 #Correct:6 #Trained:6 Training Accuracy:100.%Progress:0.17 #Correct:7 #Trained:7 Training Accuracy:100.%Progress:0.20 #Correct:8 #Trained:8 Training Accuracy:100.%Progress:0.23 #Correct:9 #Trained:9 Training Accuracy:100.%Progress:0.25 #Correct:10 #Trained:10 Training Accuracy:100.%Progress:0.28 #Correct:11 #Trained:11 Training Accuracy:100.%Progress:0.31 #Correct:12 #Trained:12 Training Accuracy:100.%Progress:0.34 #Correct:13 #Trained:13 Training Accuracy:100.%Progress:0.37 #Correct:14 #Trained:14 Training Accuracy:100.%Progress:0.40 #Correct:15 #Trained:15 Training Accuracy:100.%Progress:0.43 #Correct:16 #Trained:16 Training Accuracy:100.%Progress:0.46 #Correct:17 

Progress:49.8 #Correct:1728 #Trained:1728 Training Accuracy:100.%Progress:49.8 #Correct:1729 #Trained:1729 Training Accuracy:100.%Progress:49.9 #Correct:1730 #Trained:1730 Training Accuracy:100.%Progress:49.9 #Correct:1731 #Trained:1731 Training Accuracy:100.%Progress:49.9 #Correct:1732 #Trained:1732 Training Accuracy:100.%Progress:50.0 #Correct:1733 #Trained:1733 Training Accuracy:100.%Progress:50.0 #Correct:1734 #Trained:1734 Training Accuracy:100.%Progress:50.0 #Correct:1735 #Trained:1735 Training Accuracy:100.%Progress:50.0 #Correct:1736 #Trained:1736 Training Accuracy:100.%Progress:50.1 #Correct:1737 #Trained:1737 Training Accuracy:100.%Progress:50.1 #Correct:1738 #Trained:1738 Training Accuracy:100.%Progress:50.1 #Correct:1739 #Trained:1739 Training Accuracy:100.%Progress:50.2 #Correct:1740 #Trained:1740 Training Accuracy:100.%Progress:50.2 #Correct:1741 #Trained:1741 Training Accuracy:100.%Progress:50.2 #Correct:1742 #Trained:1742 Training Accuracy:100.%Progress:

Progress:78.6 #Correct:2727 #Trained:2727 Training Accuracy:100.%Progress:78.7 #Correct:2728 #Trained:2728 Training Accuracy:100.%Progress:78.7 #Correct:2729 #Trained:2729 Training Accuracy:100.%Progress:78.7 #Correct:2730 #Trained:2730 Training Accuracy:100.%Progress:78.8 #Correct:2731 #Trained:2731 Training Accuracy:100.%Progress:78.8 #Correct:2732 #Trained:2732 Training Accuracy:100.%Progress:78.8 #Correct:2733 #Trained:2733 Training Accuracy:100.%Progress:78.8 #Correct:2734 #Trained:2734 Training Accuracy:100.%Progress:78.9 #Correct:2735 #Trained:2735 Training Accuracy:100.%Progress:78.9 #Correct:2736 #Trained:2736 Training Accuracy:100.%Progress:78.9 #Correct:2737 #Trained:2737 Training Accuracy:100.%Progress:79.0 #Correct:2738 #Trained:2738 Training Accuracy:100.%Progress:79.0 #Correct:2739 #Trained:2739 Training Accuracy:100.%Progress:79.0 #Correct:2740 #Trained:2740 Training Accuracy:100.%Progress:79.0 #Correct:2741 #Trained:2741 Training Accuracy:100.%Progress:

In [73]:
mlp.test(test_reviews)


POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE


POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
NEGATIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
POSITIVE
