In [1]:
from __future__ import print_function
import keras

from keras.datasets import imdb
from keras.models import load_model
from keras.preprocessing import sequence
from keras.models import Sequential
import os
import numpy as np

import pandas as pd
import random
import enum


Using TensorFlow backend.


In [2]:
class PredictionMode(enum.Enum):
    """Selectable model variants; members are numbered 1..5 in declaration order."""
    cnn = 1
    lstm = 2
    fasttext = 3
    cnn_lstm = 4
    bidirectional_lstm = 5


In [3]:
# ----------------------------------------------------------------------
# SWITCH PREDICTION MODE
# Leave exactly one of the assignments below uncommented.
# ----------------------------------------------------------------------
prediction_mode = PredictionMode.cnn
#prediction_mode = PredictionMode.lstm
#prediction_mode = PredictionMode.fasttext
#prediction_mode = PredictionMode.cnn_lstm
#prediction_mode = PredictionMode.bidirectional_lstm

INDEX_FROM = 3   # word index offset

# Per-mode settings: (max_features, maxlen, model_filename).
_MODE_SETTINGS = {
    PredictionMode.cnn: (5000, 400, 'models/model_imdb_cnn.h5'),
    PredictionMode.lstm: (20000, 80, 'models/model_imdb_lstm.h5'),
    PredictionMode.fasttext: (20000, 400, 'models/model_imdb_fasttext.h5'),
    PredictionMode.cnn_lstm: (20000, 100, 'models/model_imdb_cnn_lstm.h5'),
    PredictionMode.bidirectional_lstm: (20000, 100, 'models/model_imdb_bidirectional_lstm.h5'),
}

# The fallback tuple holds the values used when no mode entry matches.
max_features, maxlen, model_filename = _MODE_SETTINGS.get(
    prediction_mode, (5000, 400, '')
)

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=max_features,
    index_from=INDEX_FROM,
)

print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
for shape_label, split in (
    ('x_train shape:', x_train),
    ('y_train shape:', y_train),
    ('x_test shape:', x_test),
    ('y_test shape:', y_test),
):
    print(shape_label, split.shape)

print('Pad sequences (samples x time)')
# Keep a reference to the test reviews as loaded, before they are padded.
original_x_test = x_test

x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

# LOAD MODEL
model = load_model(model_filename)

Loading data...
25000 train sequences
25000 test sequences
x_train shape: (25000,)
y_train shape: (25000,)
x_test shape: (25000,)
y_test shape: (25000,)
Pad sequences (samples x time)


In [4]:
# Word -> id lookup: every id from the Keras word index is shifted by
# INDEX_FROM, then the three reserved ids are added.
word_to_id = {
    word: rank + INDEX_FROM
    for word, rank in imdb.get_word_index().items()
}
word_to_id.update({
    "[PAD]": 0,
    "[🏃]": 1,  # START
    "[❓]": 2,  # UNKNOWN
})

# Id -> word lookup: the inverse of the mapping above.
id_to_word = dict((token_id, word) for word, token_id in word_to_id.items())

def restore_original_text(imdb_x_array):
    """Convert a sequence of word ids into a single space-separated string.

    Each id is looked up in the module-level ``id_to_word`` mapping; an id
    missing from that mapping raises ``KeyError``.
    """
    words = [id_to_word[token_id] for token_id in imdb_x_array]
    return ' '.join(words)

In [5]:
# ----------------------------------------------------------------------
# PREDICT
# ----------------------------------------------------------------------
def imdb_class_to_str(imdb_class):
    """Return 'negative' for class 0 and 'positive' for any other value."""
    return 'negative' if imdb_class == 0 else 'positive'

# Score a random sample of test reviews with the loaded model, count hits and
# misses, and show the per-review results as a table.
N_SAMPLES = 100  # number of randomly chosen test reviews to score

right = 0
mistake = 0

index_list = []
original_text_list = []
pred_prob_list = []
pred_class_list = []
y_test_list = []
fail_str_list = []

# random.randrange excludes its upper bound. random.randint(0, len(x_test)) is
# inclusive and could return len(x_test), one past the last valid index, which
# fails on y_test[index].
sample_indices = [random.randrange(len(x_test)) for _ in range(N_SAMPLES)]

# Run the model once over the whole sample instead of twice per review.
# Column 0 is the same value the per-sample [0][0] indexing used to read.
sample_probs = model.predict(x_test[sample_indices])[:, 0]

for index, pred_prob in zip(sample_indices, sample_probs):
    # Threshold the probability at 0.5, the same rule predict_classes applied
    # for a single-unit output. predict_classes is no longer available in
    # recent TensorFlow/Keras releases, so it is not called here.
    # NOTE(review): assumes the model has a single sigmoid output - confirm.
    pred_class = int(pred_prob > 0.5)

    fail_str = ''
    if y_test[index] == pred_class:
        right += 1
    else:
        mistake += 1
        fail_str = 'Fail'

    # The text is rebuilt from the un-padded copy of the test set.
    original_text = restore_original_text(original_x_test[index])

    index_list.append(index)
    original_text_list.append(original_text)
    pred_prob_list.append(pred_prob)
    pred_class_list.append(imdb_class_to_str(pred_class))
    y_test_list.append(imdb_class_to_str(y_test[index]))
    fail_str_list.append(fail_str)

print(prediction_mode)
print("right : ", right)
print("mistake : ", mistake)
print("accuracy:", right/(right+mistake))

# NOTE(review): the 'x_train original_text' column actually holds text from the
# test set; the label is left unchanged so anything selecting it keeps working.
df = pd.DataFrame({'index': index_list, 
                   'x_train original_text': original_text_list, 
                   'probability': pred_prob_list, 
                   'pred_class': pred_class_list,
                   'y_test': y_test_list,
                   'is_fail': fail_str_list
                  })

df[['index', 'x_train original_text','probability','pred_class','y_test','is_fail']]


PredictionMode.cnn
right :  86
mistake :  14
accuracy: 0.86


Unnamed: 0,index,x_train original_text,probability,pred_class,y_test,is_fail
0,10606,[🏃] since i am not a big steven seagal fan i t...,0.032832,negative,negative,
1,10466,[🏃] one dark night is a [❓] in the [❓] low bud...,0.959998,positive,positive,
2,371,[🏃] another great tom [❓] performance [❓] sepa...,0.843495,positive,positive,
3,23193,[🏃] i am so disappointed in this movie i can't...,0.053851,negative,negative,
4,19655,[🏃] when i first saw this show i was 9 and it ...,0.974339,positive,positive,
5,1384,[🏃] this may not be [❓] as a review on any fil...,0.040330,negative,positive,Fail
6,9061,[🏃] it's hard to imagine in this day and age h...,0.760508,positive,positive,
7,12961,[🏃] from the brilliant mind that brought us th...,0.021393,negative,negative,
8,23342,[🏃] this movie cannot be serious because it ha...,0.000289,negative,negative,
9,20873,[🏃] so one day i was in the video store lookin...,0.001086,negative,negative,
