# PART I
* Read data
* Divide on train and test data
* Replace default ids with names
* Extend word dictionary by 3 places, to add special characters
* Decoding function
* Give each review same length (the preprocessing thing)
* Define layers of the network

In [13]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

data = keras.datasets.imdb

(train_img, train_labels), (test_img, test_labels) = data.load_data(num_words=10000)

In [14]:
word_dir = data.get_word_index()

print(word_dir['whatever'])

842


In [15]:
for word, id in word_dir.items():
  print(word, " ", id)
  break

fawn   34701


In [16]:
word_dir = { name: (id+3) for name, id in word_dir.items()} # extending the dir by 3 places

In [17]:
print(word_dir['whatever'])

845


In [18]:
word_dir['[ADD]'] = 0 # additional character, if the movie review is too short
word_dir['[START]'] = 1
word_dir['[UNKNOWN]'] = 2
word_dir['[UNUSED]'] = 3

In [19]:
# reversing dictionary
# the word_dir is "word":123, but we want 123:"word" pointer

word_dir_rev = dict([(id, name) for name, id in word_dir.items()])

In [20]:
print(word_dir_rev[845])

whatever


In [21]:
def decoding(text):
  text_to_return = ""
  for word in text:
    text_to_return += " " + word_dir_rev[word]
  return text_to_return

In [22]:
decoding(test_img[0])

" [START] please give this one a miss br br [UNKNOWN] [UNKNOWN] and the rest of the cast rendered terrible performances the show is flat flat flat br br i don't know how michael madison could have allowed this one on his plate he almost seemed to know this wasn't going to work out and his performance was quite [UNKNOWN] so all you madison fans give this a miss"

In [25]:
# giving the reviews same shape - 250 charas
# (...).pad_sequences(data, maxlen, value, padding)
# |data| - data to alter, |maxlen| - max. length of each review, |value| - which value should be the review filled with,
# in case its too short, |padding| - should it appear on beginning or end of the review

test_data = keras.preprocessing.sequence.pad_sequences(test_img, maxlen=250, value=word_dir['[ADD]'], padding="post")
train_data = keras.preprocessing.sequence.pad_sequences(train_img, maxlen=250, value=word_dir['[ADD]'], padding="post")

In [26]:
decoding(test_data[0])

" [START] please give this one a miss br br [UNKNOWN] [UNKNOWN] and the rest of the cast rendered terrible performances the show is flat flat flat br br i don't know how michael madison could have allowed this one on his plate he almost seemed to know this wasn't going to work out and his performance was quite [UNKNOWN] so all you madison fans give this a miss [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [ADD] [

In [27]:
model = keras.Sequential()
model.add(keras.layers.Embedding(10000, 16))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(16, activation="relu"))
model.add(keras.layers.Dense(1, activation='sigmoid'))

In [28]:
# PART 2 SAVING MODEL
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 16)          160000    
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 16)                272       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 160,289
Trainable params: 160,289
Non-trainable params: 0
_________________________________________________________________


In [29]:
model.compile(optimizer="adam",
             loss="binary_crossentropy",
             metrics=['accuracy'])

In [30]:
x_val = train_data[:10000]
x_train = train_data[10000:]

y_val = train_labels[:10000]
y_train = train_labels[10000:]

In [32]:
fitModel = model.fit(x_train, y_train, epochs=40, validation_data=(x_val, y_val), verbose=1)

Train on 15000 samples, validate on 10000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [33]:
results = model.evaluate(test_data, test_labels)
print(results)

[1.5973482226109506, 0.83664]


In [34]:
model.save('model.h5')

In [35]:
model = keras.models.load_model("model.h5")

In [44]:
def review_encode(text):
    encoded = [1]
    
    for word in text:
        word = word.lower()
        if word in word_dir:
            encoded.append(word_dir[word])
        else:
            encoded.append(2)
    
    return encoded

In [48]:
with open("review.txt", encoding="utf-8") as f:
    for line in f.readlines():
        nline = (line.replace(",", "").replace(".", " ")
        .replace("(", "").replace(":", "").replace(";", "").replace(")", "")
        .replace("'", "").replace('"', "").replace("-", "").strip(" "))
        encode = review_encode(nline)
        encode = keras.preprocessing.sequence.pad_sequences([encode],
                                                   value=word_dir['[ADD]'],
                                                   padding="post",
                                                   maxlen=250)
        predict = model.predict(encode)
        print(line)
        print(encode)
        print(predict[0])

Stop with the nonsense. This show is hilarious. If they used the same cast or facsimiles of them, it wouldn't be a remake. It would be the British version. I've seen both versions, and each has its own charm, style, and tense scenarios. Ricky Gervais, who created the series, is a co-producer and writes some episodes, which is ironic considering he took the biggest part in the first, three episodes (to get it off its feet) and they have been trashed the most.

[[   6 1479 1983    2  590  830 5135 2014  963    2    6 3363 1095    2
   830  963 3363  590  963    2  590 1148  963 3363    6 1479   13 1604
   590    2    2 1479   13 1148 2295 5135    2 1331  963 1479 1964    6
    13  590    2 1992 2023 1604    2 1148 1479  963    6  830  963 1095
     2  830 2023  963    2  590  963 1479   13  963  590    2   13  590
     2    6    2 1148 1604 1657 1479 1604 1095 1206 1148  963 1479    2
     6 3363 1095    2 1992 1479   13  830  963  590    2  590 1604 1983
   963    2  963 1657   13  590 