In [1]:
# plaidml Keras default template code
import numpy as np
import pandas as pd
import os
import time
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
import keras

# standard ds imports
import matplotlib.pyplot as plt
import matplotlib.image as plt_img
import seaborn as sns
%matplotlib inline
# seaborn defaults: white style, notebook context, desaturated pastel palette
sns.set(context='notebook', style='white')
sns.set_palette('pastel', desat=0.8)
# default matplotlib figure size as (width, height) in inches
from matplotlib import rcParams
rcParams['figure.figsize'] = (11, 7)

# standard sklearn imports
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# basic keras sequential model and image imports
# NOTE(review): to_categorical, Flatten, Conv2D, MaxPool1D, RMSprop and ImageDataGenerator
# are not referenced in the visible part of this notebook (template leftovers?) - confirm before pruning
from keras.utils.np_utils import to_categorical # for one hot encoding
from keras.models import Sequential # model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool1D # layers
from keras.optimizers import RMSprop # learning optimizer
from keras.preprocessing.image import ImageDataGenerator # generator function for loading images
from keras.callbacks import ReduceLROnPlateau # callback that lowers the learning rate when a monitored metric stops improving (it does not stop training)

Using plaidml.keras.backend backend.


In [2]:
# load imdb dataset
from keras.datasets import imdb

In [3]:
# Workaround for numpy's pickle parameter: np.load refuses pickled object arrays
# unless allow_pickle=True is passed, so wrap np.load to always pass it.
# SECURITY: allow_pickle=True can execute arbitrary code from a malicious file.
# Keep the patch active only while loading the trusted dataset and restore
# np.load right afterwards.
# Re-running this cell is safe: if np.load is already wrapped, the true original
# is recovered from the wrapper instead of capturing the wrapper itself.
np_load_old = getattr(np.load, "_np_load_original", np.load)


def _np_load_allow_pickle(*args, **kwargs):
    """Call the original np.load with allow_pickle forced to True.

    Keyword arguments (e.g. mmap_mode, encoding, or an explicit allow_pickle)
    are forwarded instead of raising TypeError.
    """
    kwargs["allow_pickle"] = True
    return np_load_old(*args, **kwargs)


# remember the real np.load on the wrapper so the patch is idempotent
_np_load_allow_pickle._np_load_original = np_load_old
np.load = _np_load_allow_pickle

In [4]:
# download the IMDB reviews with the vocabulary capped at 10,000 words (num_words=10000)
imdb_train, imdb_test = imdb.load_data(num_words=10000)
train_data, train_labels = imdb_train
test_data, test_labels = imdb_test

In [5]:
# restore the saved np.load so that the allow_pickle override no longer applies
# to any later np.load call in this kernel
np.load = np_load_old

In [6]:
# inspect the container type of a single review
type(train_data[0])
# each review is a plain Python list; train_data is an array of such lists

list

In [7]:
# show how many tokens each of the first ten reviews contains
for review in train_data[:10]:
    print(len(review))

218
189
141
550
147
43
123
562
233
130


In [8]:
# verify the vocabulary cap: largest word index found in any training review
print(max(max(review) for review in train_data))

9999


In [9]:
# fetch the IMDB vocabulary so reviews can be decoded back into text
# word_index maps each word to its integer index; it is inverted in the next cell
# so that integers can be converted back to strings
word_index = imdb.get_word_index()

In [10]:
# invert word_index so that integer indices look up their words
reverse_word_index = {index: word for word, index in word_index.items()}

In [11]:
# decode and print the first ten reviews as text
# stored indices are offset by 3 (the lowest values are reserved, e.g. for padding),
# so subtract 3 before the lookup; indices without a dictionary entry become ''
for review in train_data[:10]:
    decoded_review = ' '.join(
        reverse_word_index.get(token - 3, '') for token in review
    )
    print(decoded_review)
    print('\n\n')

 this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert  is an amazing actor and now the same being director  father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for  and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also  to the two little boy's that played the  of norman and paul they were just brilliant children are often left out of the  list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the 

In [12]:
# prepare data
# multi-hot encode integer lists into rows of a (samples x 10,000) matrix
# define vectorizer function
def vectorize_seq(sequences, dimension=10000, dtype=np.float64):
    """Multi-hot encode integer sequences into a dense 2-D array.

    Args:
        sequences: sized iterable of integer index sequences; every index
            must be a valid column index for ``dimension``.
        dimension: width of each output row (the vocabulary size).
        dtype: element type of the result. Defaults to float64 (the
            ``np.zeros`` default, i.e. the previous behaviour); pass
            ``np.float32`` to halve the memory of the dense matrix.

    Returns:
        Array of shape ``(len(sequences), dimension)`` in which ``[i, j]`` is
        1 when index ``j`` occurs in ``sequences[i]`` and 0 otherwise.
        Repeated indices still yield 1 (presence, not counts).

    Raises:
        IndexError: if an index is out of bounds for ``dimension``.
    """
    results = np.zeros((len(sequences), dimension), dtype=dtype)
    for row, sequence in enumerate(sequences):
        # fancy indexing sets every column listed in this sequence at once
        results[row, sequence] = 1
    return results

In [13]:
# multi-hot encode the training and test reviews
x_train, x_test = vectorize_seq(train_data), vectorize_seq(test_data)

In [14]:
# turn the label sequences into float32 numpy vectors
y_train = np.array(train_labels, dtype='float32')
y_test = np.array(test_labels, dtype='float32')

In [15]:
# // data is in tensor format and ready for network input

In [16]:
# // input data are tensors, labels are scalars
# a dense network with relu activations is best suited for this data
# simple data and simple network

In [17]:
# for this simple data and scalar prediction [0,1], a network with 2 hidden layers of 16 units and relu activation
# performs best. The final layer will use a sigmoid activation to predict a value in the range [0,1]
# // Relu zeros out negative values
# // Sigmoid squashes arbitrary values into the range [0,1] for easy interpretability

In [18]:
# Define model architecture
# 3 dense layers: two hidden [relu, 16 units] and one output [sigmoid, 1 unit]

In [52]:
# start from an empty Sequential model; layers are appended to it in the following cells
model = Sequential()

In [22]:
# for input shape reference: number of samples, then features per sample
for axis_length in x_train.shape[:2]:
    print(axis_length)

25000
10000


In [53]:
# input layer: 16 relu units fed by one feature vector per review
# // input_shape must be a tuple, hence the trailing comma
first_dense = Dense(units=16, activation='relu', input_shape=(x_train.shape[1],))
model.add(first_dense)

In [54]:
# hidden block, added in order:
#   1. second 16-unit relu layer
#   2. batch normalization, to bring its activations back towards mean zero and std.dev 1
#   3. dropout with a rate of 0.2
for layer in (
    Dense(16, activation='relu'),
    keras.layers.BatchNormalization(),
    Dropout(0.2),
):
    model.add(layer)

In [55]:
# output layer: a single sigmoid unit producing a score in [0,1]
model.add(Dense(units=1, activation='sigmoid'))

In [33]:
# // Relu is a non-linear activation function
# it is applied element-wise to each layer's output (the dot product of inputs and weights, plus bias) to enlarge the hypothesis space;
# stacking purely linear transformations adds no information value, as the hyp. space stays restricted to linear functions
# // binary cross entropy is the ideal loss function for this task
# as the output is a probability in [0,1], it is a better fit than mean squared error
# crossentropy measures the difference between the true and the predicted label distributions
# // use rmsprop to optimise the loss function during learning

In [56]:
# configure training: binary cross-entropy loss minimised with rmsprop,
# with accuracy reported alongside the loss
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [35]:
# hold out the first 10,000 vectorized reviews as the validation set;
# everything after them becomes the (partial) training set
x_val, partial_x_train = x_train[:10000], x_train[10000:]

In [36]:
# split the labels at the same position so they stay aligned with the inputs
y_val, partial_y_train = y_train[:10000], y_train[10000:]

In [57]:
# define callbacks for training optimisation
# when val_loss has not improved for 1 epoch (patience=1), multiply the
# learning rate by 0.2 (i.e. cut it to 1/5)
# min_lr is the floor for the reduced rate: it must be BELOW the optimizer's
# starting rate (Keras' 'rmsprop' defaults to 0.001), otherwise every reduction
# is clamped straight back to the starting value and the callback does nothing
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=1, factor=0.2, min_lr=1e-5)

In [60]:
# train model
# run for 4 epochs over the partial training set in mini-batches of 512 samples,
# evaluating on the held-out validation set after every epoch
# the learning-rate callback is passed via `callbacks`, which takes a list
history = model.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=4,
    verbose=1,
    callbacks=[reduce_lr],
    validation_data=(x_val, y_val),
)

Train on 15000 samples, validate on 10000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
