In [None]:
import os
import string

import keras
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
from keras.activations import relu, sigmoid, softmax
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional, Dropout
from keras.models import Model
from keras.utils import to_categorical

In [None]:
# Load saved data: every array lives as a .npy file directly under `save_path`.
save_path = r'C:\Users\twalunjk\Downloads\DL\save'


def _load_saved_array(file_name):
    """Return the NumPy array stored in `file_name` inside `save_path`."""
    return np.load(os.path.join(save_path, file_name))


label_lengths = _load_saved_array('label_lengths.npy')
input_lengths = _load_saved_array('input_lengths.npy')
images = _load_saved_array('images.npy')
padded_texts = _load_saved_array('padded_texts.npy')
# NOTE: the file on disk is called 'max_text_len.npy'; the notebook refers to it as max_label_len.
max_label_len = _load_saved_array('max_text_len.npy')


In [None]:
# Sanity check: print the shape of each loaded array, one per line.
print(
    *(loaded.shape for loaded in (label_lengths, input_lengths, images, padded_texts)),
    sep='\n',
)

In [None]:
# Alphabet of recognisable symbols: a-z, A-Z, then 0-9.
# A character's position in this string is the class index used when decoding predictions.
characters = ''.join((string.ascii_letters, string.digits))

In [None]:
# CRNN definition: a convolutional feature extractor, followed by stacked
# bidirectional LSTMs and a per-time-step softmax over the character set.
# Shape comments below exclude the batch axis and assume Keras defaults
# (MaxPool2D strides default to pool_size, 'valid' padding unless stated).

# Define input layer: 32 x 128 single-channel images
input_layer = Input(shape=(32, 128, 1))
# Divide by 255 inside the graph (presumably raw 0-255 pixel values -- confirm against the saved data)
normalized_inputs = Lambda(lambda x: x / 255)(input_layer)

# Convolutional layers
# Stage 1: two 3x3 convs, then 2x2 pooling -> (16, 64, 32)
conv_layer1 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(normalized_inputs)
conv_layer1 = Dropout(0.25)(conv_layer1)
conv_layer1 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(conv_layer1)
pool_layer1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_layer1)

# Stage 2: conv + batch norm + dropout + conv, then 2x2 pooling -> (8, 32, 32)
conv_layer2 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(pool_layer1)
conv_layer2 = BatchNormalization(axis=-1)(conv_layer2)
conv_layer2 = Dropout(0.25)(conv_layer2)
conv_layer2 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(conv_layer2)
pool_layer2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_layer2)

# Stage 3: three convs; pooling only halves the height (pool 2x1) so the
# width / time axis is kept at 32 -> (4, 32, 64)
conv_layer3 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(pool_layer2)
conv_layer3 = BatchNormalization(axis=-1)(conv_layer3)
conv_layer3 = Dropout(0.25)(conv_layer3)
conv_layer3 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(conv_layer3)
conv_layer4 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(conv_layer3)
pool_layer4 = MaxPool2D(pool_size=(2, 1))(conv_layer4)

# Stage 4: two 256-filter convs, each followed by batch norm -> (4, 32, 256)
conv_layer5 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(pool_layer4)
batch_norm_layer5 = BatchNormalization()(conv_layer5)

conv_layer6 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(batch_norm_layer5)
batch_norm_layer6 = BatchNormalization()(conv_layer6)
# Height-only pooling again -> (2, 32, 256)
pool_layer6 = MaxPool2D(pool_size=(2, 1))(batch_norm_layer6)

# Unpadded 2x2 conv collapses the remaining height to 1 -> (1, 31, 512)
conv_layer7 = Conv2D(512, (2, 2), activation='relu')(pool_layer6)

# Drop the height axis (axis 1, counting the batch axis) so the feature map
# becomes a sequence of 31 time steps with 512 features each
squeezed_layer = Lambda(lambda x: K.squeeze(x, 1))(conv_layer7)

# Additional LSTM layers: four stacked bidirectional LSTMs, each returning the full sequence
blstm_layer1 = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2))(squeezed_layer)
blstm_layer2 = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2))(blstm_layer1)
blstm_layer3 = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2))(blstm_layer2)
blstm_layer4 = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2))(blstm_layer3)

# One softmax per time step; the extra (+1) class is the CTC blank label
output_layer = Dense(len(characters) + 1, activation='softmax')(blstm_layer4)

# model to be used at test time (image in, per-time-step class probabilities out)
model = Model(input_layer, output_layer)

model.summary()

In [None]:
# Extra graph inputs needed only for the CTC loss:
# the padded ground-truth labels plus one length value per sample for the
# prediction sequence and for the label sequence.
labels = Input(shape=[max_label_len], dtype='float32', name='the_labels')
input_length = Input(shape=[1], dtype='int64', name='input_length')
label_length = Input(shape=[1], dtype='int64', name='label_length')

def ctc_lambda_func(args):
    """Compute the batch CTC cost for use inside a Keras `Lambda` layer.

    Args:
        args: a 4-item sequence ordered as
            (predictions, labels, prediction lengths, label lengths).

    Returns:
        The result of `K.ctc_batch_cost`, called with the labels first and the
        predictions second (the backend's argument order differs from `args`).
    """
    predictions, true_labels, prediction_lengths, true_label_lengths = args
    return K.ctc_batch_cost(true_labels, predictions, prediction_lengths, true_label_lengths)

# Wrap the CTC cost in a Lambda layer named 'ctc'; its output is the loss tensor.
# The list order must match the unpacking order inside ctc_lambda_func.
ctc_layer_inputs = [output_layer, labels, input_length, label_length]
loss_out = Lambda(ctc_lambda_func, name='ctc', output_shape=(1,))(ctc_layer_inputs)

def display_image_and_prediction(x, y):
    """Show one image and print the text the model predicts for it.

    Side effects: reloads 'best_model.hdf5' from `save_path` into the global
    `model`, re-saves that model as 'act_model.hdf5', draws the image with
    matplotlib and prints the greedily CTC-decoded prediction.

    Args:
        x: image array that can be reshaped to (32, 128).
        y: accepted for interface compatibility; not used by this function.
    """
    # Refresh the weights from the checkpoint, then store a copy of the model.
    model.load_weights(os.path.join(save_path, 'best_model.hdf5'))
    model.save(os.path.join(save_path, 'act_model.hdf5'))

    prediction = model.predict(x.reshape(1, 32, 128, 1))

    # Every sample uses the full number of predicted time steps as its input length.
    sequence_lengths = np.ones(prediction.shape[0]) * prediction.shape[1]
    decoded_sequences, _ = K.ctc_decode(prediction, input_length=sequence_lengths, greedy=True)
    out = K.get_value(decoded_sequences[0])

    image_2d = x.reshape(32, 128)
    plt.title('Input Image')
    plt.imshow(image_2d)
    plt.axis('off')
    plt.show()

    for sequence in out:
        print("predicted text = ", end='')
        # -1 marks padding in the decoded output; every other value indexes `characters`.
        decoded_text = ''.join(characters[int(index)] for index in sequence if int(index) != -1)
        print(decoded_text, end='')
        print('\n')


class DisplayCallback(keras.callbacks.Callback):
    """Keras callback that snapshots the model and shows a sample prediction after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Save the notebook-level `model` and display one randomly chosen sample.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metrics dictionary supplied by Keras (unused).
        """
        # Deliberately uses the notebook-level `model` rather than `self.model`.
        # Reading a global needs no `global` statement.
        model.save(os.path.join(save_path, 'model.h5'))

        # Bound the random index by the real dataset size. A hard-coded upper
        # bound raises IndexError on smaller datasets and never samples the
        # tail of larger ones.
        i = np.random.randint(len(images))
        display_image_and_prediction(images[i], padded_texts[i])

In [None]:
%xmode Verbose

batch_size = 1026
epochs = 20

callbacks_list = [checkpoint, DisplayCallback()]

model.fit(x=[images, padded_texts, input_lengths, label_lengths],
          y=np.zeros(len(images)),
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.08,
          verbose=1,
          callbacks=callbacks_list)

model.save(os.path.join(save_path, 'best_model.hdf5'))