In [None]:
from keras.preprocessing.sequence import pad_sequences
import os
import fnmatch
import cv2
import numpy as np
import string
import sys

In [None]:
# Alphabet recognised by the model, in class-index order:
#   'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
# The number of character classes is len(characters).
characters = string.ascii_letters + string.digits

def encode_to_labels(text):
    """Translate a word into the list of class indices of its characters.

    Each character is replaced by its position in ``characters``; characters
    that do not occur in ``characters`` are silently dropped, so the result
    may be shorter than ``text``.
    """
    labels = []
    for symbol in text:
        position = characters.find(symbol)
        # str.find returns -1 for symbols outside the alphabet
        if position != -1:
            labels.append(position)
    return labels

In [None]:
# Root directory of the image dataset; it is walked recursively with os.walk
# and every '*.jpg' file found below it is considered for loading.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
data_path = r'C:\Users\twalunjk\Downloads\mjsynth\mnt\ramdisk\max\90kDICT32px'

In [None]:
# Build the training arrays from the '*.jpg' files found below data_path.
#
# Every accepted image is converted to grayscale, padded with white (255) on
# the bottom/right to exactly 32 x 128 pixels and stored as (32, 128, 1).
# The label is the second '_'-separated field of the file name, encoded with
# encode_to_labels().
#
# Samples are stored at a running per-sample index (sample_count), so
# images[k], label_lengths[k] and texts[k] always describe the same file.
max_samples = 390000  # capacity of the pre-allocated arrays

images = np.zeros((max_samples, 32, 128, 1), dtype='uint8')
texts = []
input_lengths = np.full((max_samples,), 31, dtype='uint8')
label_lengths = np.zeros((max_samples,), dtype='uint8')

max_text_len = 0   # longest encoded label among the stored samples
sample_count = 0   # number of samples stored so far == next free slot

for dir_index, (root, _, filenames) in enumerate(os.walk(data_path)):
    for filename in fnmatch.filter(filenames, '*.jpg'):
        image_path = os.path.join(root, filename)
        try:
            raw = cv2.imread(image_path)
            if raw is None:
                # cv2.imread signals unreadable/corrupt files by returning None
                raise ValueError('cv2.imread could not read the image')
            image = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)

            # image.shape is (rows, cols), i.e. (height, width)
            rows, cols = image.shape
            if rows > 32 or cols > 128:
                # larger images are skipped rather than resized
                continue

            # paste the image into the top-left corner of a white canvas
            canvas = np.full((32, 128), 255, dtype='uint8')
            canvas[:rows, :cols] = image

            # get the text from the file name and encode it
            text = filename.split('_')[1]
            labels = encode_to_labels(text)
            if len(labels) == 0:
                print('\n', image_path, 'skipped: no encodable characters in label')
                continue

            # store everything for this sample at the same index
            images[sample_count] = canvas[:, :, np.newaxis]
            # use the encoded length: encode_to_labels may drop characters
            label_lengths[sample_count] = len(labels)
            texts.append(labels)
            max_text_len = max(max_text_len, len(labels))
            sample_count += 1

            # progress: directories visited and samples stored so far
            sys.stdout.write('\r' + str(dir_index) + ' ' + str(sample_count))
        except Exception as e:
            # best effort: report the offending file and keep going
            print('\n', image_path, str(e))

        # stop as soon as the pre-allocated arrays are full
        if sample_count >= max_samples:
            break

    if sample_count >= max_samples:
        break

# Drop unused trailing slots so every array has one row per stored sample
# (a no-op when max_samples images were loaded).
images = images[:sample_count]
input_lengths = input_lengths[:sample_count]
label_lengths = label_lengths[:sample_count]

In [None]:
# Right-pad ('post') every encoded label sequence to max_text_len entries.
# The fill value len(characters) is one past the largest character index,
# so padding can never be mistaken for a real character.
padded_texts = pad_sequences(
    texts,
    maxlen=max_text_len,
    padding='post',
    value=len(characters),
)

In [None]:
# Sanity check: show the shape of each array that is about to be saved.
for array in (label_lengths, input_lengths, images, padded_texts):
    print(array.shape)

In [None]:
# Persist the prepared dataset as .npy files.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
save_path = r'C:\Users\twalunjk\Downloads\DL\save'

# np.save does not create missing directories, so make sure the target exists.
os.makedirs(save_path, exist_ok=True)

np.save(os.path.join(save_path, 'label_lengths.npy'), label_lengths)
np.save(os.path.join(save_path, 'input_lengths.npy'), input_lengths)
np.save(os.path.join(save_path, 'images.npy'), images)
np.save(os.path.join(save_path, 'padded_texts.npy'), padded_texts)
# max_text_len is a plain integer; np.save accepts any array_like value
np.save(os.path.join(save_path, 'max_text_len.npy'), max_text_len)

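Run the following cells after the CRNN model has been trained; they rebuild the network and load its saved `best_model.hdf5` weights to display predictions.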

In [None]:
import matplotlib.pyplot as plt
from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional, Dropout
from keras.models import Model
from keras.activations import relu, sigmoid, softmax
import keras.backend as K
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
import keras
import numpy as np
from IPython.display import display, Image

In [None]:
# Convolutional + bidirectional-LSTM network that maps a 32x128 single-channel
# image to a sequence of per-timestep class probabilities.
# Shape comments below are per sample (height, width, channels) and follow
# from the layer arguments, relying on Keras defaults where none are given.
input_layer = Input(shape=(32,128,1))
# scale pixel values by 1/255 inside the model
normalized_inputs = Lambda(lambda x: x / 255) (input_layer)

# block 1: 'same' padding keeps 32x128; the 2x2 pool halves both axes -> 16x64
conv_layer1 = Conv2D(16, (3,3), activation = 'relu', kernel_initializer='he_normal' ,padding='same')(normalized_inputs)
conv_layer1 = Dropout(0.25)(conv_layer1)
conv_layer1 = Conv2D(32, (3,3), activation = 'relu', kernel_initializer='he_normal' ,padding='same')(conv_layer1)
pool_layer1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_layer1)

# block 2: 16x64 -> 8x32 after the 2x2 pool
conv_layer2 = Conv2D(32, (3,3), activation = 'relu',kernel_initializer='he_normal' , padding='same')(pool_layer1)
conv_layer2 = BatchNormalization(axis=-1)(conv_layer2)
conv_layer2 = Dropout(0.25)(conv_layer2)
conv_layer2 = Conv2D(32, (3,3), activation = 'relu', kernel_initializer='he_normal' ,padding='same')(conv_layer2)
pool_layer2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_layer2)

# block 3: the (2, 1) pool halves only the height -> 4x32, width is preserved
conv_layer3 = Conv2D(32, (3,3), activation = 'relu', kernel_initializer='he_normal' ,padding='same')(pool_layer2)
conv_layer3 = BatchNormalization(axis=-1)(conv_layer3)
conv_layer3 = Dropout(0.25)(conv_layer3)
conv_layer3 = Conv2D(32, (3,3), activation = 'relu', kernel_initializer='he_normal' ,padding='same')(conv_layer3)
conv_layer4 = Conv2D(64, (3,3), activation = 'relu', kernel_initializer='he_normal' ,padding='same')(conv_layer3)
pool_layer4 = MaxPool2D(pool_size=(2, 1))(conv_layer4)

conv_layer5 = Conv2D(256, (3,3), activation = 'relu',kernel_initializer='he_normal' , padding='same')(pool_layer4)
batch_norm_layer5 = BatchNormalization()(conv_layer5)

# second height-only pool -> 2x32
conv_layer6 = Conv2D(256, (3,3), activation = 'relu',kernel_initializer='he_normal' , padding='same')(batch_norm_layer5)
batch_norm_layer6 = BatchNormalization()(conv_layer6)
pool_layer6 = MaxPool2D(pool_size=(2, 1))(batch_norm_layer6)

# 2x2 kernel with no padding argument (Keras default 'valid'): 2x32 -> 1x31
conv_layer7 = Conv2D(512, (2,2), activation = 'relu')(pool_layer6)

# drop the height axis (size 1) so the width axis becomes the sequence axis: (31, 512)
squeezed_layer = Lambda(lambda x: K.squeeze(x, 1))(conv_layer7)

# two stacked bidirectional LSTMs, each returning the full sequence
blstm_layer1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed_layer)
blstm_layer2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_layer1)

# 62 character classes plus one extra class (presumably the CTC blank - confirm)
output_layer = Dense(62+1, activation = 'softmax')(blstm_layer2)

# model to be used at test time
model = Model(input_layer, output_layer)

model.summary()

In [None]:
def display_image_and_prediction(x, y):
  """Show an input image and print the text the model predicts for it.

  Loads the saved best weights into the module-level ``model``, runs a
  prediction on ``x``, decodes it with greedy CTC decoding, displays the
  image and prints the decoded text.

  Args:
    x: image data that can be reshaped to (32, 128).
    y: label belonging to ``x``; accepted for call compatibility but not
      used by this function.
  """
  # Local import so this cell also works when only the model cells were run.
  import string

  # Index -> character table; must use the same order as the label encoding
  # (lowercase letters, uppercase letters, digits).
  alphabet = string.ascii_letters + string.digits

  # load the saved best model weights
  model.load_weights(r'C:\Users\twalunjk\Downloads\DL\save\best_model.hdf5')
  prediction = model.predict(x.reshape(1,32,128,1))

  # use CTC decoder; every sample uses all timesteps of the prediction
  input_length = np.ones(prediction.shape[0]) * prediction.shape[1]
  decoded = K.get_value(K.ctc_decode(prediction, input_length=input_length,
                                     greedy=True)[0][0])

  plt.title('Input Image')
  plt.imshow(x.reshape(32,128))
  plt.axis('off')
  plt.show()

  # see the results; -1 entries are padding added by the decoder
  for sequence in decoded:
      word = ''.join(alphabet[int(p)] for p in sequence if int(p) != -1)
      print("predicted text = " + word, end = '')
      print('\n')

In [None]:
# Show the model's prediction for 8 randomly chosen samples.
# NOTE(review): training_img and train_padded_txt are not defined in the cells
# visible here - presumably they come from the CRNN training session; confirm.
for _ in range(8):
  # random index in [0, 300000); assumes training_img has at least 300000 rows - TODO confirm
  i = np.random.randint(300000)
  display_image_and_prediction(training_img[i],train_padded_txt[i])