In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, MaxPool2D
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
import os

from tensorflow.keras.preprocessing.image import ImageDataGenerator

import PIL
from PIL import Image

import numpy as np
import cv2
from tensorflow.keras import backend as K
from collections import Counter

In [2]:

alphabet = {'0':0,'1':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':8,'9':9,
            'A':10,'B':11,'C':12,'D':13,
            'E':14,'F':15,'G':16,'H':17
            ,'I':18,'J':19,'K':20,'L':21,
            'M':22,'N':23,'O':24,'P':25,
            'Q':26,'R':27,'S':28,'T':29,
            'U':30,'V':31,'W':32,
            'X':33,'Y':34,'Z':35}


In [3]:
# Needed for transformation of character to number.
def char_to_num(char):
    num = alphabet[char]
    return num


# Needed for transformation of number to character.
def num_to_char(num):
    for key in alphabet:
        if alphabet[key] == num:
            return key


# Load file paths and labels them.
def load_chars74k_data(dir="Binary"):  #chars74k-complete
    filenames = []
    label_list = []

    for path, dirs, files in os.walk(dir):
        for file in files:
            if file.endswith('.png'): #.jpg for chars#74 lite
                file = path + '/' + file
                filenames.append(file)

                label = path[-1:]
                label_list.append(label)

    return filenames, label_list


# Creates the dataset.
def create_dataset(file_paths, label_set, with_denoising=False):
    data_x = []
    data_y = []

    for path in file_paths:
        #single_x = np.asarray((PIL.Image.open(path).convert('L')).resize((20,20))).flatten()
        single_x = np.asarray((PIL.Image.open(path).convert('L')))
        x=single_x.shape[0]
        y=single_x.shape[1]
        if x>y:
            z=np.zeros((x,x))
            z[0:x,0:y]=single_x
        else:
            z=np.zeros((y,y))
            z[0:x,0:y]=single_x
        single_x=(cv2.resize(z,(64,64))).flatten()
        
        # Denoise image with help of OpenCV (increase time of computing).
        if with_denoising:
            single_x = cv2.fastNlMeansDenoising(single_x).flatten()
        data_x.append(single_x)

    for l in label_set:
        l_to_num = char_to_num(l)
        data_y.append(l_to_num)

    np_data_x = np.array(data_x)
    np_data_y = np.array(data_y)
    return np_data_x, np_data_y


# # Use the Keras data generator to augment data.
# def create_datagenerator(x_train, x_test, y_train, y_test):
# #     train_datagen = ImageDataGenerator(
# #         rescale=1. / 255,
# #         rotation_range=0. / 180,
# #         vertical_flip=True)
#     pass
    

# #     test_datagen = ImageDataGenerator(rescale=1. / 255)

#     train_generator = train_datagen.flow(x=x_train, y=y_train)
#     validation_generator = test_datagen.flow(x=x_test, y=y_test)

#     return train_generator, validation_generator

In [4]:
print('total output classes',len(alphabet.keys()))
batch_size = 32
num_classes = len(alphabet.keys()) #62 for lite 
# num_classes = 26
epochs = 200
img_rows, img_cols = 64,64

total output classes 36


In [5]:
print('Start loading data.')
files, labels = load_chars74k_data()
X, y = create_dataset(files, labels)
print('Data has been loaded.')

x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=2, train_size=0.8)


Start loading data.
Data has been loaded.


ValueError: With n_samples=0, test_size=None and train_size=0.8, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [None]:
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

In [None]:
np.unique(y_train)

In [None]:
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [6]:
from tensorflow.keras.callbacks import ModelCheckpoint
filepath="weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [8]:
modelvgg=tf.keras.applications.vgg16.VGG16(include_top=False, weights=None, input_tensor=None, input_shape=(64, 64, 1))
f0=(Flatten())
d1=(Dense(256, activation='relu'))
d2=(Dense(36, activation='softmax'))

vggmodel = tf.keras.Sequential([
  modelvgg,
  f0,
  d1,d2
])
vggmodel.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 2, 2, 512)         14713536  
_________________________________________________________________
flatten_1 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               524544    
_________________________________________________________________
dense_3 (Dense)              (None, 36)                9252      
Total params: 15,247,332
Trainable params: 15,247,332
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.fit(x_train,y_train,
                    steps_per_epoch=896 // batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test,y_test),
                    validation_steps=6000 // batch_size,
                   callbacks=callbacks_list)

In [None]:
score = model.evaluate(x_test, y_test, verbose=0)
print('Model has been trained.')
print('Test loss:', score[0])
print('Test accuracy:', score[1])