In [1]:
import os
from config import config

# Settings read from the project configuration object.
CODA_DLL_PATH = config["preprocessing"]["coda_dll_path"]
H5_FILE = config["main"]["h5_file"]
CLASSES = config.get_classes()
IMG_SIZE = int(config["main"]["img_size"])

# Register the configured DLL directory before the heavier imports that follow
# (workaround taken from
# https://github.com/tensorflow/tensorflow/issues/48868#issuecomment-841396124).
# `os.add_dll_directory` is only available on Windows, so the call is skipped on
# other platforms instead of failing with AttributeError.
if hasattr(os, "add_dll_directory"):
    os.add_dll_directory(CODA_DLL_PATH)



import preprocessing
from plogging import logger

# Build the dataset from the configured HDF5 file (rotation and augmentation
# switched on, 30 augmentation cycles, save flag set) and continue on a copy so
# the freshly created frame stays available unchanged as `orig_dataset`.
orig_dataset = preprocessing.create_dataset(
    H5_FILE,
    rotation=True,
    augment=True,
    augment_cycles=30,
    save=True,
)
dataset = orig_dataset.copy()

# Log the class list, the number of rows per font and the first rows.
font_counts = dataset["font"].value_counts()
preview = dataset.head()
for report in (CLASSES, font_counts, preview):
    logger.info(report)

[18:47:04] Create dataset started [h5_file=SynthText.h5]


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['img', 'font', 'char', 'word', 'img_name'], dtype='object')]

  pytables.to_hdf(


[19:34:41] Function 'create_dataset' executed in 2856.4813s
[19:34:41] ['Ubuntu Mono', 'Alex Brush', 'Raleway', 'Roboto', 'Russo One', 'Open Sans', 'Michroma']
[19:34:41] Ubuntu Mono    155279
Alex Brush     135253
Raleway        133331
Roboto         132184
Russo One      123814
Open Sans      112747
Michroma        81499
Name: font, dtype: int64
[19:34:41]                                                  img       font char word  \
0  [[71, 71, 71, 71, 74, 91, 105, 116, 122, 122, ...  Open Sans    o  on,   
1  [[105, 105, 105, 105, 105, 105, 105, 105, 105,...  Open Sans    o  on,   
2  [[54, 55, 57, 58, 58, 57, 56, 55, 56, 57, 59, ...  Open Sans    o  on,   
3  [[129, 124, 117, 106, 94, 83, 80, 78, 75, 72, ...  Open Sans    o  on,   
4  [[58, 118, 141, 141, 141, 141, 141, 141, 141, ...  Open Sans    o  on,   

             img_name  
0  ant+hill_102.jpg_0  
1  ant+hill_102.jpg_0  
2  ant+hill_102.jpg_0  
3  ant+hill_102.jpg_0  
4  ant+hill_102.jpg_0  


In [2]:
# Optional (currently disabled): load additional datasets and append them to `dataset`.
# import pandas as pd

# DATASETS = ["generated_1641113062.h5"]
# a_dataset = pd.DataFrame()
# for file_ in DATASETS:
#     dataset_ = pd.read_hdf(file_)
#     a_dataset = pd.concat([a_dataset, dataset_])
# dataset = pd.concat([dataset, a_dataset])
# logger.info("With additional datasets:")
# logger.info(dataset["font"].value_counts())

In [3]:
# Shuffle the rows and renumber the index from 0.
# The seed is fixed so that a fresh "Restart & Run All" yields the same row
# order, which the seeded train/test split further down depends on.
SHUFFLE_SEED = 0
dataset = dataset.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

In [4]:
import numpy as np
import tensorflow as tf

# Features: every column except the label column.
X = dataset.drop(columns=["font"])

# Integer label per row: the position of the font name inside CLASSES.
# `CLASSES.index` raises ValueError for a font that is not listed there.
Y = dataset["font"].map(CLASSES.index).to_numpy()

# One-hot lookup table with exactly one row per entry of CLASSES.
# Sizing it from CLASSES (instead of from the labels that happen to occur in the
# data) keeps row i equal to class i and keeps the width equal to the number of
# classes even when some font has no samples.
CAT_CLASSES = tf.keras.utils.to_categorical(
    np.arange(len(CLASSES)), num_classes=len(CLASSES)
)


def f_(i):
    """Return the row(s) of CAT_CLASSES selected by class index/indices `i`."""
    return CAT_CLASSES[i]


# Replace the integer labels by their one-hot rows.
Y = f_(Y)


In [5]:
import metrics
import vote

def log_stats(y_test, y_pred):
    """Print accuracy, recall, precision and AUC of `y_pred` against `y_test`.

    Every figure comes from a newly created Keras metric object (default
    constructor arguments) that is updated once with the two arrays.

    Args:
        y_test: reference labels handed to each metric's `update_state`.
        y_pred: predictions handed to each metric's `update_state`.
    """
    trackers = {
        "recall": tf.keras.metrics.Recall(),
        "precision": tf.keras.metrics.Precision(),
        "auc": tf.keras.metrics.AUC(),
        "accuracy": tf.keras.metrics.CategoricalAccuracy(),
    }
    for tracker in trackers.values():
        tracker.update_state(y_test, y_pred)

    # Read every result back as a NumPy value before printing.
    scores = {key: tracker.result().numpy() for key, tracker in trackers.items()}

    print(f"Accuracy: {scores['accuracy']}")
    print(f"Recall: {scores['recall']}")
    print(f"Precision: {scores['precision']}")
    print(f"AUC: {scores['auc']}")

def _report_predictions(y_test, y_pred, classes, save):
    """Run `log_stats`, `metrics.plot_roc` and `metrics.plot_confusion_matrix` for one set of predictions."""
    log_stats(y_test, y_pred)
    metrics.plot_roc(y_test, y_pred, classes, zoom=False, save=save)
    metrics.plot_confusion_matrix(y_test, y_pred, classes, save=save)


def eval_model(history, x_test, y_test, y_pred, classes, save=True):
    """Report training curves and metrics for the raw and the voted predictions.

    Args:
        history: training history, passed to `metrics.plot_acc` and
            `metrics.plot_loss`.
        x_test: test samples, passed to `vote.vote` together with `y_pred`.
        y_test: reference labels of the test samples.
        y_pred: model predictions for the test samples.
        classes: class names passed to the ROC and confusion-matrix helpers.
        save: forwarded unchanged to every plotting helper.

    Returns:
        The predictions returned by the first `vote.vote(x_test, y_pred)` call.
    """
    logger.info("Model stats:")
    metrics.plot_acc(history, save=save)
    metrics.plot_loss(history, save=save)

    # The `classes` argument is used for every plot, rather than the
    # module-level CLASSES, so ROC and confusion matrix always agree.
    _report_predictions(y_test, y_pred, classes, save)

    logger.info("After votes:")
    y_pred_ = vote.vote(x_test, y_pred)
    _report_predictions(y_test, y_pred_, classes, save)

    logger.info("After votes v2:")
    # NOTE(review): this repeats the exact `vote.vote` call made above, so unless
    # `vote.vote` is non-deterministic this section reports the same numbers.
    # Confirm whether a different voting routine was meant to be used here.
    y_pred_v2 = vote.vote(x_test, y_pred)
    _report_predictions(y_test, y_pred_v2, classes, save)

    return y_pred_





In [6]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential

x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=0, test_size=0.1)
#metrics_ = ['accuracy', tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tf.keras.metrics.AUC()] #interesting metrics
metrics_ = ["accuracy"]


def _normalized_images(frame):
    """Stack the `img` column of `frame` into one float32 array divided by 255.

    float32 is what the network computes in, and it needs half the memory of the
    float64 array that dividing an integer array by 255 would produce.
    """
    images = np.array(frame["img"].to_list(), dtype=np.float32)
    images /= 255.0  # in place, so no second full-size array is allocated
    return images


x_train_norm = _normalized_images(x_train)  # normalize
x_test_norm = _normalized_images(x_test)  # normalize

logger.info(f"Train samples size [x_train={len(x_train)}, x_test={len(x_test)}]")

# Fraction of the training arrays that Keras holds out for validation.
# Early stopping monitors this held-out part, so x_test / y_test stay unseen
# until the final prediction and do not influence when training stops.
VALIDATION_SPLIT = 0.1

callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5)]

model = Sequential([
    # Two convolution stages, each followed by 2x2 max pooling.
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2,2)),
    # Dense classifier head on the flattened feature maps.
    tf.keras.layers.Flatten(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # One output unit per class, so the layer follows CLASSES automatically.
    tf.keras.layers.Dense(len(CLASSES), activation='softmax')
])

model.compile(optimizer='adam', metrics=metrics_, loss='categorical_crossentropy')
model.summary()
history = model.fit(
    x_train_norm,
    y_train,
    epochs=25,
    validation_split=VALIDATION_SPLIT,
    verbose=1,
    callbacks=callbacks,
)
y_pred = model.predict(x_test_norm)


[19:34:55] Train samples size [x_train=786696, x_test=87411]
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 32)        9248      
                                                                 
 batch_normalization (BatchN  (None, 28, 28, 32)       128       
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 12, 12, 64)        18496     
                                                             

In [None]:
# Evaluate the trained model on the test split and keep the voted predictions
# that eval_model returns.
y_pred_ = eval_model(
    history,
    x_test,
    y_test,
    y_pred,
    CLASSES,
    save=True,
)

In [None]:
# plots a sample of model errors
import matplotlib.pyplot as plt

# Class index with the highest score, for the voted predictions and the labels.
predict = np.argmax(y_pred_, axis=1)
true_y_val = np.argmax(y_test, axis=1)
# Positions (within the test split) where prediction and label disagree.
errors = np.flatnonzero(predict != true_y_val)
# NOTE(review): `c_generated` is not referenced anywhere in the visible notebook;
# kept in case other cells rely on it.
c_generated = 0

MAX_ERRORS_SHOWN = 10

if errors.size == 0:
    logger.info("No misclassified samples to display.")
else:
    # Draw without replacement so no sample is shown twice, and never ask for
    # more samples than there are errors.
    n_shown = min(MAX_ERRORS_SHOWN, errors.size)
    for i in np.random.choice(errors, n_shown, replace=False):
        x_ = x_test.iloc[i, :]
        plt.imshow(x_["img"], cmap="gray")
        plt.show()
        logger.info("Char: {}".format(x_["char"]))
        logger.info("Image: {}".format(x_["img_name"]))
        logger.info("Predicted label: {}".format(CLASSES[predict[i]]))
        logger.info("True label: {}".format(CLASSES[true_y_val[i]]))