In [None]:
import os
from config import config

# Settings read from the project configuration object.
CODA_DLL_PATH = config["preprocessing"]["coda_dll_path"]
H5_FILE = config["main"]["h5_file"]
CLASSES = config.get_classes()
IMG_SIZE = int(config["main"]["img_size"])

# Register the configured DLL directory before TensorFlow is imported, see
# https://github.com/tensorflow/tensorflow/issues/48868#issuecomment-841396124
# os.add_dll_directory only exists on Windows (Python 3.8+); skip the call on
# other platforms instead of failing with AttributeError.
if hasattr(os, "add_dll_directory"):
    os.add_dll_directory(CODA_DLL_PATH)

# to test:
# test 15,30,50 augment images (v1,v2)
# play with the network structure

import pandas as pd
import preprocessing
from augment import augment_image_v2
from plogging import logger

# Options forwarded to preprocessing.create_dataset: rotation and augmentation
# are switched on, with 5 augment cycles using augment_image_v2, and save=True.
DATASET_OPTIONS = dict(
    rotation=True,
    augment=True,
    augment_cycles=5,
    save=True,
    augment_method=augment_image_v2,
)
x_train, x_test, y_train, y_test = preprocessing.create_dataset(
    H5_FILE, **DATASET_OPTIONS
)

# Log the first rows of both feature frames as a quick sanity check.
logger.info(x_train.head())
logger.info(x_test.head())

In [None]:
# load additional datasets
# def extract(file):
#     data = pd.read_hdf(file, key="db")
#     Y = data["font"]
#     X = data.drop(columns=["font"])
#     return X,Y


# TRAIN_FILE = ""
# x_train, y_train = extract(TRAIN_FILE)
# # find corresponding test data

# TEST_FILE = TRAIN_FILE.replace("train", "test")
# x_test, y_test = extract(TEST_FILE)




In [None]:
import numpy as np
import tensorflow as tf

# Row i of this matrix is the one-hot vector for class index i.
CAT_CLASSES = tf.keras.utils.to_categorical(range(len(CLASSES)))


def to_cat(labels):
    """Map integer class indices to their one-hot rows in CAT_CLASSES.

    Labels that are already a 2-D floating-point array with one column per
    class are returned unchanged. This cell overwrites ``y_train``/``y_test``,
    so without this guard re-running it would index CAT_CLASSES with float
    arrays and raise IndexError.
    """
    labels = np.asarray(labels)
    already_encoded = (
        labels.ndim == 2
        and labels.shape[1] == len(CLASSES)
        and np.issubdtype(labels.dtype, np.floating)
    )
    if already_encoded:
        return labels
    return CAT_CLASSES[labels]


y_train = to_cat(y_train)
y_test = to_cat(y_test)


In [None]:
import metrics
import vote

def log_stats(y_test, y_pred, save=True, file_path="stats"):
    """Print accuracy, recall, precision and AUC, optionally saving them to a file.

    Each metric is a fresh ``tf.keras.metrics`` object updated once with
    ``(y_test, y_pred)``.

    Args:
        y_test: ground-truth labels, passed as the first argument of
            ``update_state``.
        y_pred: model predictions, passed as the second argument.
        save: when true, the same report is also written to ``file_path``.
        file_path: destination of the report; an existing file is overwritten.
            Missing parent directories are created.
    """
    named_metrics = (
        ("Accuracy", tf.keras.metrics.CategoricalAccuracy()),
        ("Recall", tf.keras.metrics.Recall()),
        ("Precision", tf.keras.metrics.Precision()),
        ("AUC", tf.keras.metrics.AUC()),
    )

    # Build the report once so the console and the file cannot drift apart.
    report = []
    for label, metric in named_metrics:
        metric.update_state(y_test, y_pred)
        report.append(f"{label}: {metric.result().numpy()}")

    for line in report:
        print(line)

    if save:
        # open(..., "w") does not create directories; make sure the target exists.
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(file_path, "w") as f:
            for line in report:
                print(line, file=f)

def eval_model(history, x_test, y_test, y_pred, classes, save=True):
    """Report model quality before and after the voting step.

    Plots the accuracy and loss curves from ``history``, then logs stats and
    plots the ROC curve and confusion matrix twice: first for the raw
    predictions, then for the predictions returned by ``vote.vote``.

    Args:
        history: training history handed to ``metrics.plot_acc``/``plot_loss``.
        x_test: test samples, forwarded to ``vote.vote``.
        y_test: ground-truth labels.
        y_pred: raw model predictions for ``x_test``.
        classes: class names forwarded to the ROC and confusion-matrix plots.
            The ROC plots previously ignored this argument and read the global
            ``CLASSES`` instead.
        save: forwarded to every plotting/logging helper.

    Returns:
        The predictions produced by ``vote.vote(x_test, y_pred)``.
    """
    logger.info("Model stats:")
    metrics.plot_acc(history, save=save)
    metrics.plot_loss(history, save=save)

    log_stats(y_test, y_pred, save=save, file_path="metrics/stats_before_votes.txt")
    metrics.plot_roc(y_test, y_pred, classes, zoom=False, save=save)
    metrics.plot_confusion_matrix(y_test, y_pred, classes, save=save)

    logger.info("After votes:")
    y_pred_ = vote.vote(x_test, y_pred)
    log_stats(y_test, y_pred_, save=save, file_path="metrics/stats_aftervotes.txt")
    metrics.plot_roc(y_test, y_pred_, classes, zoom=False, save=save)
    metrics.plot_confusion_matrix(y_test, y_pred_, classes, save=save)
    return y_pred_





In [None]:
from tensorflow.keras.models import Sequential

metrics_ = ["accuracy"]

# Stack the per-row images into one array and divide by 255
# (presumably 8-bit pixel values, giving a [0, 1] range - TODO confirm).
x_train_norm = np.array(x_train["img"].to_list()) / 255
x_test_norm = np.array(x_test["img"].to_list()) / 255

logger.info(f"Train samples size [x_train={len(x_train)}, x_test={len(x_test)}]")

# Stop training once val_accuracy has not improved for 5 consecutive epochs.
callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5)]

# Two conv/conv/pool stages followed by a stack of shrinking dense layers.
model = Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # One output unit per class: the labels are one-hot encoded over
    # len(CLASSES), so the width must follow CLASSES rather than a literal 7.
    tf.keras.layers.Dense(len(CLASSES), activation='softmax')
])

model.compile(optimizer='adam', metrics=metrics_, loss='categorical_crossentropy')
model.summary()
# NOTE(review): the test split doubles as validation data, so early stopping
# is tuned on the same samples that are evaluated afterwards.
history = model.fit(x_train_norm, y_train, epochs=40, validation_data=(x_test_norm, y_test), verbose=1, callbacks=callbacks)
y_pred = model.predict(x_test_norm)


In [None]:
# Evaluate the trained model; y_pred_ holds the predictions after voting.
y_pred_ = eval_model(
    history=history,
    x_test=x_test,
    y_test=y_test,
    y_pred=y_pred,
    classes=CLASSES,
    save=True,
)

In [None]:
# plots a sample of model errors
import matplotlib.pyplot as plt

# Class indices predicted after voting vs. the true ones.
predict = np.argmax(y_pred_, axis=1)
true_y_val = np.argmax(y_test, axis=1)
# Positions of the misclassified test samples.
errors = np.flatnonzero(predict != true_y_val)
c_generated = 0  # not used anywhere in this cell; kept for backward compatibility

# Show at most 10 distinct errors. Sampling without replacement avoids
# displaying the same sample twice, and capping the size avoids the ValueError
# np.random.choice raises when asked to draw from an empty array.
n_error_samples = min(10, len(errors))
if n_error_samples == 0:
    logger.info("No misclassified samples to display")
    error_sample = []
else:
    error_sample = np.random.choice(errors, n_error_samples, replace=False)

for i in error_sample:
    x_ = x_test.iloc[i, :]
    plt.imshow(x_["img"], cmap="gray")
    plt.show()
    logger.info("Char: {}".format(x_["char"]))
    logger.info("Image: {}".format(x_["img_name"]))
    logger.info("Predicted label: {}".format(CLASSES[predict[i]]))
    logger.info("True label: {}".format(CLASSES[true_y_val[i]]))

In [None]:
# save model
# MODEL_NAME = "model_0301_963_after_votes_augment_v1_874107r"
# model.save(f"models/{MODEL_NAME}")