# Setup

In [None]:
# Clone the KMNIST repository; it contains the download_data.py script run in a later cell.
!git clone https://github.com/rois-codh/kmnist.git

In [None]:
# Switch the working directory to the cloned repository.
%cd kmnist

In [None]:
# Run the repository's download script from inside the checkout.
# NOTE(review): presumably this produces the kmnist-*.npz archives that
# load_mnist_dataset() reads from /content/kmnist — confirm which options to pick.
!python download_data.py

In [4]:
import numpy as np
import tensorflow as tf

# Prepare Dataset

In [5]:
from google.colab.patches import cv2_imshow
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import build_montages
import matplotlib.pyplot as plt
import argparse
import cv2
import matplotlib
# Select matplotlib's non-interactive "Agg" backend (no GUI windows).
matplotlib.use("Agg")

In [6]:
def load_mnist_dataset(data_dir="/content/kmnist"):
  """Load the KMNIST train and test archives and merge them into one dataset.

  Args:
    data_dir: Directory holding ``kmnist-train-imgs.npz``,
      ``kmnist-train-labels.npz``, ``kmnist-test-imgs.npz`` and
      ``kmnist-test-labels.npz``. Defaults to the path this notebook uses
      on Colab.

  Returns:
    A ``(data, labels)`` tuple. ``data`` is the train images followed by the
    test images (stacked with ``np.vstack``); ``labels`` is the train labels
    followed by the test labels (joined with ``np.hstack``), in the same order.
  """
  import os  # local import: the notebook's import cells do not bring in `os`

  def _read_array(filename):
    # Each archive keeps its single array under NumPy's default key "arr_0".
    # The context manager closes the underlying zip handle, which a bare
    # np.load(path)["arr_0"] would leave open.
    with np.load(os.path.join(data_dir, filename)) as archive:
      return archive["arr_0"]

  train_data = _read_array("kmnist-train-imgs.npz")
  train_labels = _read_array("kmnist-train-labels.npz")
  test_data = _read_array("kmnist-test-imgs.npz")
  test_labels = _read_array("kmnist-test-labels.npz")

  # Train and test are merged here; the notebook re-splits them later.
  data = np.vstack([train_data, test_data])
  labels = np.hstack([train_labels, test_labels])
  return (data, labels)

In [10]:
data, labels = load_mnist_dataset()

# Resize every image to 32x32 (the spatial size the model is built with)
# and gather the results into a single float32 array.
data = np.stack([cv2.resize(image, (32, 32)) for image in data]).astype("float32")

# Append a trailing channel axis and rescale pixel values from [0, 255] to [0, 1].
data = data[..., np.newaxis] / 255.0

In [11]:
# One-hot encode the integer class ids.
le = LabelBinarizer()
labels = le.fit_transform(labels)

# Column sums of the one-hot matrix give the number of samples per class.
counts = labels.sum(axis=0)
classTotals = labels.sum(axis=0)

# Weight each class by (largest class count / its own count), so the most
# frequent class gets weight 1 and rarer classes get proportionally more.
classWeight = {
  index: classTotals.max() / total
  for index, total in enumerate(classTotals)
}

# Stratified 80/20 split with a fixed seed for reproducibility.
trainX, testX, trainY, testY = train_test_split(
  data,
  labels,
  test_size=0.20,
  stratify=labels,
  random_state=42,
)

In [12]:
# Augmentation applied on the fly to training batches.
aug = ImageDataGenerator(
  # Geometric jitter.
  rotation_range=10,
  shear_range=0.15,
  zoom_range=0.05,
  # Translations, as a fraction of image width/height.
  width_shift_range=0.1,
  height_shift_range=0.1,
  # No mirroring; pixels exposed by a transform are filled from the nearest edge.
  horizontal_flip=False,
  fill_mode="nearest",
)

# Train Model

In [9]:
# Settings
EPOCHS = 50  # number of training epochs passed to model.fit
INIT_LR = 1e-1  # initial learning rate for the SGD optimizer
BS = 128  # mini-batch size used for training and for batched prediction

In [13]:
# ResNet50 with randomly initialised weights (weights=None), taking
# 32x32 single-channel images and producing a 10-class output.
model = tf.keras.applications.resnet50.ResNet50(
  weights=None,
  input_shape=(32, 32, 1),
  classes=10,
)

In [14]:
# The legacy `decay=` keyword is rejected (TF >= 2.11) or ignored (Keras 3)
# by current SGD optimizers. InverseTimeDecay with decay_steps=1 reproduces
# the same per-step schedule: lr = INIT_LR / (1 + decay_rate * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
  initial_learning_rate=INIT_LR,
  decay_steps=1,
  decay_rate=INIT_LR / EPOCHS)
opt = SGD(learning_rate=lr_schedule)

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

In [None]:
# Generator yielding augmented training batches of size BS.
train_batches = aug.flow(trainX, trainY, batch_size=BS)

# Train with the class weights computed earlier; the held-out split is
# passed as validation data and evaluated after every epoch.
H = model.fit(
  train_batches,
  epochs=EPOCHS,
  steps_per_epoch=len(trainX) // BS,
  validation_data=(testX, testY),
  class_weight=classWeight,
  verbose=1,
)

In [None]:
# Change the working directory to /content/ before saving the model.
%cd /content/

In [None]:
# Write the trained model to manga_ocr.h5 in the current working directory.
model.save("manga_ocr.h5")

In [45]:
# Class index -> hiragana character, listed in class-index order (0..9).
label_dict = dict(enumerate("おきすつなはまやれを"))

# Display names are the stringified class indices (the dict keys), not the
# characters themselves.
label_names = list(map(str, label_dict))

# Evaluate Model

In [None]:
# Class probabilities for the whole held-out split.
predictions = model.predict(testX, batch_size=BS)

# Collapse one-hot targets and probability rows to class indices.
true_ids = testY.argmax(axis=1)
predicted_ids = predictions.argmax(axis=1)

print(classification_report(true_ids, predicted_ids, target_names=label_names))

# Analyse Model

In [None]:
# Pick 49 random held-out indices (np.random.choice samples with replacement
# by default, so an index can repeat).
sample_ids = np.random.choice(np.arange(0, len(testY)), size=(49,))

# One batched forward pass instead of 49 single-image predict() calls, which
# were slow and printed a progress line per image.
probs = model.predict(testX[sample_ids])
predicted_ids = probs.argmax(axis=1)
true_ids = testY[sample_ids].argmax(axis=1)

images = []

for i, predicted, actual in zip(sample_ids, predicted_ids, true_ids):
  label = label_names[predicted]
  # OpenCV colours are BGR: green for a correct prediction, red for a wrong one.
  color = (0, 255, 0) if predicted == actual else (0, 0, 255)

  # Back to 8-bit pixels, replicated to 3 channels so coloured text can be drawn.
  image = (testX[i] * 255).astype("uint8")
  image = cv2.merge([image] * 3)
  image = cv2.resize(image, (96, 96), interpolation=cv2.INTER_LINEAR)
  cv2.putText(image, label, (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2)
  images.append(image)

# Tile the 49 annotated 96x96 images into a single 7x7 montage.
montage = build_montages(images, (96, 96), (7, 7))[0]

# cv2_imshow renders inline in Colab. No HighGUI window is opened, so the
# desktop-style cv2.waitKey(0) is not needed (and raises on headless builds).
cv2_imshow(montage)

# Reference

https://www.pyimagesearch.com/2020/08/17/ocr-with-keras-tensorflow-and-deep-learning/