# Setup

In [None]:
# Mount Google Drive so that files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
# Fetch the KMNIST repository into ./kmnist; a later cell runs its download_data.py script.
!git clone https://github.com/rois-codh/kmnist.git

In [None]:
# Work from inside the cloned repository so its scripts and downloads resolve by relative path.
%cd kmnist

In [None]:
# Use Kuzushiji-49 and Kuzushiji-Kanji.
# The download script is launched twice, once per dataset.
# NOTE(review): presumably the script prompts for which dataset to fetch on each run - confirm.
for i in range(2):
    !python download_data.py

In [5]:
import numpy as np
import tensorflow as tf
from google.colab.patches import cv2_imshow
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import build_montages
from PIL import Image
import matplotlib.pyplot as plt
import argparse
import cv2
import matplotlib
import os
import tarfile
matplotlib.use("Agg")

# Prepare Dataset

In [6]:
# Unpack the Kuzushiji-Kanji archive into the current directory. The context
# manager closes the archive even when extraction fails part-way through.
# NOTE(review): extractall() trusts the member paths stored in the archive, so it
# should only be used on archives from a trusted source.
with tarfile.open("kkanji.tar") as tar:
    tar.extractall()

In [7]:
# Map every kanji character to a class index. Numbering starts at 49 so the
# kanji indices do not collide with the Kuzushiji-49 class indices.
# NOTE(review): os.listdir() returns entries in arbitrary order, so this mapping
# is only as stable as the directory listing; confirm it matches the order that
# was used when the saved kanji label files were produced.
kanji_label_dict = {
    chr(int(folder[2:], 16)): index  # folder name "U+XXXX" -> the character itself
    for index, folder in enumerate(os.listdir("./kkanji2"), start=49)
}

In [None]:
# Switch to the Drive folder; the .npz and .csv files opened by relative name in the following cells are read from here.
%cd /content/drive/MyDrive/NRP/Project/OCRData

In [9]:
def open_kanji_data(filename: str):
    """Load a saved image archive and prepare it as network input.

    Reads the ``arr_0`` array from the ``.npz`` file at ``filename``, resizes
    every image to 32x32, appends a trailing channel axis and divides the
    pixel values by 255.

    Args:
        filename: Path to an ``.npz`` archive whose ``arr_0`` entry holds the images.

    Returns:
        ``float32`` array with one 32x32 image per input image and a trailing
        axis of length 1, i.e. ``(num_images, 32, 32, 1)`` for 2-D input images.
    """
    with open(filename, "rb") as data_file:
        np_data = np.load(data_file)["arr_0"]

    # Resize the whole array. Processing it in chunks of 50 and keeping only the
    # first chunk would silently drop every image after the 50th.
    data = np.array([cv2.resize(image, (32, 32)) for image in np_data], dtype="float32")
    data = np.expand_dims(data, axis=-1)
    data /= 255.0
    return data

In [10]:
def open_kanji_labels(filename: str):
    """Return the array stored under ``arr_0`` in the ``.npz`` file at ``filename``."""
    with np.load(filename) as archive:
        return archive["arr_0"]

In [11]:
# Load the three kanji image archives (paths are relative to the current directory).
kanji_one, kanji_two, kanji_three = (
    open_kanji_data(path)
    for path in ("kanji_one.npz", "kanji_two.npz", "kanji_three.npz")
)

In [12]:
# Load the label arrays that accompany the three kanji image archives.
kanji_labels_one, kanji_labels_two, kanji_labels_three = (
    open_kanji_labels(path)
    for path in ("kanji_labels_one.npz", "kanji_labels_two.npz", "kanji_labels_three.npz")
)

In [None]:
import csv

# Map each Kuzushiji-49 character (third CSV column) to its class index (first column).
hiragana_label_dict = {}

# The class map contains Japanese characters, so decode it explicitly as UTF-8
# rather than relying on the platform default; newline="" is what the csv module
# expects for files handed to csv.reader.
with open("k49_classmap.csv", "r", encoding="utf-8", newline="") as file:
    csvreader = csv.reader(file)
    next(csvreader, None)  # skip the header row; tolerate an empty file
    for row in csvreader:
        # The index is kept exactly as read from the file, i.e. as a string.
        hiragana_label_dict[row[2]] = row[0]

In [14]:
def load_49_dataset(data_dir: str = "/content/kmnist"):
    """Load the Kuzushiji-49 train and test splits and merge them.

    Args:
        data_dir: Directory holding the four ``k49-*.npz`` files. Defaults to
            the location this notebook downloads them to.

    Returns:
        Tuple ``(data, labels)``: the train images followed by the test images
        (stacked along the first axis) and the matching labels in the same order.
    """
    def read_array(name):
        # Close each archive as soon as its array has been read.
        with np.load(os.path.join(data_dir, name)) as archive:
            return archive["arr_0"]

    train_data = read_array("k49-train-imgs.npz")
    train_labels = read_array("k49-train-labels.npz")
    test_data = read_array("k49-test-imgs.npz")
    test_labels = read_array("k49-test-labels.npz")

    data = np.vstack([train_data, test_data])
    labels = np.hstack([train_labels, test_labels])
    return (data, labels)

In [15]:
# Load Kuzushiji-49, resize every image to 32x32, add a trailing channel axis
# and divide the pixel values by 255.
raw_49, labels_49 = load_49_dataset()

data_49 = np.array([cv2.resize(img, (32, 32)) for img in raw_49], dtype="float32")
del raw_49  # the un-resized images are not needed again
data_49 = data_49[..., np.newaxis]
data_49 /= 255.0

In [16]:
# Combine Kuzushiji-49 with the three kanji parts; images and labels are joined
# in the same order so that rows stay aligned.
data = np.concatenate((data_49, kanji_one, kanji_two, kanji_three), axis=0)
labels = np.concatenate((labels_49, kanji_labels_one, kanji_labels_two, kanji_labels_three))

In [None]:
# One-hot encode the class labels.
le = LabelBinarizer()
labels = le.fit_transform(labels)

# Weight each class by (size of the largest class / its own size), so the most
# frequent class gets weight 1 and rarer classes proportionally more.
class_totals = labels.sum(axis=0)
largest_class = class_totals.max()  # invariant, so computed once
class_weight = {i: largest_class / total for i, total in enumerate(class_totals)}

# Hold out 20% of the samples for evaluation, stratified on the labels.
train_x, test_x, train_y, test_y = train_test_split(data, labels, test_size=0.20, stratify=labels, random_state=42)

In [None]:
# Random geometric augmentation applied to training batches.
# Horizontal flipping is disabled (presumably because mirrored characters are
# not valid glyphs - confirm).
augmentation_options = dict(
    rotation_range=10,
    zoom_range=0.05,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    horizontal_flip=False,
    fill_mode="nearest",
)
aug = ImageDataGenerator(**augmentation_options)

# Train Model

In [None]:
# Training hyper-parameters
EPOCHS = 50    # number of training epochs
INIT_LR = 0.1  # initial learning rate handed to SGD
BS = 128       # batch size for training and prediction

In [None]:
# Build an untrained ResNet50 classifier for 32x32 single-channel images.
# The number of output units is read from the one-hot training labels, so it
# always equals the number of classes in the data rather than a hard-coded count.
model = tf.keras.applications.resnet50.ResNet50(input_shape=(32, 32, 1), weights=None, classes=train_y.shape[1])

In [None]:
# Decay the learning rate as INIT_LR / (1 + (INIT_LR / EPOCHS) * step), which is
# the time-based decay the optimizer's `decay` argument applied. Current Keras
# optimizers reject `decay`, so the same schedule is passed explicitly.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=INIT_LR,
    decay_steps=1,
    decay_rate=INIT_LR / EPOCHS,
)
opt = SGD(learning_rate=lr_schedule)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

In [None]:
# Train on augmented batches and validate on the held-out split.
train_batches = aug.flow(train_x, train_y, batch_size=BS)
steps = len(train_x) // BS  # whole batches per epoch

H = model.fit(
    train_batches,
    steps_per_epoch=steps,
    epochs=EPOCHS,
    validation_data=(test_x, test_y),
    class_weight=class_weight,
    verbose=1,
)

In [None]:
# Return to /content; the model file written in the next cell uses a relative path and lands here.
%cd /content/

In [None]:
# Write the trained model to a single .h5 file in the current working directory.
# NOTE(review): the path is relative to the cwd, not to the mounted Drive folder - confirm this is the intended destination.
model.save("manga_ocr.h5")

# Evaluate Model

In [None]:
# Merge both character -> index maps (hiragana first, then kanji) and list the
# characters, in insertion order, as the display names of the classes.
label_dict = {**hiragana_label_dict, **kanji_label_dict}
label_names = [str(character) for character in label_dict]

In [None]:
# Predict on the held-out split and print per-class precision / recall / F1.
predictions = model.predict(test_x, batch_size=BS)

# Pass the class ids explicitly. Without `labels`, the report infers the classes
# from the values that occur in the data and raises when that count differs from
# len(target_names), e.g. when a rare class has no sample in the test split.
print(classification_report(
    test_y.argmax(axis=1),
    predictions.argmax(axis=1),
    labels=np.arange(len(label_names)),
    target_names=label_names,
))

# Analyse Model

In [None]:
images = []

# Pick 49 random test samples (drawn with replacement, so repeats are possible)
# and classify them in one batched call instead of one predict() call per image.
sample_ids = np.random.choice(np.arange(0, len(test_y)), size=(49,))
sample_preds = model.predict(test_x[sample_ids]).argmax(axis=1)

for i, predicted in zip(sample_ids, sample_preds):
    label = label_names[predicted]
    image = (test_x[i] * 255).astype("uint8")

    # Green text for a correct prediction, red (BGR order) for a wrong one.
    color = (0, 255, 0) if predicted == np.argmax(test_y[i]) else (0, 0, 255)

    # Replicate the single channel three times so coloured text can be drawn,
    # then enlarge the 32x32 tile to 96x96.
    image = cv2.merge([image] * 3)
    image = cv2.resize(image, (96, 96), interpolation=cv2.INTER_LINEAR)
    # NOTE(review): OpenCV's Hershey fonts only cover a subset of ASCII, so the
    # Japanese characters in `label` are not rendered as glyphs here - confirm
    # and consider drawing the text with a CJK-capable font instead.
    cv2.putText(image, label, (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2)
    images.append(image)

# Arrange the 49 tiles in a 7x7 grid.
montage = build_montages(images, (96, 96), (7, 7))[0]

# cv2_imshow renders the image inline in the notebook. No OpenCV window is
# opened, so a cv2.waitKey() call is not needed.
cv2_imshow(montage)