# Setup

Train in [Kaggle](https://www.kaggle.com/xyntechx/manga-ocr/edit)

In [None]:
# Mount Google Drive at /content/drive; later cells read data from and load
# weights out of paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
# Fetch the rois-codh/kmnist repository, which ships the download_data.py
# script executed a few cells below.
!git clone https://github.com/rois-codh/kmnist.git

In [None]:
# Enter the cloned repository so download_data.py is run from inside it.
%cd kmnist

In [None]:
# Download Kuzushiji-49
# presumably the script saves into the current directory (the kmnist checkout);
# load_hiragana_dataset expects the k49-*.npz files under /content/kmnist — confirm.
!python download_data.py

In [None]:
import numpy as np
import tensorflow as tf
from google.colab.patches import cv2_imshow
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import imutils
from imutils import build_montages
from imutils.contours import sort_contours
import cv2
import matplotlib
matplotlib.use("Agg")

In [None]:
# Make the Drive OCR data folder the working directory; relative paths in the
# following cells resolve against it until the next %cd.
%cd /content/drive/MyDrive/NRP/Project/OCRData

# Prepare Dataset

In [None]:
def _load_npz_array(path, key="arr_0"):
    """Read a single array out of an ``.npz`` archive and close the archive."""
    # np.load on an .npz returns a lazy NpzFile that keeps the underlying file
    # open; the context manager releases it as soon as the array is read.
    with np.load(path) as archive:
        return archive[key]


def load_hiragana_dataset(data_dir="/content/kmnist", image_size=(32, 32)):
    """Load the Kuzushiji-49 train and test splits as one preprocessed set.

    The train and test archives are concatenated (train first); the caller is
    expected to create its own split afterwards.

    Args:
        data_dir: Directory containing the four ``k49-*.npz`` archives.
        image_size: ``(width, height)`` handed to ``cv2.resize`` for every image.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a float32 array with a trailing
        channel axis of length 1 whose values were divided by 255. ``labels``
        holds the label arrays exactly as stored in the archives.
    """
    train_data = _load_npz_array(f"{data_dir}/k49-train-imgs.npz")
    train_labels = _load_npz_array(f"{data_dir}/k49-train-labels.npz")
    test_data = _load_npz_array(f"{data_dir}/k49-test-imgs.npz")
    test_labels = _load_npz_array(f"{data_dir}/k49-test-labels.npz")

    data = np.vstack([train_data, test_data])
    data = [cv2.resize(image, image_size) for image in data]
    data = np.array(data, dtype="float32")
    # Add the channel axis the network input expects, then rescale.
    data = np.expand_dims(data, axis=-1)
    data /= 255.0

    labels = np.hstack([train_labels, test_labels])

    return data, labels

In [None]:
def load_kanji_dataset(
    data_dir="/content/drive/MyDrive/NRP/Project/OCRData",
    image_size=(32, 32),
    label_offset=49,
):
    """Load the kuzushiji50 kanji train and test splits as one preprocessed set.

    The train and test files are concatenated (train first); the caller is
    expected to create its own split afterwards.

    Args:
        data_dir: Directory containing the four ``kuzushiji50_*.npy`` files.
        image_size: ``(width, height)`` handed to ``cv2.resize`` for every image.
        label_offset: Constant added to every label so the kanji ids do not
            collide with the ids of the other dataset they are merged with.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a float32 array with a trailing
        channel axis of length 1 whose values were divided by 255. ``labels``
        is an int array of the stored labels shifted by ``label_offset``.
    """
    train_data = np.load(f"{data_dir}/kuzushiji50_train_imgs.npy")
    train_labels = np.load(f"{data_dir}/kuzushiji50_train_labels.npy")
    test_data = np.load(f"{data_dir}/kuzushiji50_test_imgs.npy")
    test_labels = np.load(f"{data_dir}/kuzushiji50_test_labels.npy")

    data = np.vstack([train_data, test_data])
    data = [cv2.resize(image, image_size) for image in data]
    data = np.array(data, dtype="float32")
    # Add the channel axis the network input expects, then rescale.
    data = np.expand_dims(data, axis=-1)
    data /= 255.0

    # Widen to int before shifting so the addition is vectorised and cannot
    # wrap around in a narrow stored dtype.
    labels = np.hstack([train_labels, test_labels]).astype("int") + label_offset

    return data, labels

In [None]:
# Load both character sets, then stack them (hiragana first, kanji second)
# into a single image array and a single label vector.
data_hiragana, labels_hiragana = load_hiragana_dataset()
data_kanji, labels_kanji = load_kanji_dataset()

data = np.concatenate((data_hiragana, data_kanji), axis=0)
labels = np.concatenate((labels_hiragana, labels_kanji), axis=0)

In [None]:
# One-hot encode the integer labels; column sums then give per-class counts.
le = LabelBinarizer()
labels = le.fit_transform(labels)

class_totals = labels.sum(axis=0)
counts = class_totals.copy()

# Weight every class by (largest class count / its own count) so that rarer
# classes contribute more to the loss.
largest_total = class_totals.max()
class_weight = {
    index: largest_total / total
    for index, total in enumerate(class_totals)
}

# 80/20 stratified split with a fixed seed.
train_x, test_x, train_y, test_y = train_test_split(
    data,
    labels,
    test_size=0.20,
    stratify=labels,
    random_state=42,
)

In [None]:
# On-the-fly augmentation used for the training batches: small geometric
# perturbations only, and no mirroring (horizontal_flip stays off).
aug = ImageDataGenerator(
    horizontal_flip=False,
    rotation_range=10,
    shear_range=0.15,
    zoom_range=0.05,
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
)

# Train Model

In [None]:
# Settings
# EPOCHS: number of epochs passed to model.fit.
EPOCHS = 50
# INIT_LR: initial SGD learning rate; also used to derive the decay term (INIT_LR / EPOCHS).
INIT_LR = 1e-1
# BS: batch size for the augmented training batches and for the evaluation predict call.
BS = 128

In [None]:
# ResNet50 with randomly initialised weights (weights=None) for 32x32
# single-channel inputs and a 99-way classifier head.
# presumably 99 = 49 hiragana classes + 50 kanji classes (kanji labels are
# shifted by 49 in load_kanji_dataset) — confirm against the label data.
model = tf.keras.applications.resnet50.ResNet50(input_shape=(32, 32, 1), weights=None, classes=99)

In [None]:
# The `decay` keyword is only understood by the legacy Keras optimizers and is
# rejected by the current ones. InverseTimeDecay with decay_steps=1 applies the
# same per-step rule the legacy argument implemented:
#     lr = INIT_LR / (1 + (INIT_LR / EPOCHS) * step)
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=INIT_LR,
    decay_steps=1,
    decay_rate=INIT_LR / EPOCHS,
)
opt = SGD(learning_rate=lr_schedule)

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

In [None]:
# Train on augmented batches; the held-out split doubles as validation data.
# Integer division means a trailing partial batch is not counted in the
# number of steps per epoch.
train_batches = aug.flow(train_x, train_y, batch_size=BS)
steps = len(train_x) // BS

H = model.fit(
    train_batches,
    steps_per_epoch=steps,
    epochs=EPOCHS,
    validation_data=(test_x, test_y),
    class_weight=class_weight,
    verbose=1,
)

In [None]:
# Switch to /content so the relative save path in the next cell resolves there.
%cd /content/

In [None]:
# Saved relative to the current working directory.
# NOTE(review): the evaluation and inference cells load weights from
# "kanji_hiragana.h5" on Drive, not from this "manga_ocr.h5" — confirm how the
# saved file is meant to get there.
model.save("manga_ocr.h5")

# Evaluate Model

In [None]:
# Rebuild the architecture and restore trained weights from Drive, so the
# evaluation can run without repeating the training cells.
model = tf.keras.applications.resnet50.ResNet50(
    input_shape=(32, 32, 1),
    weights=None,
    classes=99,
)
model.load_weights("/content/drive/MyDrive/NRP/Project/Working/OCR/kanji_hiragana.h5")

# Class names are simply the class indices rendered as strings.
label_names = list(map(str, range(99)))
predictions = model.predict(test_x, batch_size=BS)

# Collapse one-hot targets and probability rows to class indices.
true_classes = test_y.argmax(axis=1)
predicted_classes = predictions.argmax(axis=1)
print(classification_report(true_classes, predicted_classes, target_names=label_names))

# Analyse Model

In [None]:
# Visual spot check: draw 49 random held-out samples (sampled with
# replacement), label each with the predicted class and show them as a 7x7 grid.
images = []

sample_indices = np.random.choice(np.arange(0, len(test_y)), size=(49,))

# One batched forward pass instead of 49 separate predict() calls.
sample_predictions = model.predict(test_x[sample_indices]).argmax(axis=1)

for i, predicted_class in zip(sample_indices, sample_predictions):
    label = label_names[predicted_class]

    # Undo the 1/255 scaling so the tile can be drawn as an 8-bit image.
    tile = (test_x[i] * 255).astype("uint8")

    # Colours are BGR: green for a correct prediction, red for a wrong one.
    color = (0, 255, 0)
    if predicted_class != np.argmax(test_y[i]):
        color = (0, 0, 255)

    # Replicate the single channel three times so coloured text can be drawn.
    tile = cv2.merge([tile] * 3)
    tile = cv2.resize(tile, (96, 96), interpolation=cv2.INTER_LINEAR)
    cv2.putText(tile, label, (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2)
    images.append(tile)

montage = build_montages(images, (96, 96), (7, 7))[0]

# cv2_imshow renders inline in the notebook, so there is no window for
# cv2.waitKey to wait on.
cv2_imshow(montage)

# Use Model

In [None]:
image_path = "/content/sample.png"
image = cv2.imread(image_path)

# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, which would otherwise surface as an obscure cvtColor error below.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)

# Edge map -> outer contours only -> ordered left to right.
edged = cv2.Canny(blurred, 30, 150)
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)

# sort_contours unpacks its sorted result and fails on an empty sequence, so
# only sort when at least one contour was found.
if len(cnts) > 0:
    cnts = sort_contours(cnts, method="left-to-right")[0]

chars = []

In [None]:
# Display the loaded input image inline.
cv2_imshow(image)

In [None]:
# Start from an empty list every time this cell runs, so re-running it neither
# appends duplicate candidates nor trips over a previous value of `chars`.
chars = []

for c in cnts:
    (x, y, w, h) = cv2.boundingRect(c)

    # Keep only boxes whose size is plausible for a single character.
    if not (5 <= w <= 150 and 15 <= h <= 120):
        continue

    # Otsu threshold, inverted: dark strokes become white on a black background.
    roi = gray[y:y + h, x:x + w]
    thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    (tH, tW) = thresh.shape

    # Scale the longer side to 32 px while keeping the aspect ratio.
    if tW > tH:
        thresh = imutils.resize(thresh, width=32)
    else:
        thresh = imutils.resize(thresh, height=32)

    # Pad the shorter side symmetrically with black towards a 32x32 square.
    (tH, tW) = thresh.shape
    dX = int(max(0, 32 - tW) / 2.0)
    dY = int(max(0, 32 - tH) / 2.0)

    padded = cv2.copyMakeBorder(
        thresh,
        top=dY,
        bottom=dY,
        left=dX,
        right=dX,
        borderType=cv2.BORDER_CONSTANT,
        value=(0, 0, 0)
    )

    # The integer padding can leave the result one pixel short, so force the
    # exact size before scaling and adding the channel axis.
    padded = cv2.resize(padded, (32, 32))
    padded = padded.astype("float32") / 255.0
    padded = np.expand_dims(padded, axis=-1)

    chars.append((padded, (x, y, w, h)))

In [None]:
# Rebuild the 99-class ResNet50 and restore trained weights from Drive.
# NOTE(review): the evaluation cell loads ".../Working/OCR/kanji_hiragana.h5"
# while this path has no "OCR/" component — confirm which location is correct.
model = tf.keras.applications.resnet50.ResNet50(input_shape=(32, 32, 1), weights=None, classes=99)
model.load_weights("/content/drive/MyDrive/NRP/Project/Working/kanji_hiragana.h5")

In [None]:
# `chars` holds (preprocessed_roi, bounding_box) pairs. An empty list cannot be
# turned into a valid model input, so fail with a clear message instead.
if len(chars) == 0:
    raise ValueError("No character candidates were extracted from the image")

boxes = [box for (_, box) in chars]

# Keep `chars` itself untouched (a list of pairs) so this cell can be re-run;
# the stacked batch gets its own name.
char_batch = np.array([roi for (roi, _) in chars], dtype="float32")
preds = model.predict(char_batch)

# Class names are simply the class indices rendered as strings.
label_names = [str(index) for index in range(99)]

In [None]:
# Draw on a copy so the loaded `image` stays unmodified and this cell gives
# the same picture every time it is run.
annotated = image.copy()

for pred, (x, y, w, h) in zip(preds, boxes):
    # Most probable class and its score for this character candidate.
    i = np.argmax(pred)
    prob = pred[i]
    label = label_names[i]

    print("Label:", label, "\tProbability:", prob * 100)
    cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(annotated, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)

# cv2_imshow renders inline in the notebook, so there is no window for
# cv2.waitKey to wait on.
cv2_imshow(annotated)