https://github.com/agoila/lisa-faster-R-CNN/blob/master/pyimagesearch/nn/conv/resnet.py

In [19]:
import tensorflow as tf

# import the necessary packages
# from keras.layers.normalization import batch_normalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import AveragePooling2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.convolutional import ZeroPadding2D
from keras.layers.core import Activation
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
from keras.models import Model
from keras.layers import add
from keras.regularizers import l2
from keras import backend as K

class ResNet:
    """Builder for a ResNet-style classifier using the Keras functional API.

    Every residual module applies BN => ReLU => CONV three times
    (1x1, 3x3, 1x1) and adds the result to a shortcut branch.
    """

    @staticmethod
    def residual_module(data, K, stride, chanDim, red=False,
                        reg=0.0001, bnEps=2e-5, bnMom=0.9):
        """Apply one residual module to ``data`` and return the output tensor.

        Args:
            data: Input tensor of the module.
            K: Number of filters learned by the final 1x1 CONV; the first
                two CONV layers learn ``int(K * 0.25)`` filters. Note that
                this parameter shadows the module-level ``K`` backend alias
                inside this method (the backend is not needed here).
            stride: Stride of the 3x3 CONV (and of the shortcut CONV when
                ``red`` is true).
            chanDim: Axis passed to every batch normalization layer.
            red: When true, the shortcut is a strided 1x1 CONV instead of
                the identity, so its shape matches the main branch.
            reg: L2 regularization strength for every CONV kernel.
            bnEps: Epsilon used by the batch normalization layers.
            bnMom: Momentum used by the batch normalization layers.

        Returns:
            The element-wise sum of the main branch and the shortcut.
        """
        # the shortcut branch starts out as the (identity) input
        shortcut = data

        # first block: BN => ReLU => 1x1 CONV with a quarter of the filters
        bn1 = tf.keras.layers.BatchNormalization(
            axis=chanDim, epsilon=bnEps, momentum=bnMom)(data)
        act1 = Activation("relu")(bn1)
        conv1 = Conv2D(int(K * 0.25), (1, 1), use_bias=False,
                       kernel_regularizer=l2(reg))(act1)

        # second block: BN => ReLU => 3x3 CONV; this is the only layer of
        # the main branch that applies the requested stride
        bn2 = tf.keras.layers.BatchNormalization(
            axis=chanDim, epsilon=bnEps, momentum=bnMom)(conv1)
        act2 = Activation("relu")(bn2)
        conv2 = Conv2D(int(K * 0.25), (3, 3), strides=stride,
                       padding="same", use_bias=False,
                       kernel_regularizer=l2(reg))(act2)

        # third block: BN => ReLU => 1x1 CONV restoring the K filters
        bn3 = tf.keras.layers.BatchNormalization(
            axis=chanDim, epsilon=bnEps, momentum=bnMom)(conv2)
        act3 = Activation("relu")(bn3)
        conv3 = Conv2D(K, (1, 1), use_bias=False,
                       kernel_regularizer=l2(reg))(act3)

        # when reducing, project the shortcut from the first activation so
        # its filter count and spatial size match the main branch
        if red:
            shortcut = Conv2D(K, (1, 1), strides=stride,
                              use_bias=False,
                              kernel_regularizer=l2(reg))(act1)

        # add together the shortcut and the final CONV
        return add([conv3, shortcut])

    @staticmethod
    def build(width, height, depth, classes, stages, filters,
              reg=0.0001, bnEps=2e-5, bnMom=0.9, dataset="cifar"):
        """Construct the full network and return it as a Keras ``Model``.

        Args:
            width: Input image width.
            height: Input image height.
            depth: Number of input channels.
            classes: Number of output classes of the softmax classifier.
            stages: Sequence with the number of residual modules per stage.
            filters: Sequence of filter counts; ``filters[0]`` is used by
                the initial CONV and ``filters[i + 1]`` by stage ``i``.
            reg: L2 regularization strength applied to every CONV and
                Dense kernel, including those inside the residual modules.
            bnEps: Epsilon used by the batch normalization layers.
            bnMom: Momentum used by the batch normalization layers.
            dataset: ``"cifar"`` applies a single 3x3 CONV after the input
                BN; ``"tiny_imagenet"`` applies CONV => BN => ACT => POOL.
                Any other value applies no initial CONV at all.

        Returns:
            The constructed (uncompiled) model named ``"resnet"``.
        """
        # default to "channels last" ordering
        inputShape = (height, width, depth)
        chanDim = -1

        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1

        # set the input and apply BN
        inputs = Input(shape=inputShape)
        x = tf.keras.layers.BatchNormalization(
            axis=chanDim, epsilon=bnEps, momentum=bnMom)(inputs)

        if dataset == "cifar":
            # apply a single CONV layer
            x = Conv2D(filters[0], (3, 3), use_bias=False,
                       padding="same", kernel_regularizer=l2(reg))(x)
        elif dataset == "tiny_imagenet":
            # apply CONV => BN => ACT => POOL to reduce spatial size
            x = Conv2D(filters[0], (5, 5), use_bias=False,
                       padding="same", kernel_regularizer=l2(reg))(x)
            x = tf.keras.layers.BatchNormalization(
                axis=chanDim, epsilon=bnEps, momentum=bnMom)(x)
            x = Activation("relu")(x)
            x = ZeroPadding2D((1, 1))(x)
            x = MaxPooling2D((3, 3), strides=(2, 2))(x)

        # loop over the stages
        for i, num_modules in enumerate(stages):
            # the first stage keeps the spatial size; every later stage
            # opens with a stride-2 module that halves it
            stride = (1, 1) if i == 0 else (2, 2)

            # forward `reg` so the residual modules use the same
            # regularization strength as the rest of the network instead
            # of silently falling back to residual_module's default
            x = ResNet.residual_module(x, filters[i + 1], stride,
                                       chanDim, red=True, reg=reg,
                                       bnEps=bnEps, bnMom=bnMom)

            # remaining modules of the stage keep shape (identity shortcut)
            for _ in range(num_modules - 1):
                x = ResNet.residual_module(x, filters[i + 1],
                                           (1, 1), chanDim, reg=reg,
                                           bnEps=bnEps, bnMom=bnMom)

        # apply BN => ACT => POOL
        x = tf.keras.layers.BatchNormalization(
            axis=chanDim, epsilon=bnEps, momentum=bnMom)(x)
        x = Activation("relu")(x)
        x = AveragePooling2D((8, 8))(x)

        # softmax classifier
        x = Flatten()(x)
        x = Dense(classes, kernel_regularizer=l2(reg))(x)
        x = Activation("softmax")(x)

        # create and return the constructed network architecture
        return Model(inputs, x, name="resnet")

In [20]:
import cv2
from imutils import build_montages
from keras.datasets import mnist
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
import matplotlib; matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# import tensorflow_datasets as tfds

# from models.pyimagesearch import resnet
# from keras.layers.normalization import batch_normalization

def load_az_dataset(dataset_path):
    """Load a CSV of flattened 28x28 images with a leading label column.

    Each non-blank row must hold an integer label followed by 784 integer
    pixel values (0-255), all comma separated.

    Args:
        dataset_path: Path to the CSV file.

    Returns:
        A ``(data, labels)`` tuple where ``data`` is a float32 array holding
        one 28x28 image per row of the file and ``labels`` is an int array
        with the matching labels.

    Raises:
        ValueError: If a field is not an integer or a row does not contain
            exactly 784 pixel values.
    """
    data = []
    labels = []

    # the context manager guarantees the file handle is closed, even when
    # a malformed row raises part-way through the file
    with open(dataset_path) as csv_file:
        for row in csv_file:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            if not row.strip():
                continue

            fields = row.split(",")
            # first column is the label
            label = int(fields[0])
            # the remaining 784 columns are the flattened grayscale image
            image = np.array([int(x) for x in fields[1:]], dtype="uint8")

            # restore the 28x28 pixel grid from the flat 784-d vector
            image = image.reshape((28, 28))

            data.append(image)
            labels.append(label)

    data = np.array(data, dtype='float32')
    labels = np.array(labels, dtype='int')

    return (data, labels)


def load_mnist_dataset():
    """Return the MNIST train and test splits merged into one dataset.

    Returns:
        A ``(data, labels)`` tuple: the training images followed by the
        test images, and the training labels followed by the test labels.
    """
    train_split, test_split = mnist.load_data()
    train_images, train_targets = train_split
    test_images, test_targets = test_split

    # stack along the first (sample) axis so both splits form one dataset
    images = np.concatenate([train_images, test_images], axis=0)
    targets = np.concatenate([train_targets, test_targets])

    return (images, targets)

# CSV file passed to load_az_dataset below
PATH = 'data/A_Z Handwritten Data.csv'

# read both datasets into memory: the CSV letters and the Keras MNIST digits
print("[INFO] loading datasets...")
az_data, az_labels = load_az_dataset(PATH)
digits_data, digits_labels = load_mnist_dataset()

[INFO] loading datasets...


In [10]:
# import argparse


# ap = argparse.ArgumentParser()
# ap.add_argument("-a", "--az", required=True, help="path to A-Z dataset")
# ap.add_argument("-m", "--model", required=True, type=str,
#                 help="path to output trained handwriting recognition model")
# ap.add_argument("-p", "--plot", type=str, default="plot.png",
#                 help="path to output training history file")
# args = vars(ap.parse_args())

# training hyperparameters: epochs, initial learning rate, batch size
EPOCHS = 50
INIT_LR = 1e-1
BS = 128

# shift the A-Z labels past the ten digit labels so that the two label
# sets do not collide once the datasets are merged
az_labels += 10

# merge both datasets: A-Z samples first, digit samples second
data = np.concatenate([az_data, digits_data], axis=0)
labels = np.concatenate([az_labels, digits_labels])

print(data.shape, labels.shape)

# upscale every 28x28 image to the 32x32 input used by the network
data = np.array([cv2.resize(img, (32, 32)) for img in data],
                dtype='float32')

# append a trailing channel axis and scale pixel values to [0, 1]
data = data[..., np.newaxis]
data /= 255.0

[INFO] loading datasets...
(442451, 28, 28) (442451,)


In [8]:
# one-hot encode the integer labels
le = LabelBinarizer()
labels = le.fit_transform(labels)
counts = labels.sum(axis=0)

# weight every class by how rare it is relative to the largest class, so
# under-represented classes contribute more to the loss
class_totals = labels.sum(axis=0)
class_weight = {
    i: class_totals.max() / total
    for i, total in enumerate(class_totals)
}

# hold out 20% of the samples for testing, stratified by label
(train_x, test_x, train_y, test_y) = train_test_split(
    data, labels, test_size=0.20, stratify=labels, random_state=42
)

# construct the image generator for data augmentation
aug = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.05,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    horizontal_flip=False,
    fill_mode='nearest'
)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
opt = SGD(learning_rate=INIT_LR, decay=INIT_LR / EPOCHS)
model = ResNet.build(32, 32, 1, len(le.classes_), (3, 3, 3), (64, 64, 128, 256),
                     reg=0.0005)
model.compile(loss="categorical_crossentropy", optimizer=opt,
              metrics=['accuracy'])

In [None]:
# train the network on augmented batches, validating on the held-out split
print("[INFO] training network...")
H = model.fit(
    aug.flow(train_x, train_y, batch_size=BS),
    validation_data=(test_x, test_y),
    steps_per_epoch=len(train_x) // BS,
    epochs=EPOCHS,
    class_weight=class_weight,
    verbose=1
)

# one display name per class index: the ten digits first, then A-Z (the
# letter labels were shifted by 10 before the datasets were merged)
label_names = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")

print("[INFO] evaluatin network...")
predictions = model.predict(test_x, batch_size=BS)
print(classification_report(test_y.argmax(axis=1),
                            predictions.argmax(axis=1),
                            target_names=label_names))

# save the model to disk; the argparse block is disabled in this notebook,
# so the output path is a constant rather than a command-line argument
MODEL_PATH = "handwriting.h5"
print("[INFO] serializing network...")
model.save(MODEL_PATH, save_format="h5")

# construct a plot of the training history; the x-axis follows the number
# of epochs actually recorded in the history
N = np.arange(0, len(H.history["loss"]))
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
# saving the figure is left disabled, as in the original notebook
# plt.savefig("plot.png")

In [None]:
# annotated sample images collected for the montage
images = []

# draw 49 random test samples (enough to fill the 7x7 montage below)
for i in np.random.choice(np.arange(0, len(test_y)), size=(49,)):
    # classify the single sample; np.newaxis adds the batch dimension
    probs = model.predict(test_x[np.newaxis, i])
    prediction = probs.argmax(axis=1)
    label = label_names[prediction[0]]

    # convert only the selected sample back to 8-bit pixels (indexing with
    # `i` is essential: scaling all of test_x would yield a 4-D array)
    image = (test_x[i] * 255).astype("uint8")

    # green text for a correct prediction, red (BGR order) for a wrong one
    if prediction[0] == np.argmax(test_y[i]):
        color = (0, 255, 0)
    else:
        color = (0, 0, 255)

    # replicate the single channel three times so colored text can be
    # drawn, then enlarge the image and write the predicted label on it
    image = cv2.merge([image] * 3)
    image = cv2.resize(image, (96, 96), interpolation=cv2.INTER_LINEAR)
    cv2.putText(image, label, (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2)

    images.append(image)

# tile the annotated samples into a single 7x7 grid and display it
montage = build_montages(images, (96, 96), (7, 7))[0]
cv2.imshow("OCR Results", montage)
cv2.waitKey(0)