In [None]:
# Mount Google Drive at /content/drive so notebook files can be read from / written to it.
# `google.colab` is presumably only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
cd job

[Errno 2] No such file or directory: 'job'
/content


In [None]:
import tensorflow as tf
import numpy as np
from numpy import matlib
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from skimage.transform import resize

# Side length used for the generated square images and for the default-box grid.
IMG_SIZE = 224
# Number of digit classes; the value NUM_CLASSES itself is used as the background label.
NUM_CLASSES = 10
# Grid width of each (square) detection layer, largest grid first.
layerWidths = [28,14,7,4,2,1]
# Number of default boxes per grid cell, one entry per detection layer.
numBoxes = [3,3,3,3,3,3]
assert len(numBoxes) == len(layerWidths) # num_boxes for each layer and each layer has a specific width
outputChannels = NUM_CLASSES + 1 + 4 # 10 classes + background + cx,cy,h,w
# Sanity check: 1 background channel + 4 box-offset channels on top of the class channels.
assert outputChannels - NUM_CLASSES == 5

In [None]:
# Load the MNIST train/test splits through Keras (the recorded output shows a download on first use).
# The image arrays are indexed as 3-D (sample, row, col) later in the notebook.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


The default-box construction below follows the anchor-box chapter of Dive into Deep Learning: https://d2l.ai/chapter_computer-vision/anchor.html \\
"*Assume that the input image has a height of h and width of w. We generate anchor boxes with different shapes centered on each pixel of the image. Assume the size is s∈(0,1], the aspect ratio is r>0, and the width and height of the anchor box are ws√r and hs/√r, respectively. When the center position is given, an anchor box with known width and height is determined.*" \\

In the code below, s corresponds to `scale`, both h and w correspond to `grid_size`, and r corresponds to the entries of `asp`.

In [None]:
def create_default_boxes():
	"""Build the default (anchor) boxes for every detection layer.

	Reads the module-level `layerWidths`, `numBoxes` and `IMG_SIZE`. Each layer
	of width g contributes g*g grid cells, and every cell carries `num_box`
	boxes that share one centre but differ in aspect ratio. Rows are ordered
	layer by layer, then cell by cell (index i*g + j), then by aspect ratio.

	Returns:
		boxes: (TOTAL_BOXES, 4) corner form
			[centre0 - size0/2, centre1 - size1/2, centre0 + size0/2, centre1 + size1/2].
		TOTAL_BOXES: total number of default boxes over all layers.
		centres: (TOTAL_BOXES, 2) box centres, in the same units as IMG_SIZE.
		hw: (TOTAL_BOXES, 2) box extents along the two centre axes.
	"""
	# One scale per layer. The fraction is k/len(layerWidths), so the largest
	# scale stays below max_scale. Reversed so that the largest scale pairs
	# with the first (widest) entry of layerWidths.
	min_scale, max_scale = .1, 1.5
	num_layers = len(layerWidths)
	scales = [min_scale + k/num_layers * (max_scale-min_scale) for k in range(num_layers)][::-1]

	# Per-aspect-ratio multipliers: column 0 is sqrt(r), column 1 is 1/sqrt(r).
	asp = [0.5,1.0,1.5]
	root_asp = [r**0.5 for r in asp]
	inv_root_asp = [1/s for s in root_asp]
	shape_factors = np.array([root_asp, inv_root_asp]).T

	# Should be equal to the number of boxes predicted by the detection model.
	TOTAL_BOXES = sum(width*width*count for width, count in zip(layerWidths, numBoxes))

	centres = np.zeros((TOTAL_BOXES,2))
	hw = np.zeros((TOTAL_BOXES,2))
	boxes = np.zeros((TOTAL_BOXES,4))

	start = 0		# row offset of the current layer
	for grid_size, num_box, scale in zip(layerWidths, numBoxes, scales):
		step_size = IMG_SIZE*1.0/grid_size
		# The extents depend only on the layer, so build them once per layer.
		layer_hw = np.multiply(grid_size*scale, shape_factors)[:num_box,:]
		for cell in range(grid_size*grid_size):
			i, j = divmod(cell, grid_size)
			rows = slice(start + cell*num_box, start + (cell+1)*num_box)
			# All boxes of a cell sit on the cell's midpoint.
			centres[rows, :] = i*step_size + step_size/2, j*step_size + step_size/2
			hw[rows, :] = layer_hw

		start += grid_size*grid_size*num_box

	# Corner representation (min corner, max corner); this is what bestIoU
	# compares ground-truth boxes against.
	half_extent = hw/2
	boxes[:, :2] = centres - half_extent
	boxes[:, 2:] = centres + half_extent

	return boxes, TOTAL_BOXES, centres, hw

In [None]:
# calculate IoU for a set of search boxes and default boxes
def IoU(box1, box2):
	"""Element-wise intersection-over-union of axis-aligned boxes.

	Boxes are given in corner form along their last axis:
	[min0, min1, max0, max1]. The two inputs are broadcast against each other,
	so `box1` may be a single box of shape (4,) compared with an (N, 4) array,
	or both may be (N, 4) arrays compared row by row.

	Args:
		box1, box2: array-likes whose last axis has length 4.

	Returns:
		IoU values with the broadcast shape of the inputs minus the last axis.

	Raises:
		AssertionError: if any pair of boxes has a union area that is not positive.
	"""
	box1 = np.asarray(box1, dtype=np.float64)
	box2 = np.asarray(box2, dtype=np.float64)

	# Corners of the intersection rectangle.
	x_top_left = np.maximum(box1[..., 0], box2[..., 0])
	x_bottom_right = np.minimum(box1[..., 2], box2[..., 2])
	y_top_left = np.maximum(box1[..., 1], box2[..., 1])
	y_bottom_right = np.minimum(box1[..., 3], box2[..., 3])

	# Clamp each side at 0 so disjoint boxes yield an empty intersection.
	intersection = np.abs(np.maximum(x_bottom_right - x_top_left, 0) * np.maximum(y_bottom_right - y_top_left, 0))

	boxArea1 = np.abs((box1[..., 2] - box1[..., 0]) * (box1[..., 3] - box1[..., 1]))
	boxArea2 = np.abs((box2[..., 2] - box2[..., 0]) * (box2[..., 3] - box2[..., 1]))

	unionArea = boxArea1 + boxArea2 - intersection
	assert np.all(unionArea > 0), "IoU is undefined when the union area is not positive"
	return intersection / unionArea

In [None]:
def bestIoU(searchBox, threshold=0.4):
	"""Return the indices of the default boxes that overlap `searchBox` well enough.

	Uses the module-level `boxes` and `TOTAL_BOXES` produced by
	`create_default_boxes()`.

	Args:
		searchBox: one box in corner form [min0, min1, max0, max1].
		threshold: a default box is selected when its IoU with `searchBox`
			is strictly greater than this value (default 0.4).

	Returns:
		Integer array of shape (K, 1), as produced by `np.argwhere`, holding
		the row indices into `boxes` of the K selected default boxes.
	"""
	# Repeat the query box once per default box so IoU can compare them row by row.
	tiled = np.tile(searchBox, (TOTAL_BOXES, 1))
	return np.argwhere(IoU(tiled, boxes) > threshold)

In [None]:
# Default boxes shared by the dataset-building helpers below.
boxes, TOTAL_BOXES, centres, hw = create_default_boxes()

# Number of MNIST samples kept from the train and test splits.
training_data_size = 2000
testing_data_size = 100

# Keep only the leading samples of each split (images and labels stay aligned).
x_train, y_train = x_train[:training_data_size], y_train[:training_data_size]
x_test, y_test = x_test[:testing_data_size], y_test[:testing_data_size]

In [None]:
def create_dataset(images, labels, num_digits=3):
	"""Compose multi-digit detection images from MNIST and build per-default-box targets.

	For every input index a blank IMG_SIZE x IMG_SIZE x 3 canvas is created and
	`num_digits` randomly chosen digits are pasted onto it at random positions,
	each resized by a factor drawn from {1, 1.5, 2}. Digits may overlap; a
	digit pasted later overwrites the pixels (and matched boxes) of earlier ones.

	Relies on the module-level `IMG_SIZE`, `NUM_CLASSES`, `TOTAL_BOXES`,
	`centres`, `hw` and `bestIoU`, so `create_default_boxes()` must have been
	run first. Randomness comes from NumPy's global RNG.

	Args:
		images: array of digit images indexed as (sample, row, col); the length
			of the last axis is taken as the (square) digit size.
		labels: class label for each entry of `images`.
		num_digits: number of digits pasted into each generated image.

	Returns:
		(inputs, targets) where
		inputs is a float array of shape (len(images), IMG_SIZE, IMG_SIZE, 3)
			holding pixel values in 0..255, and
		targets is a float array of shape (len(labels), TOTAL_BOXES, 5):
			channel 0 is the class (NUM_CLASSES means background), channels 1-2
			are ground-truth centre minus default-box centre, channels 3-4 are
			ground-truth size minus default-box extent.
	"""
	num_images = images.shape[0]
	MNIST_SIZE = images.shape[-1]
	scale_range = [1.5, 1, 2]

	# Largest pasted digit; restricting the corners keeps every digit inside the canvas.
	max_size = int(MNIST_SIZE*max(scale_range))
	corners = np.random.randint(IMG_SIZE - max_size, size=(num_digits, num_images, 2))
	# Which source digit, and which resize factor, goes into each slot of each image.
	digits = np.random.randint(num_images, size=(num_images, num_digits))
	scales = np.random.choice(scale_range, size=(num_images, num_digits))

	inputs = np.zeros((num_images, IMG_SIZE, IMG_SIZE, 3))
	targets = np.zeros((labels.shape[0], TOTAL_BOXES, 1+4))	# [class + (cx, cy, h, w)] for each box.
	targets[:, :, 0] = NUM_CLASSES		# every default box starts out as background

	for idx in range(num_images):
		for i in range(num_digits):
			size = int(MNIST_SIZE*scales[idx, i])
			lx, ly = corners[i, idx]
			src = digits[idx, i]

			# Paste the resized digit into all three colour channels.
			# Presumably `resize` returns floats in [0, 1], hence the rescale by 255.
			patch = (resize(images[src, :, :], (size, size))*255).astype(np.uint8)
			inputs[idx, lx:lx+size, ly:ly+size, :] = patch[:, :, np.newaxis]

			# Ground-truth box in corner form, matched against the default boxes.
			bbox = np.array([lx, ly, lx + size, ly + size], dtype=np.float64)
			box_idx = bestIoU(bbox).ravel()		# keep the native index dtype (no uint16 wrap-around)

			targets[idx, box_idx, 0] = labels[src]
			targets[idx, box_idx, 1] = (bbox[0] + bbox[2])/2.0 - centres[box_idx, 0]	# cx offset
			targets[idx, box_idx, 2] = (bbox[1] + bbox[3])/2.0 - centres[box_idx, 1]	# cy offset
			targets[idx, box_idx, 3] = size - hw[box_idx, 0]							# delta h
			targets[idx, box_idx, 4] = size - hw[box_idx, 1]							# delta w

	return inputs, targets

In [None]:
# create_dataset draws positions, digits and scales from NumPy's global RNG;
# seed it so that re-running the notebook regenerates the same dataset.
SEED = 0
np.random.seed(SEED)

test_x, test_y = create_dataset(x_test, y_test)
train_x, train_y = create_dataset(x_train, y_train)

print(train_x.shape, train_y.shape)
print(test_x.shape, test_y.shape)

def _bytes_feature(value):
	"""Wrap one bytes value in a tf.train.Feature holding a single-element BytesList."""
	bytes_list = tf.train.BytesList(value=[value])
	return tf.train.Feature(bytes_list=bytes_list)

def write_dataset(x_data, y_data, filename):
	"""Serialise paired images and targets into a TFRecord file.

	Each (x, y) pair becomes one tf.train.Example with two bytes features,
	'image' and 'label', each holding the raw C-order bytes of the array.
	Neither dtype nor shape is stored, so a reader has to know them.

	Args:
		x_data: iterable of image arrays.
		y_data: iterable of target arrays, aligned with `x_data`.
		filename: path of the TFRecord file to create.
	"""
	# Write to the path passed in (not a module-level variable); the context
	# manager closes the file even if serialisation fails part-way.
	with tf.io.TFRecordWriter(filename) as writer:
		for x, y in zip(x_data, y_data):
			# tobytes() emits C-order bytes, same as flattening first.
			feature = {'label': _bytes_feature(y.tobytes()),
					   'image': _bytes_feature(x.tobytes())}
			example = tf.train.Example(features=tf.train.Features(feature=feature))
			writer.write(example.SerializeToString())

# Write both splits as TFRecord files (relative paths, so they land in the current
# working directory). The example count in each name is taken from the data itself
# so the name cannot drift from the contents.
tfrecord_filename = 'mnist_obj_detection_%d_train.tfrecords' % train_x.shape[0]
write_dataset(train_x, train_y, tfrecord_filename)
tfrecord_filename = 'mnist_obj_detection_%d_test.tfrecords' % test_x.shape[0]
write_dataset(test_x, test_y, tfrecord_filename)

(2000, 224, 224, 3) (2000, 3150, 5)
(100, 224, 224, 3) (100, 3150, 5)


  from ipykernel import kernelapp as app
  app.launch_new_instance()
