In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class Metrics:
    # rectangle: [y1, x1, y2, x2] (y1 <= y2, x1 <= x2)
    def iou(self, r1, r2):
        oy = max(0, min(r1[2], r2[2]) - max(r1[0], r2[0]))
        ox = max(0, min(r1[3], r2[3]) - max(r1[1], r2[1]))

        R1 = (r1[2] - r1[0]) * (r1[3] - r1[1])
        R2 = (r2[2] - r2[0]) * (r2[3] - r2[1])
        Rol = oy * ox
        return abs(Rol) / abs(R1 + R2 - Rol)

In [3]:
class LossFunction:
    def sigmoid(self, x):
        return 1.0 / (1.0 + np.exp(-x))

In [4]:
class MachineLearning:
    def __init__(self, seed=0):
        self.metrics = Metrics()
        np.random.seed(seed)
        
    def load_image(self, img_path):
        self.img = cv2.imread(img_path)
        assert len(self.img.shape) == 3, "invalid image dimension."
        self.H, self.W, self.C = self.img.shape
        
    def random_cropping(self, gt, samples=200, hsize=60, wsize=60):
        assert 0 < hsize <= self.H, "hsize value is invalid"
        assert 0 < wsize <= self.W, "wsize value is invalid"
        
        rectangles = np.zeros((samples, 4), dtype=np.float32)
        labels = np.zeros((samples), dtype=np.uint8)
        for i in range(samples):
            w = np.random.randint(self.W - wsize)
            h = np.random.randint(self.H - hsize)
            rectangles[i] = np.array((h, w, h+hsize, w+wsize))
            labels[i] = int(self.metrics.iou(gt, rectangles[i]) >= 0.5) * 2
        return rectangles, labels
        
    def add_rectangles(self, rectangles, labels, gt=None):
        output_img = self.img.copy()
        colors = [
            (255, 0, 0),  # blue : not close
            (0, 255, 0),  # green: ground truth
            (0, 0, 255,)  # red  : close
        ]
        for rect, label in zip(rectangles, labels):
            y1, x1, y2, x2 = rect.astype(np.int)
            c = colors[label]
            output_img = cv2.rectangle(output_img, (x1, y1), (x2, y2), c)
        if gt is not None:
            y1, x1, y2, x2 = gt.astype(np.int)
            c = colors[1]
            output_img = cv2.rectangle(output_img, (x1, y1), (x2, y2), c)
        return output_img

In [41]:
class NeuralNetwork:
    def __init__(self, idx=2, w1=64, w2=64, out_dim=1, lr=0.1, seed=-1):
        if seed >= 0:
            np.random.seed(seed)
        self.w1 = np.random.normal(0, 1, [idx, w1])
        self.b1 = np.random.normal(0, 1, [w1])
        self.w2 = np.random.normal(0, 1, [w1, w2])
        self.b2 = np.random.normal(0, 1, [w2])
        self.wout = np.random.normal(0, 1, [w2, out_dim])
        self.bout = np.random.normal(0, 1, [out_dim])
        self.lr = lr
        self.loss_func = LossFunction()
        
    def forward(self, x):
        self.z1 = x
        self.z2 = self.loss_func.sigmoid(np.dot(self.z1, self.w1) + self.b1)
        self.z3 = self.loss_func.sigmoid(np.dot(self.z2, self.w2) + self.b2)
        self.out = self.loss_func.sigmoid(np.dot(self.z3, self.wout) + self.bout)
        return self.out
    
    def back_propagation(self, x, t):
        En = (self.out - t) * self.out * (1 - self.out)
        grad_En = En
        grad_wout = np.dot(self.z3.T, En)
        grad_bout = np.dot(np.ones([En.shape[0]]), En)
        self.wout -= self.lr * grad_wout
        self.bout -= self.lr * grad_bout
        
        grad_u2 = np.dot(En, self.wout.T) * self.z3 * (1 - self.z3)
        grad_w2 = np.dot(self.z2.T, grad_u2)
        grad_b2 = np.dot(np.ones([grad_u2.shape[0]]), grad_u2)
        self.w2 -= self.lr * grad_w2
        self.b2 -= self.lr * grad_b2
        
        grad_u1 = np.dot(grad_u2, self.w2.T) * self.z2 * (1 - self.z2)
        grad_w1 = np.dot(self.z1.T, grad_u1)
        grad_b1 = np.dot(np.ones([grad_u1.shape[0]]), grad_u1)
        self.w1 -= self.lr * grad_w1
        self.b1 -= self.lr * grad_b1
        
    def train(self, train_x, train_t, iteration=5000):
        for i in range(iteration):
            self.forward(train_x)
            self.back_propagation(train_x, train_t)

In [42]:
class Solver:
    def __init__(self, seed=0):
        self.ml = MachineLearning()
    
    def problem_94(self, img_path):
        self.ml.load_image(img_path)
        gt = np.array([41, 47, 103, 129], dtype=np.float32)
        rects, labels = self.ml.random_cropping(gt, samples=200, hsize=60, wsize=60)
        output_img = self.ml.add_rectangles(rects, labels, gt)
        plt.imshow(cv2.cvtColor(output_img, cv2.COLOR_BGR2RGB))
        plt.show()
        
    def problem_95(self):
        train_x = np.array([ [0, 0], [0, 1], [1, 0], [1, 1] ], dtype=np.float32)
        train_t = np.array([ [0], [1], [1], [0] ], dtype=np.float32)
        nn = NeuralNetwork(idx=train_x.shape[1], seed=0)
        nn.train(train_x, train_t, 5000)
            
        for j in range(4):
            x = train_x[j]
            t = train_t[j]
            print("in:", x, "pred:", nn.forward(x))

In [43]:
solver = Solver()
solver.problem_95()

in: [0. 0.] pred: [0.02112243]
in: [0. 1.] pred: [0.97690913]
in: [1. 0.] pred: [0.97987911]
in: [1. 1.] pred: [0.02191621]
