In [42]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
%matplotlib inline

In [39]:
class Interpolation:
    def __call__(self, img, py=32, px=32):
        sy = py / img.shape[0]
        sx = px / img.shape[1]
        return self.bilinear(img, sy, sx)
    
    def bilinear(self, img, sy=1.5, sx=1.5):
        H, W = img.shape[:2]
        ch = 1 if len(img.shape) < 3 else img.shape[2]
        H_big, W_big = int(H * sy), int(W * sx)
        output_img = np.zeros((H_big, W_big, ch))

        for i in range(H_big):
            for j in range(W_big):
                y, x = min(H-2, int(i/sy)), min(W-2, int(j/sx))
                dy, dx = i/sy - y, j/sx - x
                D = [(1-dy)*(1-dx), dy*(1-dx), (1-dy)*dx, dy*dx]
                if len(img.shape) == 3:
                    I = [img[y, x, :], img[y+1, x, :], img[y, x+1, :], img[y+1, x+1, :]]
                    output_img[i, j, :] = sum(d*z for (d, z) in zip(D, I))
                else:
                    I = [img[y, x], img[y+1, x], img[y, x+1], img[y+1, x+1]]
                    output_img[i, j] = sum(d*z for (d, z) in zip(D, I))

        # 仮に幅や高さが 1 になるような画像であると成立しないので注意
        output_img = np.squeeze(np.clip(output_img, 0, 255).astype("uint8"))
        return output_img

In [5]:
class HOGFeatureValue:        
    def quantize(self, value):
        res = np.floor(value / 20)
        if res >= 9: res = 8
        return res
    
    # reference solution は差分計算でオーバーフローをおこしていそう
    def calc_gx_gy(self, img):
        input_img = img.copy().astype(np.int32)
        H, W = input_img.shape
        gx = np.zeros_like(input_img).astype(np.int32)
        gy = np.zeros_like(input_img).astype(np.int32)
        for y in range(H):
            for x in range(W):
                gx[y, x] = input_img[y, min(x+1, W-1)] - input_img[y, max(x-1, 0)]
                gy[y, x] = input_img[min(y+1, H-1), x] - input_img[max(y-1, 0), x]
        return gx, gy
    
    def calc_mag_ang(self, gx, gy):
        H, W = gx.shape
        mag = np.zeros_like(gx, dtype=np.float32)
        ang = np.zeros_like(gy, dtype=np.float32)
        for y in range(H):
            for x in range(W):
                mag[y, x] = np.sqrt(gx[y, x]**2 + gy[y, x]**2)
                ang[y, x] = np.arctan2(gy[y, x], gx[y, x])
                if ang[y, x] < 0: ang[y, x] += np.pi
                ang[y, x] = self.quantize(ang[y, x] / np.pi * 180.0)
        ang = ang.astype(np.uint8)
        return mag, ang
                
    def get_colorized_ang(self, ang):
        color = [
            [255, 0, 0],
            [0, 255, 0],
            [0, 0, 255],
            [255, 255, 0],
            [255, 0, 255],
            [0, 255, 255],
            [127, 127, 0],
            [127, 0, 127],
            [0, 127, 127],
        ]
        H, W = ang.shape
        colorized_ang = np.zeros((H, W, 3), dtype=np.uint8)
        for i in range(9):
            colorized_ang[ang == i] = color[i]
        return colorized_ang
    
    # reference solution で x, y に掛ける数が 4 なのはおそらく間違い
    def gradient_histogram(self, mag, ang, N):
        H, W = mag.shape
        cell_H, cell_W = H // N, W // N
        histogram = np.zeros((cell_H, cell_W, 9), dtype=np.float32)
        
        for y in range(cell_H):
            for x in range(cell_W):
                for j in range(N):
                    for i in range(N):
                        histogram[y, x, ang[y*N+j, x*N+i]] += mag[y*N+j, x*N+i]
        return histogram
    
    def normalization(self, histogram, epsilon=1):
        cell_H, cell_W, _ = histogram.shape
        result_histogram = histogram.copy().astype(np.float32)
        for y in range(cell_H):
            for x in range(cell_W):
                result_histogram[y, x] /= np.sqrt(
                    np.sum(
                        histogram[max(y-1, 0):min(y+2, cell_H),
                                  max(x-1, 0):min(x+2, cell_W)]**2
                    ) + epsilon
                )
        return result_histogram
    
    def draw_feature_value(self, img_gray, histogram, N=8):
        H, W = img_gray.shape
        cell_H, cell_W, _ = histogram.shape
        
        output_img = img_gray.copy().astype(np.uint8)
        for y in range(cell_H):
            for x in range(cell_W):
                cx = x*N + N//2
                cy = y*N + N//2
                x1, y1, x2, y2 = cx + N//2 - 1, cy, cx - N//2 + 1, cy
                h = histogram[y, x] / np.sum(histogram[y, x])
                h /= h.max()
                
                for c in range(9):
                    theta = (20 * c + 10) / 180.0 * np.pi
                    rx = int(np.sin(theta)*(x1-cx) + np.cos(theta)*(y1-cy) + cx)
                    ry = int(np.cos(theta)*(x1-cx) - np.cos(theta)*(y1-cy) + cy)
                    lx = int(np.sin(theta)*(x2-cx) + np.cos(theta)*(y2-cy) + cx)
                    ly = int(np.cos(theta)*(x2-cx) - np.cos(theta)*(y2-cy) + cy)
                    color = int(255.0 * h[c])
                    cv2.line(output_img, (lx, ly), (rx, ry), (color, color, color))
        return output_img
    
    def get_hog_matrix(self, img, cell_size=8):
        if len(img.shape) == 3:
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        elif len(img.shape) == 2:
            img_gray = img.copy()
        else:
            assert False, "invlaid image dimension."

        gx, gy = self.calc_gx_gy(img_gray)
        mag, ang = self.calc_mag_ang(gx, gy)
        histogram = self.normalization(self.gradient_histogram(mag, ang, cell_size))
        return histogram

In [4]:
class Metrics:
    # rectangle: [y1, x1, y2, x2] (y1 <= y2, x1 <= x2)
    def iou(self, r1, r2):
        oy = max(0, min(r1[2], r2[2]) - max(r1[0], r2[0]))
        ox = max(0, min(r1[3], r2[3]) - max(r1[1], r2[1]))

        R1 = (r1[2] - r1[0]) * (r1[3] - r1[1])
        R2 = (r2[2] - r2[0]) * (r2[3] - r2[1])
        Rol = oy * ox
        return abs(Rol) / abs(R1 + R2 - Rol)
    
    def accuracy(self, num_of_samples, correct):
        return correct / num_of_samples

In [54]:
class Solver:
    def __init__(self, seed=0):
        self.hog = HOGFeatureValue()
        self.resize = Interpolation()
        
    def get_bounding_boxes(self, img, stride=4):
        recs = np.array(((42, 42), (56, 56), (70, 70)), dtype=np.float32)
        H, W = img.shape[:2]
        
        num_boxes = ((H+stride-1)//stride) * ((W+stride-1)//stride) * recs.shape[0]
        boxes = np.zeros((num_boxes, 4))
        i = 0
        for y in range(0, H, stride):
            for x in range(0, W, stride):
                for dy, dx in recs:
                    y1, x1 = y, x
                    y2, x2 = min(H-1, y+dy), min(W-1, x+dx)
                    boxes[i] = np.array([y1, x1, y2, x2])
                    i += 1     
        return boxes
    
    def get_hog_matrices(self, img, boxes, img_size=32, cell_size=8):
        vec_size = (img_size//cell_size)**2 * 9
        data = np.zeros((boxes.shape[0], vec_size))
        for i, box in enumerate(tqdm(boxes)):
            y1, x1, y2, x2 = box.astype(np.int)
            resized_img = self.resize(img[y1:y2, x1:x2], img_size, img_size)
            data[i] = self.hog.get_hog_matrix(resized_img, cell_size).ravel().astype(np.float32)
        return data
    
    def get_test_data(self, img, stride=4, img_size=32, cell_size=8):
        boxes = self.get_bounding_boxes(img, stride)
        test_data = self.get_hog_matrices(img, boxes, img_size, cell_size)
        return test_data
        
    def problem_97(self, img_path):
        input_img = cv2.imread(img_path)
        # stride = 4 だと 5 分掛かるので妥協。並列化したらマシになるかもしれないけど
        test_data = self.get_test_data(input_img, stride=8)

In [55]:
solver = Solver()
solver.problem_97("../imori_many.jpg")

100%|██████████| 1728/1728 [01:32<00:00, 18.66it/s]
