In [1]:
%matplotlib inline
import skimage
from skimage import io, color, transform
from matplotlib import pyplot as plt
from pylab import rcParams
import numpy as np
import glob
import os
import sys
import pickle
from timeit import timeit

rcParams['figure.figsize'] = 10, 10

EPS = sys.float_info.epsilon

In [2]:
import time
import pandas as pd

def show_imgs(imgs, cmap='gray'):
    for img in imgs:
        plt.figure()
        if cmap == 'gray':
            plt.imshow(img, cmap=cmap)
        else:
            plt.imshow(img)
            
def read_data(input_data, output_data):
    size = len(input_data)
    start_time = time.time()
    parts = np.linspace(0, size, 9)
    parts = [int(i) for i in parts]
    for i in xrange(size):
        img = skimage.io.imread(input_data[i]).astype('float64')
        img = skimage.transform.resize(img, (32, 32))
        output_data.append(skimage.color.rgb2gray(img))
        if i in parts:
            print("Done {0:.1f} % for {1:.3f} sec".format(float(i) / size * 100, time.time() - start_time))
            
def load_target(filename='./gt.csv'):
    data = pd.read_csv(filename)
    return data['class_id']
        
def save(data, filename):
    with open(filename, 'wb') as pickle_file:
        pickle.dump(data, pickle_file)

In [3]:
%%time

imgs = []
read_data(sorted(glob.glob(os.path.join('./train', '*.png'))), imgs)

Done 0.0 % for 0.099 sec
Done 12.5 % for 11.163 sec
Done 25.0 % for 21.078 sec
Done 37.5 % for 32.896 sec
Done 50.0 % for 44.420 sec
Done 62.5 % for 56.022 sec
Done 75.0 % for 68.180 sec
Done 87.5 % for 79.237 sec
CPU times: user 1min 10s, sys: 3.11 s, total: 1min 13s
Wall time: 1min 30s


In [4]:
target = load_target()
target.shape

(39209,)

In [5]:
def calcDerv(img):
    Ix = np.zeros(img.shape)
    Iy = np.zeros(img.shape)
    uuimg = img[0: img.shape[0] - 2, 1: img.shape[1] - 1]
    ddimg = img[2: img.shape[0], 1: img.shape[1] - 1]
    llimg = img[1: img.shape[0] - 1, 0: img.shape[1] - 2]
    rrimg = img[1: img.shape[0] - 1, 2: img.shape[1]]
    Ix[1: Ix.shape[0] - 1, 1: Iy.shape[1] - 1] = ddimg - uuimg
    Iy[1: Iy.shape[0] - 1, 1: Iy.shape[1] - 1] = rrimg - llimg
    return Ix, Iy

def calculate_norm_gradients(img, I_x, I_y):
    return (I_x * I_x + I_y * I_y) ** 0.5

def calculate_angles(I_x, I_y):
    return np.arctan2(I_x, I_y)

def hist(cell_angles, cell_norm_gradients, bincount=12):
    bins_threshold = np.linspace(-np.pi, np.pi, bincount + 1)
    hist, _ = np.histogram(cell_angles, bins=bins_threshold, weights=cell_norm_gradients)
    return hist
    
def get_block_vector(block_angles, block_norm_gradients, cell_shape, bincount=12):
    block_height, block_width = block_angles.shape
    cell_height, cell_width = cell_shape
    if block_height % cell_height != 0 or block_width % cell_width != 0:
        raise Exception("Cell size is not appropriate")
    block_vector = np.zeros(shape=(bincount * (block_height // cell_height) * (block_width // cell_width), ), 
                            dtype=np.float64)
    counter = 0
    for x in xrange(0, block_height, cell_height):
        for y in xrange(0, block_width, cell_width):
            temp = hist(block_angles[x: x + cell_height, y: y + cell_width], 
                                 block_norm_gradients[x: x + cell_height, y: y + cell_width], bincount)
            block_vector[bincount * counter: bincount * (counter + 1)] = temp
            counter += 1
    norm = np.sqrt(np.linalg.norm(block_vector) ** 2 + EPS)
    return block_vector / norm
    
def get_img_vector(img_angles, img_norm_gradients, block_shape, cell_shape, bincount=12):
    height, width = img_angles.shape
    block_height, block_width = block_shape
    cell_height, cell_width = cell_shape
    length = (height - block_height) * (width - block_width) * (block_height // cell_height) * \
            (block_width // cell_width) * bincount
    block_length = (block_height // cell_height) * (block_width // cell_width) * bincount
    img_vector = np.zeros(shape=(length, ), dtype=np.float64)
    counter = 0
    for x in xrange(height - block_height):
        for y in xrange(width - block_width):
            img_vector[counter * block_length: (counter + 1) * block_length] = get_block_vector(
                    img_angles[x: x + block_height, y: y + block_width],
                    img_norm_gradients[x: x + block_height, y: y + block_width],
                    cell_shape, bincount)
            counter += 1
    return img_vector

def extract_hog(img, block_shape=(20, 20), cell_shape=(4, 4), bincount=12):
    I_x, I_y = calcDerv(img)
    angles = calculate_angles(I_x, I_y)
    norms = calculate_norm_gradients(img, I_x, I_y)
    return get_img_vector(angles, norms, block_shape, cell_shape)

def fit_and_classify(hogs, train_labels):
    clf = svm.SVC(kernel='linear', C=1)
    scores = cross_val_score(clf, hogs, train_labels, cv=5)
    print (scores)
    clf.fit(hogs, train_labels)
    return clf

In [6]:
def extract_hog_a(img, cell_rows=8, cell_cols=8, block_row_cells=2, block_col_cells=2, 
                bin_count=9, eps=1e-9, mirror_grads=True, use_sobel=False, preserve_channels=False):
    height = img.shape[0]
    width = img.shape[1]
    channels = img.shape[2]
    block_rows = block_row_cells * cell_rows
    block_cols = block_col_cells * cell_cols
    if use_sobel:
        Dx = np.array([[-1, 0, 1],
                       [-2, 0, 2],
                       [-1, 0, 1]])
        Dy = np.array([[1, 2, 1],
                       [0, 0, 0],
                       [-1, -2, -1]])
    else:
        Dx = np.array([[-1, 0, 1]])
        Dy = np.array([[-1], [0], [1]])
    if not preserve_channels:
        new_img = np.zeros(shape=(height, width, 1))
        new_img[:, :, 0] = rgb2gray(img)
        img = new_img
        channels = 1
    shape = (height, width, channels)
    Ix = np.zeros(shape=shape)
    Iy = np.zeros(shape=shape)
    G = np.zeros(shape=shape)
    for channel in range(channels):
        Ix[:, :, channel] = convolve(img[:, :, channel], Dx, mode='constant', cval=0.0)
        Iy[:, :, channel] = convolve(img[:, :, channel], Dy, mode='constant', cval=0.0)
    G = np.sqrt(Ix ** 2 + Iy ** 2)
    Theta = np.arctan2(Iy, Ix)
    if mirror_grads:
        Theta[Theta < 0] += np.pi
    feats = []
    for channel in range(channels):
        G_channel = G[:, :, channel]
        Theta_channel = Theta[:, :, channel]
        for block_row_start in range(height - block_rows):
            for block_col_start in range(width - block_cols):
                
                G_block = G_channel[block_row_start:block_row_start+block_rows,
                                    block_col_start:block_col_start+block_cols]
                Theta_block = Theta_channel[block_row_start:block_row_start+block_rows,
                                            block_col_start:block_col_start+block_cols]
                v = []
                for block_row_cell in range(block_row_cells):
                    for block_col_cell in range(block_col_cells):
                        G_cell = G_block[block_row_cell*cell_rows:block_row_cell*cell_rows+cell_rows,
                                         block_col_cell*cell_cols:block_col_cell*cell_cols+cell_cols]
                        Theta_cell = Theta_block[block_row_cell*cell_rows:block_row_cell*cell_rows+cell_rows,
                                                 block_col_cell*cell_cols:block_col_cell*cell_cols+cell_cols]
                        if mirror_grads:
                            bin_range = (0, np.pi)
                        else:
                            bin_range = (0, -np.pi)
                        hist, _ = np.histogram(Theta_cell.flatten(), bins=bin_count, weights=G_cell.flatten(), range=bin_range)
                        v.extend(hist)
                v = np.array(v)
                v = v / np.sqrt(eps + np.sum(v ** 2))
                feats.extend(v)
    return np.array(feats)
    

In [7]:
%%time
import time
start_time = time.time()
extract_hog(imgs[0])
print(time.time() - start_time)

0.416364908218
CPU times: user 420 ms, sys: 0 ns, total: 420 ms
Wall time: 417 ms


In [None]:
%%time
import multiprocessing

size = len(imgs)
start_time = time.time()
parts = map(int, np.linspace(0, size, 1001))
descriptors = []
for i in xrange(len(imgs)):
    descriptors.append(extract_hog(imgs[i]))
    if i in parts:
        print("{0:.3f} % is done for {1:.3f} sec".format(float(i) / size * 100, time.time() - start_time))

0.000 % is done for 0.503 sec
0.099 % is done for 16.608 sec
0.199 % is done for 32.705 sec
0.298 % is done for 48.993 sec
0.398 % is done for 66.560 sec
0.500 % is done for 82.546 sec
0.599 % is done for 97.720 sec
0.699 % is done for 112.898 sec
