In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pathlib
import tqdm
import itertools
import xml.etree.ElementTree as ET
import cv2

def create_data(image_paths,winSize,stride,bbox_path='',threshold=0.9):
    threshold = 0.8
    bbox_path = pathlib.Path(bbox_path)
    x = []
    y = []

    for p in tqdm.tqdm(image_paths):
        image = plt.imread(p.as_posix())
        
        if image.dtype != np.uint8:
            image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)
        
        h,w = image.shape[:2]
        W = list(range(0,w-winSize[0],stride)) + [w-winSize[0]]
        H = list(range(0,h-winSize[1],stride)) + [h-winSize[1]]
        
        bbox_label = bbox_path.joinpath(p.with_suffix('.xml').name)
        if bbox_label.exists():
            xml_tree = ET.parse(bbox_label.as_posix())
            gt_xmin, gt_ymin, gt_xmax, gt_ymax = list(int(x.text) for x in xml_tree.find('object').find('bndbox'))
            gt_area = (gt_xmax-gt_xmin)*(gt_ymax-gt_ymin)
        else:
            xml_tree = None
            gt_xmin, gt_ymin, gt_xmax, gt_ymax = None,None,None,None
            gt_area = None
            
        
        for tl in itertools.product(H,W):
            t,l = tl
            
            cp_xmin, cp_xmax = l,l+winSize[1]
            cp_ymin, cp_ymax = t,t+winSize[0]
            
            if bbox_label.exists():
                inter_xmin = max(cp_xmin,gt_xmin)
                inter_ymin = max(cp_ymin,gt_ymin)
                inter_xmax = min(cp_xmax,gt_xmax)
                inter_ymax = min(cp_ymax,gt_ymax)
                inter_area = max(0,inter_xmax-inter_xmin+1) * max(0,inter_ymax-inter_ymin+1)
                gt_contained_ratio = inter_area/gt_area
            else:
                gt_contained_ratio = 0
                
            cropped_image = image[cp_ymin:cp_ymax,cp_xmin:cp_xmax,:]
            
            if gt_contained_ratio>=threshold:
                x.append(cropped_image)
                y.append(1)
            elif gt_contained_ratio==0:
                x.append(cropped_image)
                y.append(0)
            
    x = np.stack(x,axis=0)
    y = np.stack(y,axis=0)
    return x,y

def preprocess(hog,images):
    output= []
    for img in images:
        hog_img = hog.compute(img, winStride=(1,1), padding=(0,0)).reshape((-1,))
        output.append(hog_img)
    return np.stack(output)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

winSize = (48,48)
hog_setting = dict(_winSize=winSize,_blockSize=(16,16),_blockStride=(8,8),_cellSize=(8,8),_nbins=9)


image_paths = list(pathlib.Path(r'D:\Datasets\Golf\IdeasLab\training\golfheadcropped').glob('*.jpg'))
bbox_path = r'D:\Datasets\Golf\IdeasLab\training\golfheadcropped_bbox'

train_paths, valid_paths = train_test_split(image_paths,test_size=30,random_state=123)
x,y = create_data(train_paths,winSize,stride=8,bbox_path=bbox_path,threshold=0.9)
val_x,val_y = create_data(valid_paths,winSize,stride=8,bbox_path=bbox_path,threshold=0.9)

additional_paths = list(pathlib.Path(r'D:\Datasets\Pedestrian\PennFudanPed\PNGImages').glob('*.jpg'))
additional_paths = additional_paths + list(pathlib.Path(r'D:\Datasets\Golf\custom').glob('*.jpg'))
add_x, add_y = create_data(additional_paths,winSize,stride=16)

x = np.concatenate([x,add_x],axis=0)
y = np.concatenate([y,add_y],axis=0)

hog = cv2.HOGDescriptor(**hog_setting)
hog_x = preprocess(hog,x)
hog_val_x = preprocess(hog,val_x)

100%|██████████| 313/313 [00:01<00:00, 269.14it/s]
100%|██████████| 30/30 [00:00<00:00, 254.85it/s]
100%|██████████| 184/184 [00:05<00:00, 35.79it/s]


In [6]:
detector = SVC(kernel='rbf')
detector.fit(hog_x,y)

SVC()

In [7]:
pred = detector.predict(hog_val_x)

((val_y==1)&(pred==1)).sum()/(val_y==1).sum()

0.2611111111111111

In [8]:
import pickle

file = pathlib.Path('saved_models/svm_rbf_model_4.ml')
with file.open('wb') as f:
    pickle.dump(dict(detector=detector,hog_setting=hog_setting),f)