In [46]:
import os
import random
import warnings
from io import open
from os import listdir
from os.path import isfile, join
from random import shuffle
from time import time

import cv2
import imutils #need to install this with pip
import matplotlib.pyplot as plt
import numpy as np
from skimage import data, color, exposure
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
%matplotlib inline

In [47]:
# Class labels. Each label is also used as the name of the folder that holds
# the images of that class.
label_list = [
    "Entree",
    "Salad",
    "Dessert",
]

In [48]:
# Utility method to get all the image files for a given label.
# For simplicity, the images are organized into folders whose names are the same as the
# label names.
def get_file_list_for_label(label, data_root="./Data/Training"):
    """
    Returns the paths of all files stored for a given label.

    Assumes that the images are arranged into one folder per label, named
    exactly like the label, directly under `data_root`.

    Parameters
    ----------
    label : str
        Label name; also the name of the sub-folder that is listed.
    data_root : str, optional
        Directory containing the per-label folders. Defaults to
        './Data/Training'.

    Returns
    -------
    list of str
        Paths of the form data_root/label/filename, sorted by file name.
        Entries that are not files (e.g. sub-directories) are left out.

    Raises
    ------
    OSError
        If the label folder does not exist or cannot be listed.
    """
    data_file_path = join(data_root, label)
    # listdir() yields entries in arbitrary order; sorting keeps the result
    # (and everything derived from it) reproducible between runs.
    candidates = (join(data_file_path, name) for name in sorted(listdir(data_file_path)))
    return [path for path in candidates if isfile(path)]

In [49]:
# Constructs and returns a list of images and the corresponding labels
# The feature at this point is nothing but the image itself. Later in pre-processing,
# the image content will be reduced into features using HOG.
def get_images_and_labels():
    """
    Loads the images of every label in `label_list` and pairs each image
    with its label name.

    Returns
    -------
    list of tuple
        (image, label) tuples in shuffled order, where image is the value
        returned by cv2.imread for a file and label is the label name.
        Files that cv2.imread cannot read are skipped with a warning.
    """
    image_label_list = []
    for label in label_list:
        for file_path in get_file_list_for_label(label):
            img = cv2.imread(file_path)
            # cv2.imread reports failure by returning None rather than raising;
            # keeping such an entry would only crash later in cv2.resize.
            if img is None:
                warnings.warn("Skipping unreadable image file: %s" % file_path)
                continue
            image_label_list.append((img, label))
    shuffle(image_label_list)
    return image_label_list

In [50]:
# HOG descriptor configuration.
# Every image is resized to a square of minDim x minDim before the descriptor
# is computed, so the descriptor window uses the same size.
minDim = 80
dims = (minDim, minDim)

cellSize = (8, 8)
blockSize = (16, 16)
blockStride = (8, 8)
nbins = 9

# Positional arguments: window size, block size, block stride, cell size, bins.
hog = cv2.HOGDescriptor(dims, blockSize, blockStride, cellSize, nbins)

In [51]:
def get_hog_histograms_with_labels(image_label_list, hog, dims):
    """
    Computes a HOG descriptor for every image and returns the descriptors
    together with the matching labels.

    Parameters
    ----------
    image_label_list : iterable
        Items whose first element is an image and whose second element is
        its label (e.g. the list built by get_images_and_labels).
    hog : object
        Descriptor object providing compute(image), e.g. cv2.HOGDescriptor.
    dims : tuple
        Target size handed to cv2.resize for every image.

    Returns
    -------
    tuple of (list, list)
        (histograms, labels), shuffled in unison so that histograms[i]
        belongs to labels[i]. Both lists are empty when there is no usable
        image, so `X, y = ...` unpacking always works.
    """
    samples = []
    for img_label in image_label_list:
        img, label = img_label[0], img_label[1]
        # An unreadable file shows up as None; cv2.resize would raise on it.
        if img is None:
            warnings.warn("Skipping entry without image data for label: %s" % label)
            continue
        resized = cv2.resize(img, dims)

        #for images with transparency layer, reduce to 3 layers
        hist = hog.compute(resized[:, :, :3])

        samples.append((hist, label))
    shuffle(samples)
    # Build concrete lists instead of returning a lazy map over zip(*...):
    # that form is a one-shot iterator on Python 3 and yields nothing to
    # unpack when there are no samples.
    histograms = [sample[0] for sample in samples]
    labels = [sample[1] for sample in samples]
    return histograms, labels

In [52]:
# The helper functions shuffle with the global `random` generator. Seed it so
# the sample order is repeatable; otherwise the fixed random_state of the split
# below does not make the run reproducible.
random.seed(2)

image_label_list = get_images_and_labels()

X, y = get_hog_histograms_with_labels(image_label_list, hog, dims)

# Flatten every descriptor into a 1-D feature vector for the classifier.
X = [x.flatten() for x in X]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

lsvm = SVC(kernel='linear', C = 1.0, probability=True)
lsvm.fit(X_train, y_train)

# Predict once and reuse the result for both metrics; for a classifier,
# score() is the accuracy of predict() on the same data.
y_pred = lsvm.predict(X_test)
print("test accuracy ", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

test accuracy  0.666666666667
             precision    recall  f1-score   support

    Dessert       0.75      0.79      0.77        19
     Entree       0.59      0.59      0.59        22
      Salad       0.67      0.63      0.65        19

avg / total       0.67      0.67      0.67        60

