In [38]:
# import the necessary packages
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
#from imutils import paths
import pandas as pd
import numpy as np
import argparse
#import imutils
import cv2
import os
import glob

In [31]:
def image_to_feature_vector(image, size=(200, 200)):
    # resize the image to a fixed size, then flatten the image into
    # a list of raw pixel intensities
    return cv2.resize(image, size).flatten()

In [34]:
def extract_color_histogram(image, bins=(8, 8, 8)):
    # extract a 3D color histogram from the HSV color space using
    # the supplied number of `bins` per channel
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
        [0, 180, 0, 256, 0, 256])
 
    hist = cv2.normalize(hist, hist)

    return hist.flatten()

In [75]:
def list_images(paths):
    out = []
    for p in paths: 
        out.extend(glob.glob(p+'*.png'))
    return out


In [76]:
def getFeatures(imagePaths):
    # initialize the raw pixel intensities matrix, the features matrix,
    # and labels list
    rawImages = []
    features = []
    labels = []
    
    # grab the list of images that we'll be describing
    print("[INFO] describing images...")

    # loop over the input images
    for (i, imagePath) in enumerate(imagePaths):
        # load the image and extract the class label (assuming that our
        # path as the format: /path/to/dataset/{class}_id-num.jpg
        image = cv2.imread(imagePath)
        label = imagePath.split('/')[-1].split("_")[0]

        # extract raw pixel intensity "features", followed by a color
        # histogram to characterize the color distribution of the pixels
        # in the image
        pixels = image_to_feature_vector(image)
        hist = extract_color_histogram(image)

        # update the raw images, features, and labels matricies,
        # respectively
        rawImages.append(pixels)
        features.append(hist)
        labels.append(label)

        # show an update every 1,000 images
        if i > 0 and i % 1000 == 0:
            print("[INFO] processed {}/{}".format(i, len(imagePaths)))
    print("Done getting features and labels")
    
    rawImages = np.array(rawImages)
    features = np.array(features)
    labels = np.array(labels)
    return rawImages, features, labels

In [47]:
tear_set = 'train_images/tear/'
none_set = 'train_images/none/'
dataset = (tear_set, none_set)

#Tune KNN Classifier parameters
k = 1
#number of cores to use (j=-1 to use all available)
j = 2

#imagePaths = list_images(args["dataset"])
imagePaths = list_images(dataset)
rawImages, features, labels = getFeatures(imagePaths)
        
# show some information on the memory consumed by the raw images
# matrix and features matrix
ulabels = set(labels)

print("[INFO] pixels matrix: {:.2f}MB".format(
    rawImages.nbytes / (1024 * 1000.0)))
print("[INFO] features matrix: {:.2f}MB".format(
    features.nbytes / (1024 * 1000.0)))
print("[INFO]: unique labels found: ", str(ulabels))

[INFO] describing images...
[INFO] processed 1000/1521
[INFO] pixels matrix: 178.24MB
[INFO] features matrix: 3.04MB
[INFO]: unique labels found:  {'Tear', 'None'}


In [45]:
# partition the data into training and testing splits, using 75%
# of the data for training and the remaining 25% for testing
(trainRI, testRI, trainRL, testRL) = train_test_split(
    rawImages, labels, test_size=0.25, random_state=42)
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
    features, labels, test_size=0.25, random_state=42)


# train and evaluate a k-NN classifer on the raw pixel intensities
print("[INFO] evaluating raw pixel accuracy...")
model = KNeighborsClassifier(n_neighbors=k,
    n_jobs=j)
model.fit(trainRI, trainRL)

acc = model.score(testRI, testRL)
print("[INFO] raw pixel accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating raw pixel accuracy...
[INFO] raw pixel accuracy: 100.00%


In [37]:
print("[INFO] evaluating histogram accuracy...")
model = KNeighborsClassifier(n_neighbors=k,
    n_jobs=j)
model.fit(trainFeat, trainLabels)
acc = model.score(testFeat, testLabels)
print("[INFO] histogram accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating histogram accuracy...
[INFO] histogram accuracy: 100.00%


In [77]:
valset_tear = 'validation_images/tear/'
valset_none = 'validation_images/none/'
imagePaths = list_images((valset_tear, valset_none))
rawTestImgs, testFeatures, testLabels = getFeatures(imagePaths)

rawTestImgs.shape

[INFO] describing images...
Done getting features and labels


(168, 120000)

In [None]:
evalModel = KNeighborsClassifier(n_neighbors=k, n_jobs=j)
evalModel.fit(features, labels)
#testFeatures.size
#acc = model.score(testFeatures, testLabels)
#print("[INFO] histogram accuracy: {:.2f}%".format(acc * 100))

In [82]:
acc = evalModel.score(testFeatures, testLabels)
print(acc)

1.0
