In [339]:
import cv2
import os
import numpy as np
from sklearn.cluster import KMeans
from sklearn import svm
from sklearn.model_selection import train_test_split
import pickle

In [340]:
rdir_path = './newData'
nClusters = 512
vocabFile = 'vocab_512.sav'
clsFile = 'cls.sav'
svm_c = 0.005
svm_gamma = 0.1
svm_kernal = 'linear'

splitProp = 0.8

In [341]:
def loadDataset():
    imgs = []
    labels = []
    label = -1
    
    onlyDirs = [f for f in os.listdir(rdir_path) if os.path.isdir(os.path.join(rdir_path, f))]
    
    for directory in onlyDirs:
        label = label + 1
        path = rdir_path+'/'+directory
        onlyfiles = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
        
        for file in onlyfiles:
            imgPath = path+'/'+file
            imgs.append(imgPath)
            labels.append(label)
    
    return (imgs,labels)

In [342]:
def getDescriptors(img, extractor):
    gray = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
    keypoints, descriptors = extractor.detectAndCompute(gray, None)
    return descriptors

In [343]:
def getBatchDesc(imgs):
    descriptors = np.asarray([])
    extractor = cv2.xfeatures2d.SIFT_create()
    #i = 0
    for img in imgs:
        desc = getDescriptors(img,extractor)
        if type(desc) == np.ndarray :
            if descriptors.shape[0] == 0:
                descriptors = desc
            else:
                descriptors = np.concatenate((descriptors, desc), axis=0)
    return descriptors

In [344]:
def getImgs(imgsPath):
    imgs = []
    for imgName in imgsPath:
        img = cv2.imread(imgName)
        imgs.append(img)
    imgs = np.asarray(imgs)
    return imgs

In [345]:
def getVocabularies(imgs):
    print("collecting descriptors...")
    descriptors = getBatchDesc(imgs)
    print("vector quantization started...")
    kmeans = KMeans(n_clusters = nClusters)
    kmeans.fit(descriptors)
    return kmeans

In [346]:
def getBagOfKP(img,extractor,vocab):
    bok = [0]*nClusters
    gray = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
    keypoints, descriptors = extractor.detectAndCompute(gray, None)
    if type(descriptors) == np.ndarray:
        pred = vocab.predict(descriptors)
        for ele in pred:
            bok[ele] = bok[ele] + 1
    return bok

In [347]:
def getBatchBOK(imgs,vocab):
    batchBOK = []
    extractor = cv2.xfeatures2d.SIFT_create()
    for img in imgs:
        bok = getBagOfKP(img,extractor,vocab)
        batchBOK.append(bok)
    return  np.asarray(batchBOK)

In [348]:
dataset = loadDataset()
trainImgPaths,testImgPaths, trainLabels, testLabels = train_test_split(dataset[0], dataset[1], 
                                                train_size=splitProp, random_state=42,shuffle = True,stratify = dataset[1])



In [349]:
trainImgs = getImgs(trainImgPaths)
vocab = getVocabularies(trainImgs)
pickle.dump(vocab, open(vocabFile, 'wb'))
#vocab = pickle.load(open(vocabFile, 'rb'))

In [350]:
features = getBatchBOK(trainImgs,vocab)

In [351]:
#model = svm.SVC(C=svm_c,kernel=svm_kernal,gamma=svm_gamma)
model = svm.SVC(C=svm_c,kernel=svm_kernal)
print("training started...")
model.fit(features,trainLabels)
pickle.dump(model, open(clsFile, 'wb'))

training started...


In [352]:
testImgs = getImgs(testImgPaths)
features = getBatchBOK(testImgs,vocab)

In [353]:
acc = model.score(features,testLabels)

In [354]:
print(acc)

0.9722222222222222
