-
Notifications
You must be signed in to change notification settings - Fork 60
/
findFeatures.py
83 lines (65 loc) · 2.63 KB
/
findFeatures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/local/bin/python2.7
#python findFeatures.py -t dataset/train/
import argparse as ap
import cv2
import numpy as np
import os
from sklearn.externals import joblib
from scipy.cluster.vq import *
from sklearn import preprocessing
from rootsift import RootSIFT
import math
# Get the path of the training set
parser = ap.ArgumentParser()
parser.add_argument("-t", "--trainingSet", help="Path to Training Set", required="True")
args = vars(parser.parse_args())
# Get the training classes names and store them in a list
train_path = args["trainingSet"]
#train_path = "dataset/train/"
training_names = os.listdir(train_path)
numWords = 1000
# Get all the path to the images and save them in a list
# image_paths and the corresponding label in image_paths
image_paths = []
for training_name in training_names:
image_path = os.path.join(train_path, training_name)
image_paths += [image_path]
# Create feature extraction and keypoint detector objects
fea_det = cv2.FeatureDetector_create("SIFT")
des_ext = cv2.DescriptorExtractor_create("SIFT")
# List where all the descriptors are stored
des_list = []
for i, image_path in enumerate(image_paths):
im = cv2.imread(image_path)
print "Extract SIFT of %s image, %d of %d images" %(training_names[i], i, len(image_paths))
kpts = fea_det.detect(im)
kpts, des = des_ext.compute(im, kpts)
# rootsift
#rs = RootSIFT()
#des = rs.compute(kpts, des)
des_list.append((image_path, des))
# Stack all the descriptors vertically in a numpy array
#downsampling = 1
#descriptors = des_list[0][1][::downsampling,:]
#for image_path, descriptor in des_list[1:]:
# descriptors = np.vstack((descriptors, descriptor[::downsampling,:]))
# Stack all the descriptors vertically in a numpy array
descriptors = des_list[0][1]
for image_path, descriptor in des_list[1:]:
descriptors = np.vstack((descriptors, descriptor))
# Perform k-means clustering
print "Start k-means: %d words, %d key points" %(numWords, descriptors.shape[0])
voc, variance = kmeans(descriptors, numWords, 1)
# Calculate the histogram of features
im_features = np.zeros((len(image_paths), numWords), "float32")
for i in xrange(len(image_paths)):
words, distance = vq(des_list[i][1],voc)
for w in words:
im_features[i][w] += 1
# Perform Tf-Idf vectorization
nbr_occurences = np.sum( (im_features > 0) * 1, axis = 0)
idf = np.array(np.log((1.0*len(image_paths)+1) / (1.0*nbr_occurences + 1)), 'float32')
# Perform L2 normalization
im_features = im_features*idf
im_features = preprocessing.normalize(im_features, norm='l2')
joblib.dump((im_features, image_paths, idf, numWords, voc), "bof.pkl", compress=3)