In [19]:
import numpy as np
from skimage.io import imread_collection
from skimage.transform import resize
import os
import glob


def get_tiny_images(image_paths):
    # load images
    images = imread_collection(image_paths)

    w_size = h_size = 16
    tiny_images = []

    for image in images:
        # resize to 16*16
        image = resize(image, (w_size, h_size))
        image = (image - np.mean(image)) / np.std(image)
        tiny_images.append(image.flatten())

    return np.array(tiny_images)

def get_image_paths(data_path, categories, num_train_per_cat):
    '''
    This function returns lists containing the file path for each train
    and test image, as well as lists with the label of each train and
    test image. By default both lists will be 1500x1, where each
    entry is a char array (or string).
    '''

    num_categories = len(categories) # number of scene categories.

    # This paths for each training and test image. By default it will have 1500
    # entries (15 categories * 100 training and test examples each)
    train_image_paths = [None] * (num_categories * (num_train_per_cat-30))
    test_image_paths  = [None] * (num_categories * (num_train_per_cat-70))

    # The name of the category for each training and test image. With the
    # default setup, these arrays will actually be the same, but they are built
    # independently for clarity and ease of modification.
    train_labels = [None] * (num_categories * (num_train_per_cat-30))
    test_labels  = [None] * (num_categories * (num_train_per_cat-70))

    for i,cat in enumerate(categories):
        images = glob.glob(os.path.join(data_path, 'training', cat, '*.jpg'))

        for j in range(len(images)):
            if j <=69:
                train_image_paths[i * (num_train_per_cat-30) + j] = images[j]
                train_labels[i * (num_train_per_cat-30) + j] = cat
            else:
                test_image_paths[i*30+j-70] = images[j]
                test_labels[i*30+j-70] = cat
        
#         images = glob.glob(os.path.join(data_path, 'training', cat, '*.jpg'))
#         for j in range(70,num_train_per_cat):

#             test_image_paths[i * num_train_per_cat + j] = images[j]
#             test_labels[i * num_train_per_cat + j] = cat

    return (train_image_paths, train_labels,test_image_paths,test_labels)

In [20]:
data_path = './'

# This is the list of categories / directories to use. The categories are
# somewhat sorted by similarity so that the confusion matrix looks more
# structured (indoor and then urban and then rural).
categories = ['kitchen', 'store', 'bedroom', 'livingRoom', 'Office',
       'industrial', 'Suburb', 'InsideCity', 'TallBuilding', 'Street',
       'Highway', 'OpenCountry', 'Coast', 'Mountain', 'Forest']

# Number of training examples per category to use. Max is 100. For
# simplicity, we assume this is the number of test cases per category as
# well.
num_train_per_cat = 100

train_image_paths, train_labels, test_image_paths,test_labels = get_image_paths(data_path, categories, num_train_per_cat)

print(test_image_paths)
print(test_labels[0:10])

# for i in range(len(train_image_paths)):
#     print(i)
#     get_tiny_images(train_image_paths[i])

# c = get_tiny_images(train_image_paths[:5])


['./training\\kitchen\\72.jpg', './training\\kitchen\\73.jpg', './training\\kitchen\\74.jpg', './training\\kitchen\\75.jpg', './training\\kitchen\\76.jpg', './training\\kitchen\\77.jpg', './training\\kitchen\\78.jpg', './training\\kitchen\\79.jpg', './training\\kitchen\\8.jpg', './training\\kitchen\\80.jpg', './training\\kitchen\\81.jpg', './training\\kitchen\\82.jpg', './training\\kitchen\\83.jpg', './training\\kitchen\\84.jpg', './training\\kitchen\\85.jpg', './training\\kitchen\\86.jpg', './training\\kitchen\\87.jpg', './training\\kitchen\\88.jpg', './training\\kitchen\\89.jpg', './training\\kitchen\\9.jpg', './training\\kitchen\\90.jpg', './training\\kitchen\\91.jpg', './training\\kitchen\\92.jpg', './training\\kitchen\\93.jpg', './training\\kitchen\\94.jpg', './training\\kitchen\\95.jpg', './training\\kitchen\\96.jpg', './training\\kitchen\\97.jpg', './training\\kitchen\\98.jpg', './training\\kitchen\\99.jpg', './training\\store\\72.jpg', './training\\store\\73.jpg', './training\\

In [14]:
from scipy.spatial.distance import cdist
from collections import Counter

def nearest_neighbor_classify(train_image_feats, train_labels, test_image_feats, k = 1):
    distances = cdist(test_image_feats, train_image_feats, 'euclidean')

    # 1) Find the k closest features to each test image feature in euclidean space
    predictions = []

    for distance in distances:
        k_small_dis_labels = []
        sorted_dis_index = np.argsort(distance)

        # 2) Determine the labels of those k features
        for i in range(k):
            k_small_dis_labels.append(train_labels[sorted_dis_index[i]])
        
        # 3) Pick the most common label from the k
        most_common_label = Counter(k_small_dis_labels).most_common(1)[0][0]

        # 4) Store that label in a list
        predictions.append(most_common_label)

    return np.array(predictions)

train_image_feats = get_tiny_images(train_image_paths)
test_image_feats  = get_tiny_images(test_image_paths)

predicted_categories = nearest_neighbor_classify(train_image_feats, train_labels, test_image_feats)
print(predicted_categories)


['Suburb' 'kitchen' 'Suburb' 'kitchen' 'Suburb' 'bedroom' 'bedroom'
 'kitchen' 'bedroom' 'Suburb' 'kitchen' 'kitchen' 'kitchen' 'Suburb'
 'kitchen' 'kitchen' 'TallBuilding' 'industrial' 'bedroom' 'kitchen'
 'Suburb' 'Office' 'kitchen' 'kitchen' 'kitchen' 'InsideCity' 'kitchen'
 'kitchen' 'kitchen' 'Street' 'Suburb' 'Highway' 'store' 'Office'
 'bedroom' 'livingRoom' 'Suburb' 'kitchen' 'livingRoom' 'kitchen'
 'kitchen' 'store' 'livingRoom' 'InsideCity' 'InsideCity' 'kitchen'
 'Mountain' 'Suburb' 'kitchen' 'Forest' 'Mountain' 'Office' 'InsideCity'
 'Suburb' 'kitchen' 'Suburb' 'industrial' 'InsideCity' 'Office' 'Street'
 'kitchen' 'bedroom' 'bedroom' 'kitchen' 'bedroom' 'bedroom' 'bedroom'
 'livingRoom' 'bedroom' 'Suburb' 'Office' 'bedroom' 'TallBuilding'
 'bedroom' 'bedroom' 'Office' 'bedroom' 'InsideCity' 'kitchen' 'Suburb'
 'Suburb' 'kitchen' 'bedroom' 'bedroom' 'Suburb' 'Suburb' 'InsideCity'
 'Suburb' 'Suburb' 'kitchen' 'Street' 'bedroom' 'Suburb' 'InsideCity'
 'bedroom' 'bedroom' 'liv

In [15]:
c = 0
for i in range(len(test_labels)):
    if test_labels[i] == predicted_categories[i]:
        c += 1
print("正确率：", c/450*100)

正确率： 19.333333333333332


In [21]:
from skimage.feature import hog
from numpy.linalg import norm

def get_bags_of_words(image_paths):
    vocab = np.load('vocab.npy')
    print('Loaded vocab from file.')

    images = imread_collection(image_paths)
    images_histograms = []

    cells_per_block = (2, 2) # Change for lower compute time
    t = cells_per_block[0]
    pixels_per_cell = (4, 4)
    images_feature_vectors = []

    for i, image in enumerate(images):
        feature_vector = hog(
            image, 
            feature_vector = True, 
            pixels_per_cell = pixels_per_cell,
            cells_per_block = cells_per_block, 
            visualize = False
        ).reshape(-1, t*t*9)

        # 计算当前图片的feature与词袋的距离
        dist = cdist(vocab, feature_vector, metric='euclidean')

        # 选择最短距离，计算直方图
        min_dis_index = np.argmin(dist, axis=0)
        histogram, bin_edges = np.histogram(min_dis_index, bins=len(vocab))
        histogram = histogram / norm(histogram)

        images_histograms.append(histogram)

    return np.array(images_histograms)

In [22]:
from sklearn.cluster import KMeans, MiniBatchKMeans
import time

def build_vocabulary(image_paths, vocab_size):
    images = imread_collection(image_paths)

    cells_per_block = (2, 2)
    pixels_per_cell = (4, 4)
    t = cells_per_block[0]
    images_feature_vectors = []

    for image in images:
        feature_vector = hog(
            image, 
            feature_vector = True, 
            pixels_per_cell = pixels_per_cell,
            cells_per_block = cells_per_block, 
            visualize = False
        ).reshape(-1, t*t*9)

        images_feature_vectors.append(feature_vector)

    images_feature_vectors = np.vstack(images_feature_vectors)

    # MiniBatchKMeans相比KMeans用时更少，资源消耗更少，质量相对要差一点，但差别不大
    t0 = time.time()
    k_means = KMeans(n_clusters=vocab_size, max_iter=5).fit(images_feature_vectors)
#     k_means = MiniBatchKMeans(n_clusters=vocab_size, max_iter=500).fit(images_feature_vectors)
    print('time spend：', time.time() - t0)

    vocabulary = np.vstack(k_means.cluster_centers_)

    return vocabulary

In [23]:
if not os.path.isfile('vocab.npy'):
    print('No existing visual word vocabulary found. Computing one from training images.')

    #Larger values will work better (to a point), but are slower to compute
    vocab_size = 200

    # YOU CODE build_vocabulary (see student.py)
    vocab = build_vocabulary(train_image_paths, vocab_size)
    np.save('vocab.npy', vocab)

# YOU CODE get_bags_of_words.m (see student.py)
train_image_feats = get_bags_of_words(train_image_paths)
# You may want to write out train_image_features here as a *.npy and
# load it up later if you want to just test your classifiers without
# re-computing features

test_image_feats  = get_bags_of_words(test_image_paths)

No existing visual word vocabulary found. Computing one from training images.
time spend： 1244.657154083252
Loaded vocab from file.
Loaded vocab from file.


In [24]:
from sklearn.svm import LinearSVC

def svm_classify(train_image_feats, train_labels, test_image_feats):
    l_svc = LinearSVC(random_state=0, tol=1e-5)

    # train LinearSVC model
    l_svc.fit(train_image_feats, train_labels)

    # make prediction
    predictions = l_svc.predict(test_image_feats)

    return predictions

In [25]:
predicted_categories = svm_classify(train_image_feats, train_labels, test_image_feats)
print(predicted_categories)

['kitchen' 'kitchen' 'kitchen' 'kitchen' 'kitchen' 'kitchen' 'kitchen'
 'kitchen' 'kitchen' 'kitchen' 'kitchen' 'kitchen' 'kitchen' 'kitchen'
 'kitchen' 'kitchen' 'Suburb' 'store' 'bedroom' 'bedroom' 'kitchen'
 'kitchen' 'kitchen' 'bedroom' 'kitchen' 'kitchen' 'kitchen' 'kitchen'
 'industrial' 'kitchen' 'store' 'store' 'Suburb' 'store' 'store' 'store'
 'store' 'store' 'store' 'store' 'store' 'store' 'store' 'store' 'Office'
 'store' 'store' 'store' 'store' 'store' 'store' 'store' 'store' 'store'
 'store' 'Suburb' 'store' 'store' 'store' 'store' 'bedroom' 'kitchen'
 'bedroom' 'Office' 'Highway' 'bedroom' 'bedroom' 'bedroom' 'bedroom'
 'bedroom' 'bedroom' 'bedroom' 'Office' 'bedroom' 'bedroom' 'bedroom'
 'bedroom' 'Suburb' 'kitchen' 'bedroom' 'Suburb' 'bedroom' 'bedroom'
 'Suburb' 'kitchen' 'Suburb' 'OpenCountry' 'kitchen' 'kitchen' 'kitchen'
 'livingRoom' 'OpenCountry' 'livingRoom' 'livingRoom' 'livingRoom'
 'livingRoom' 'Coast' 'livingRoom' 'livingRoom' 'livingRoom' 'Coast'
 'livingRoo

In [26]:
c = 0
for i in range(len(test_labels)):
    if test_labels[i] == predicted_categories[i]:
        c += 1
print("正确率：", c/450*100)

正确率： 65.77777777777779
