In [None]:
# Show the version of the Python interpreter running this kernel.
import sys
sys.version_info

In [None]:
import hnswlib
from os import listdir, path
import numpy as np
from keras.applications import mobilenetv2

# Directory that holds the image files.
images_directory = "household_images/images"

# hnswlib nearest-neighbour index over 1280-dimensional vectors compared by cosine distance.
# NOTE(review): capacity is requested for 7,000,000 elements, yet main() further down builds
# its own index and no visible cell adds items to this one — confirm it is still needed.
desc_index = hnswlib.Index(space='cosine', dim=1280)
desc_index.init_index(max_elements=7000000, ef_construction=500, M=32)
desc_index.set_ef(500)

# MobileNetV2 with ImageNet weights and no classification head, for 224x224 3-channel input.
# NOTE(review): feature_net is not referenced by any later visible cell.
feature_net = mobilenetv2.MobileNetV2(weights="imagenet", include_top=False, input_shape=(224, 224, 3))



In [None]:
from numpy import genfromtxt

def prepare_image_paths_labels():
    """Load ``household_images/labels.csv`` as a comma-separated table.

    Returns:
        The array produced by ``numpy.genfromtxt`` with its default (float)
        dtype; fields that cannot be parsed as numbers are expected to come
        back as ``nan``.
    """
    return genfromtxt("household_images/labels.csv", delimiter=',')


In [None]:
# Display what the genfromtxt-based loader defined in the previous cell returns.
prepare_image_paths_labels()

In [None]:
import csv

def prepare_image_paths_labels(labels_file="household_images/labels.csv"):
    """Read image paths and integer labels from a CSV file.

    The first row is treated as a header and skipped. In each remaining row,
    column 0 is taken as the image path and column 1 is converted to ``int``.
    Blank lines are ignored, and an empty file yields two empty lists.

    Args:
        labels_file: Path of the CSV file to read. Defaults to the location
            this notebook has always used.

    Returns:
        A tuple ``(image_paths, image_labels)`` of two equally long lists.

    Raises:
        ValueError: If a label field cannot be converted to ``int``.
        IndexError: If a non-blank row has fewer than two columns.
    """
    # The csv module asks for newline="" so that line breaks inside quoted
    # fields are handled by the parser instead of the file object.
    with open(labels_file, "rt", encoding="utf8", newline="") as f_input:
        csv_input = csv.reader(f_input)
        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(csv_input, None)
        # csv.reader yields [] for blank lines, which would break row[0] below.
        data = [row for row in csv_input if row]

    image_paths = [row[0] for row in data]
    image_labels = [int(row[1]) for row in data]

    return image_paths, image_labels

In [None]:
# Display the (image_paths, image_labels) pair returned by the csv-based loader defined in the previous cell.
prepare_image_paths_labels()

In [None]:
# NOTE(review): no visible cell assigns `data` at notebook scope (it only exists as a local
# inside the loader functions), so this presumably raises NameError on a fresh kernel —
# confirm, then either assign it first or remove this cell.
list(data)

In [None]:
import cv2
import math

def open_and_prepare_image(image_path, tile_size = 224, vertical_tiles = 1):
    """Load an image and rescale it so its height is a whole number of tiles.

    The image is resized with bicubic interpolation to a height of
    ``tile_size * vertical_tiles``; the width is scaled by the same factor
    (rounded down) so the aspect ratio is kept.

    Args:
        image_path: Path handed to ``cv2.imread``.
        tile_size: Side length of one square tile, in pixels.
        vertical_tiles: Number of tiles the resized image should be high.

    Returns:
        The resized image as returned by ``cv2.resize``. OpenCV loads colour
        images in BGR channel order, and that order is kept.

    Raises:
        OSError: If ``cv2.imread`` cannot load ``image_path`` (it signals a
            missing or undecodable file by returning ``None``).
    """
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(f"could not read image (missing file or unsupported format): {image_path!r}")

    target_height = tile_size * vertical_tiles
    scale = target_height / image.shape[0]

    # Very tall, narrow images would otherwise round down to a width of 0,
    # which cv2.resize rejects.
    scale_width = max(1, math.floor(scale * image.shape[1]))

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, (scale_width, target_height), interpolation=cv2.INTER_CUBIC)


def extract_windows(image, window_size=224, channel_count=3, interior_w=False, interior_h=False):
    """Cut an image into a centred grid of non-overlapping square windows.

    As many ``window_size`` x ``window_size`` windows as fit are taken along
    each axis; any leftover pixels are split evenly between the two borders.

    Args:
        image: Array indexed as (row, column, channel).
        window_size: Side length of each window, in pixels.
        channel_count: Size of the last axis of the returned window array.
        interior_w: If true, use one cell fewer along axis 0.
        interior_h: If true, use one cell fewer along axis 1.

    Returns:
        A tuple ``(windows, coords)``. ``windows`` is a float array of shape
        ``(n, window_size, window_size, channel_count)`` in row-major grid
        order; ``coords`` has shape ``(n, 2)`` and holds the (row, column)
        centre of each window in ``image``. ``n`` is 0 when no window fits.
    """
    rows = image.shape[0] // window_size
    cols = image.shape[1] // window_size

    # NOTE(review): interior_w shrinks the grid along axis 0 (image height) and
    # interior_h along axis 1 (image width); the names suggest the opposite.
    # Kept as in the original — confirm the intended meaning.
    if interior_w:
        rows -= 1
    if interior_h:
        cols -= 1

    # The interior flags can push a count below zero. Unclamped, that gives
    # either a negative array dimension or, when both counts are -1, a
    # product of 1 and therefore one bogus all-zero window.
    rows = max(rows, 0)
    cols = max(cols, 0)

    top = (image.shape[0] - rows * window_size) // 2
    left = (image.shape[1] - cols * window_size) // 2

    windows = np.zeros((rows * cols, window_size, window_size, channel_count))
    coords = np.zeros((rows * cols, 2))

    half = window_size // 2
    for i in range(rows):
        y0 = top + window_size * i
        for j in range(cols):
            x0 = left + window_size * j
            k = i * cols + j
            windows[k] = image[y0:y0 + window_size, x0:x0 + window_size]
            coords[k] = (y0 + half, x0 + half)

    return windows, coords

In [None]:
from matplotlib import pyplot as plt
# Load one sample image and rescale it with the default tile settings.
img = open_and_prepare_image("household_images/images/IMG_0175.jpg")
# Reverse the last (channel) axis: OpenCV loads images as BGR, matplotlib displays RGB.
img2 = img[:,:,::-1]
plt.imshow(img2)

In [None]:
import numpy as np
# Cut the sample image into windows; extract_windows returns (windows, coords).
res = extract_windows(img)
windows = res[0]
# Show (window count, height, width, channels) of the extracted windows.
windows.shape

In [None]:
# Show the first window: reverse the channel axis (BGR -> RGB) and divide by 255,
# since the windows are stored as floats and imshow expects float data in [0, 1].
img2 = windows[0][:,:,::-1]/255
plt.imshow(img2)

In [None]:
import cv2
import math
import csv
import hnswlib
from os import path
import numpy as np

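# Channel depth of the final feature map of each Keras backbone (include_top=False).
# feature_dimenstion below must match the backbone that is selected.
# InceptionResNetV2: 1536
# DenseNet121: 1024
# Xception: 2048
# ResNet50: 2048
# InceptionV3: 2048
# VGG16: 512
# VGG19: 512
# MobileNet: 1024
# MobileNetV2: 1280
# NASNet: 4032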

######################################################
from keras.applications import vgg16
from keras.applications.vgg16 import preprocess_input

# Length of one descriptor vector; passed as `dim` when main() creates the hnswlib index.
# 512 is the VGG16 entry in the list above.
feature_dimenstion = 512 
# Side length of one square tile/window in pixels; also the model's input height and width.
default_tile_size = 224
# How many tiles high each image is made when it is resized.
default_vertical_tiles = 1
# How many neighbours from other images are inspected per window when scoring.
default_neighbor_count = 3
######################################################

def prepare_image_paths_labels():
    """Load ``household_images/labels.csv`` as a comma-separated table via ``genfromtxt``.

    NOTE(review): a second function with this same name is defined right
    after this one in the same cell, so this version is replaced before it
    can be called. It also relies on ``genfromtxt`` having been imported by
    an earlier cell.

    Returns:
        The array produced by ``genfromtxt`` for the labels file.
    """
    csv_location = "household_images/labels.csv"
    table = genfromtxt(csv_location, delimiter=',')
    return table


def prepare_image_paths_labels(directory_path="household_images/images", labels_file="household_images/labels.csv"):
    """Read image paths and integer labels from a CSV file.

    The first row is treated as a header and skipped. In each remaining row,
    column 0 is a file name that is joined onto ``directory_path`` and
    column 1 is converted to ``int``. Blank lines are ignored, and an empty
    file yields two empty lists.

    Args:
        directory_path: Directory the file names in column 0 are relative to.
        labels_file: Path of the CSV file to read.

    Returns:
        A tuple ``(image_paths, image_labels)`` of two equally long lists.

    Raises:
        ValueError: If a label field cannot be converted to ``int``.
        IndexError: If a non-blank row has fewer than two columns.
    """
    # The csv module asks for newline="" so that line breaks inside quoted
    # fields are handled by the parser instead of the file object.
    with open(labels_file, "rt", encoding="utf8", newline="") as f_input:
        csv_input = csv.reader(f_input)
        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(csv_input, None)
        # csv.reader yields [] for blank lines, which would break row[0] below.
        data = [row for row in csv_input if row]

    image_paths = [path.join(directory_path, row[0]) for row in data]
    image_labels = [int(row[1]) for row in data]

    return image_paths, image_labels


def open_and_prepare_image(image_path, tile_size = default_tile_size, vertical_tiles = default_vertical_tiles):
    """Load an image and rescale it so its height is a whole number of tiles.

    The image is resized with bicubic interpolation to a height of
    ``tile_size * vertical_tiles``; the width is scaled by the same factor
    (rounded down) so the aspect ratio is kept.

    Args:
        image_path: Path handed to ``cv2.imread``.
        tile_size: Side length of one square tile, in pixels.
        vertical_tiles: Number of tiles the resized image should be high.

    Returns:
        The resized image as returned by ``cv2.resize``. OpenCV loads colour
        images in BGR channel order, and that order is kept.

    Raises:
        OSError: If ``cv2.imread`` cannot load ``image_path`` (it signals a
            missing or undecodable file by returning ``None``).
    """
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(f"could not read image (missing file or unsupported format): {image_path!r}")

    target_height = tile_size * vertical_tiles
    scale = target_height / image.shape[0]

    # Very tall, narrow images would otherwise round down to a width of 0,
    # which cv2.resize rejects.
    scale_width = max(1, math.floor(scale * image.shape[1]))

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, (scale_width, target_height), interpolation=cv2.INTER_CUBIC)


def extract_windows(image, window_size=default_tile_size, channel_count=3, interior_w=False, interior_h=False):
    """Cut an image into a centred grid of non-overlapping square windows.

    As many ``window_size`` x ``window_size`` windows as fit are taken along
    each axis; any leftover pixels are split evenly between the two borders.

    Args:
        image: Array indexed as (row, column, channel).
        window_size: Side length of each window, in pixels.
        channel_count: Size of the last axis of the returned window array.
        interior_w: If true, use one cell fewer along axis 0.
        interior_h: If true, use one cell fewer along axis 1.

    Returns:
        A tuple ``(windows, coords)``. ``windows`` is a float array of shape
        ``(n, window_size, window_size, channel_count)`` in row-major grid
        order; ``coords`` has shape ``(n, 2)`` and holds the (row, column)
        centre of each window in ``image``. ``n`` is 0 when no window fits.
    """
    rows = image.shape[0] // window_size
    cols = image.shape[1] // window_size

    # NOTE(review): interior_w shrinks the grid along axis 0 (image height) and
    # interior_h along axis 1 (image width); the names suggest the opposite.
    # Kept as in the original — confirm the intended meaning.
    if interior_w:
        rows -= 1
    if interior_h:
        cols -= 1

    # The interior flags can push a count below zero. Unclamped, that gives
    # either a negative array dimension or, when both counts are -1, a
    # product of 1 and therefore one bogus all-zero window.
    rows = max(rows, 0)
    cols = max(cols, 0)

    top = (image.shape[0] - rows * window_size) // 2
    left = (image.shape[1] - cols * window_size) // 2

    windows = np.zeros((rows * cols, window_size, window_size, channel_count))
    coords = np.zeros((rows * cols, 2))

    half = window_size // 2
    for i in range(rows):
        y0 = top + window_size * i
        for j in range(cols):
            x0 = left + window_size * j
            k = i * cols + j
            windows[k] = image[y0:y0 + window_size, x0:x0 + window_size]
            coords[k] = (y0 + half, x0 + half)

    return windows, coords

def convert_output(feats):
    """Average every feature map over its spatial grid.

    Args:
        feats: 4-D array indexed as (sample, row, column, channel), e.g. the
            output of a convolutional backbone without its top layers.

    Returns:
        A float64 array of shape ``(samples, channels)`` in which each row is
        the mean of that sample's feature vectors over all grid positions.

    Raises:
        ValueError: If ``feats`` is not 4-dimensional.
    """
    print("feats.shape", feats.shape)

    feats = np.asarray(feats)
    if feats.ndim != 4:
        raise ValueError(f"expected a 4-D array (sample, row, column, channel), got shape {feats.shape}")

    # One vectorised reduction over the two spatial axes replaces the Python
    # loop over every sample and grid position. Accumulating in float64
    # matches the precision of the previous float64 running total.
    return feats.mean(axis=(1, 2), dtype=np.float64)

def main():
    """Score how well pretrained-CNN window descriptors separate the labelled images.

    Steps:
      1. Load image paths and labels with ``prepare_image_paths_labels()``.
      2. Resize every image, cut it into windows, run the windows through
         VGG16 (ImageNet weights, no top) and average-pool the output into
         one descriptor per window.
      3. Put all descriptors into an hnswlib cosine-distance index.
      4. For every window, take its nearest neighbours that come from a
         *different* image (at most ``default_neighbor_count`` of them) and
         record the fraction that carry the same label.

    Returns:
        The mean of those per-window fractions.

    Raises:
        ValueError: If no window could be extracted from any image, or no
            window has a neighbour that belongs to another image.

    Note:
        OpenCV delivers BGR pixel data while the Keras ``preprocess_input``
        helpers expect RGB, so the channel axis is reversed before
        preprocessing. Scores recorded before this correction were computed
        with swapped colour channels.
    """

    ######################################################
    model = vgg16.VGG16(weights="imagenet", include_top=False, input_shape=(default_tile_size, default_tile_size, 3))
    ######################################################

    image_paths, image_labels = prepare_image_paths_labels()
    neighbor_count = default_neighbor_count

    descriptors = []
    labels = []   # label of the image each descriptor came from
    indexes = []  # position of that image in image_paths

    # Largest number of windows produced by any single image; needed to size k below.
    max_window_count = 0

    for i, (image_path, image_label) in enumerate(zip(image_paths, image_labels)):
        print(i, image_path)

        image = open_and_prepare_image(image_path)
        windows, _ = extract_windows(image)

        window_count = windows.shape[0]
        print("window_count", window_count)

        # Images narrower than one tile after resizing produce no windows;
        # there is nothing to feed to the network for them.
        if window_count == 0:
            continue

        max_window_count = max(max_window_count, window_count)

        # Reverse BGR -> RGB (see docstring). The copy gives preprocess_input
        # a fresh contiguous array it may modify in place.
        x = preprocess_input(windows[..., ::-1].copy())
        y = model.predict(x)
        feature = convert_output(y)

        descriptors.extend(feature)
        labels.extend([image_label] * window_count)
        indexes.extend([i] * window_count)

    if not descriptors:
        raise ValueError("no windows could be extracted from any image")

    descriptor_matrix = np.array(descriptors)

    # A query can return the window itself and every other window of the same
    # image before any usable neighbour, so k has to cover the image with the
    # MOST windows (not merely the last image processed). It can never exceed
    # the number of indexed items.
    k = min(neighbor_count + max_window_count, len(descriptors))

    desc_index = hnswlib.Index(space='cosine', dim=feature_dimenstion)
    desc_index.init_index(max_elements=max(1000000, len(descriptors)), ef_construction=500, M=32)
    desc_index.set_ef(max(500, k))  # ef must not be smaller than k
    desc_index.add_items(descriptor_matrix, np.arange(len(descriptors)))

    # Query all descriptors in one batch; row i holds the neighbours of descriptor i.
    idxs, _ = desc_index.knn_query(descriptor_matrix, k=k)

    scores = []

    for i in range(len(descriptors)):
        count = 0
        candidates = 0

        for idx in idxs[i]:
            # Skip the window itself and other windows of the same image.
            if indexes[i] == indexes[idx]:
                continue

            candidates += 1

            if labels[i] == labels[idx]:
                count += 1

            if candidates >= neighbor_count:
                break

        # Without any neighbour from another image the fraction is undefined.
        if candidates == 0:
            continue

        scores.append(count/candidates)

    if not scores:
        raise ValueError("no window has a neighbour from a different image; cannot compute a score")

    return sum(scores)/len(scores)


# Run the evaluation; the returned mean score is displayed as the cell output.
main()

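Each score below is the value returned by `main()`: the mean, over all windows, of the fraction of nearest neighbours from other images that share the window's label (3 neighbours unless stated otherwise).

## ResNet50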

#### 1 Tile, Cosine
0.7532008830022079

#### 1 Tile, Cosine, 7 nearest neighbors
0.6365184484389776

#### 4 Tile, Cosine
0.6441501103752777

#### 12 Tile, Cosine
0.5460632818248746

#### 20 Tile, Cosine
0.5170860927152365

#### 30 Tile, Cosine
0.4913612950699003

#### 48 Tile, Cosine
0.4563097866078034

#### 63 Tile, Cosine
0.43885209713025397

#### 1 Tile, 7x7 local feats, Cosine
0.46622516556291393

#### 4 Tile, 14x14 local feats, Cosine
0.33907284768212403


# VGG16

### 1 Tile, Cosine
0.7479028697571752

### 1 Tile, Euclidean
0.6693156732891836

#### 1 Tile, Cosine, 7 nearest neighbors
0.6285714285714273

#### 4 Tile, Cosine
0.6324503311258295

#### 12 Tile, Cosine
0.5251287711552626

#### 1 Tile, 7x7 local feats, Cosine
0.43178807947019865


# NASNet

### 1 Tile, Cosine
0.7434878587196473


# DenseNet

### 1 Tile, Cosine
0.7434878587196473

### 20 Tiles, Cosine
0.49942604856512457


# InceptionResNetV2

### 1 Tile, Cosine
0.7037527593818991


# Xception

### 1 Tile, Cosine
0.7302428256070645

### 1 Tile, Cosine, non optimal tile size
0.6750551876379697

# InceptionV3

### 1 Tile, Cosine
0.702428256070641

### 1 Tile, Cosine, non optimal tile size
0.6066225165562928

# VGG19

### 1 Tile, Cosine
0.7359823399558499


# MobileNetV2

### 1 Tile, Cosine, input not preprocessed
0.6838852097130254

### 1 Tile, Cosine
0.7249448123620316

### 1 Tile, Euclidean, input not preprocessed
0.6569536423841069

### 4 Tiles, Cosine, input not preprocessed
0.5366445916114811

### 4 Tiles, Cosine
0.5793598233995606

### 12 Tiles, Cosine, input not preprocessed
0.41662987490802034

### 63 Tiles, Cosine, input not preprocessed
0.28294614387331396

In [None]:
from keras.applications import densenet
# Input height and width used for the model below.
default_tile_size = 224
# DenseNet201 with ImageNet weights and no classification head.
model = densenet.DenseNet201(weights="imagenet", include_top=False, input_shape=(default_tile_size, default_tile_size, 3))
# Print the layer-by-layer architecture.
model.summary()