In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import csv

## Config

In [None]:
# --- Dataset CSVs: the cells below read their `id` and `landmark_id` columns ---
TRAINING_DATA_PATH = './TargetData-TF-Train.csv'
VALIDATION_DATA_PATH = './TargetData-TF-Valid.csv'
VALIDATION_DATA_PREDICTION_PATH = './TargetData-TF-Valid-Prediction.csv'
TEST_DATA_PATH = './TargetData-TF-Test.csv'

# Folder in which each image is looked up as `<id>.jpg`.
IMAGE_FOLDER_PATH = './TargetImages-TF/'

# landmark_id -> human-readable landmark name.
TARGET_CLASSES_MAPPING = {
    10419: 'Qutub Minar',
    47378: 'Eiffel Tower',
    168098: 'Golden Gate Bridge',
    162833: 'Pakistan Monument',
    1924: 'Niagara River',
}

# Ratio-test threshold: a match is kept when best.distance < SIFT_FACTOR * second.distance.
SIFT_FACTOR = 0.50

# Newer OpenCV releases expose SIFT in the main module; older ones only ship it
# in the contrib `xfeatures2d` namespace. Support both so the notebook runs on
# either kind of install.
if hasattr(cv2, 'SIFT_create'):
    SIFT = cv2.SIFT_create()
else:
    SIFT = cv2.xfeatures2d.SIFT_create()

# Parameters handed to cv2.FlannBasedMatcher.
# NOTE(review): the OpenCV feature-matching tutorial uses FLANN_INDEX_KDTREE = 1
# together with `trees`; algorithm=0 appears to select FLANN's linear index, for
# which `trees` would be unused. Value left unchanged so results stay comparable
# with the recorded run — confirm intent.
index_params = dict(algorithm=0, trees=5)
search_params = dict()

## Dataset

In [3]:
def load_data(filename):
    """Read a landmark CSV and compute SIFT descriptors for every listed image.

    Parameters
    ----------
    filename : str
        Path to a CSV with an ``id`` column (image file stem) and a
        ``landmark_id`` column (class label).

    Returns
    -------
    id_mapping : dict
        ``{image_id: landmark_id}`` for every row of the CSV.
    id_sift_des : dict
        ``{image_id: descriptors}``, where ``descriptors`` is the second value
        returned by ``SIFT.detectAndCompute`` for the grayscale image.

    Raises
    ------
    FileNotFoundError
        If an image listed in the CSV cannot be read from ``IMAGE_FOLDER_PATH``.
    """
    df = pd.read_csv(filename)

    id_mapping = {}
    id_sift_des = {}

    # Walk the two columns positionally so the loop does not depend on the
    # frame's index labels being 0..n-1.
    rows = zip(df['id'], df['landmark_id'])
    for position, (image_id, image_label) in enumerate(rows):
        if position % 25 == 0:
            print(f'Read {position} images')

        image_path = os.path.join(IMAGE_FOLDER_PATH, image_id + '.jpg')
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; fail here with the offending path rather
            # than letting SIFT choke on a None image.
            raise FileNotFoundError(f'Could not read image: {image_path}')

        _, des_sift = SIFT.detectAndCompute(image, None)

        id_mapping[image_id] = image_label
        id_sift_des[image_id] = des_sift

    return id_mapping, id_sift_des

In [4]:
# Training split: image_id -> landmark_id, and image_id -> SIFT descriptors.
train_id_mapping, train_id_sift_des = load_data(TRAINING_DATA_PATH)

Read 0 images
Read 25 images
Read 50 images
Read 75 images
Read 100 images
Read 125 images
Read 150 images
Read 175 images
Read 200 images
Read 225 images


In [5]:
# Validation split: image_id -> landmark_id, and image_id -> SIFT descriptors.
valid_id_mapping, valid_id_sift_des = load_data(VALIDATION_DATA_PATH)

Read 0 images
Read 25 images
Read 50 images
Read 75 images
Read 100 images


In [6]:
def good_matching(matches, factor):
    """Count the k-NN matches that pass Lowe's ratio test.

    Parameters
    ----------
    matches : iterable
        One entry per query descriptor; each entry is a sequence holding the
        best and second-best match, both exposing a ``distance`` attribute.
    factor : float
        Ratio threshold. The best match is counted when
        ``best.distance < factor * second.distance``.

    Returns
    -------
    int
        Number of entries whose best match passes the test.
    """
    good_count = 0
    for pair in matches:
        # A k=2 query can come back with fewer than two neighbours; there is
        # nothing to compare against, so the entry cannot pass the ratio test.
        if len(pair) < 2:
            continue
        best, second = pair[0], pair[1]
        if best.distance < factor * second.distance:
            good_count += 1

    return good_count


def matching(des_a, des_b):
    """Return the 2-nearest-neighbour FLANN matches from ``des_a`` to ``des_b``.

    This function was previously also named ``good_matching``, which shadowed
    the ratio-test counter of that name and left the ``matching(...)`` call in
    ``tf_predictor`` unresolved.

    Parameters
    ----------
    des_a : array-like or None
        Query descriptors.
    des_b : array-like or None
        Descriptors to search in.

    Returns
    -------
    sequence
        Result of ``knnMatch(des_a, des_b, k=2)``, or an empty list when
        either descriptor set is missing or too small to query.
    """
    # Descriptors can be None (presumably when no keypoints were detected —
    # confirm against the SIFT output), and a k=2 search needs at least two
    # candidates on the searched side. Treat those cases as "no matches".
    if des_a is None or des_b is None or len(des_a) == 0 or len(des_b) < 2:
        return []

    flann = cv2.FlannBasedMatcher(index_params, search_params)
    return flann.knnMatch(des_a, des_b, k=2)


def tf_predictor(reference_id_mapping, reference_id_sift_des, sample_sift_des):
    """Predict a sample's label from SIFT good-match counts against references.

    Parameters
    ----------
    reference_id_mapping : dict
        ``{image_id: label}`` for the reference images.
    reference_id_sift_des : dict
        ``{image_id: descriptors}`` for the same reference images.
    sample_sift_des
        Descriptors of the sample to classify.

    Returns
    -------
    max_agg_label
        Label whose reference images collected the most good matches in
        total, or ``-1`` when no label scored above zero.
    max_gm_label
        Label of the single reference image with the most good matches, or
        ``-1`` when no image scored above zero.
    max_gm_reference
        Id of that best single reference image, or ``''`` when there is none.
    """
    # Every known target class starts at zero so it takes part in the
    # aggregate vote even if no reference image carries it.
    good_matches_sift = dict.fromkeys(TARGET_CLASSES_MAPPING, 0)

    max_gm_count = 0
    max_gm_label = -1
    max_gm_reference = ''
    for image_id, image_label in reference_id_mapping.items():
        gm_count = good_matching(
            matching(reference_id_sift_des[image_id], sample_sift_des),
            SIFT_FACTOR)

        # .get() so a reference label that is absent from
        # TARGET_CLASSES_MAPPING is tallied instead of raising KeyError.
        good_matches_sift[image_label] = (
            good_matches_sift.get(image_label, 0) + gm_count)

        # Strict '>' keeps the first reference seen on ties.
        if gm_count > max_gm_count:
            max_gm_count = gm_count
            max_gm_label = image_label
            max_gm_reference = image_id

    max_agg_matches = 0
    max_agg_label = -1
    for image_label, good_match_count in good_matches_sift.items():
        # Strict '>' keeps the first label in dict order on ties.
        if good_match_count > max_agg_matches:
            max_agg_matches = good_match_count
            max_agg_label = image_label

    return max_agg_label, max_gm_label, max_gm_reference

## Prediction

In [None]:
# Predict every validation image and count how often each strategy is right.
df_valid = pd.read_csv(VALIDATION_DATA_PATH)

total_test_correct_agg = 0
total_test_correct_max = 0
index = 0  # kept at module level: the summary cell below reports `index + 1`

# Predictions are gathered in plain lists and attached as whole columns after
# the loop. Writing cell-by-cell through `df[col][i] = value` is chained
# assignment: pandas warns about it and, with Copy-on-Write enabled, the write
# never reaches the frame.
predictions_agg = []
predictions_max = []
predictions_max_ref = []

valid_rows = zip(df_valid['id'], df_valid['landmark_id'])
for index, (image_id, image_label) in enumerate(valid_rows):
    max_agg_label, max_gm_label, max_gm_reference = tf_predictor(
        train_id_mapping, train_id_sift_des, valid_id_sift_des[image_id])

    if image_label == max_agg_label:
        total_test_correct_agg += 1

    if image_label == max_gm_label:
        total_test_correct_max += 1

    predictions_agg.append(max_agg_label)
    predictions_max.append(max_gm_label)
    predictions_max_ref.append(max_gm_reference)

    if index % 10 == 0:
        print(f'Total Images: {index + 1}, Correct Agg Count: {total_test_correct_agg}, Correct Max Count: {total_test_correct_max}')

# Same column names and order as before; one entry per validation row.
df_valid['prediction_agg'] = predictions_agg
df_valid['prediction_max'] = predictions_max
df_valid['prediction_max_ref'] = predictions_max_ref



In [9]:
# Final tally; `index` is the last row position left over from the prediction loop.
print(f'Total Images: {index + 1}, Correct Agg Count: {total_test_correct_agg}, Correct Max Count: {total_test_correct_max}')
# Persist the validation frame. NOTE(review): to_csv also writes the row index
# as an unnamed leading column by default — pass index=False if that is unwanted.
df_valid.to_csv(VALIDATION_DATA_PREDICTION_PATH, sep=',')

Total Images: 125, Correct Agg Count: 69, Correct Max Count: 60


## Single Predictor

In [None]:
# Run the predictor on one validation image and compare against its true label.
valid_image_id = '6335b108046384ea'

# Look the descriptors up through `valid_image_id` so that changing the id
# above changes both the prediction and the printed ground-truth label.
max_agg_label, max_gm_label, max_gm_reference = tf_predictor(
    train_id_mapping, train_id_sift_des, valid_id_sift_des[valid_image_id])


print(f'Label : {valid_id_mapping[valid_image_id]}')
print(f'Max Agg Label : {max_agg_label}')
print(f'Max GM Label : {max_gm_label}')
print(f'Max GM Reference Image Id : {max_gm_reference}')