In [7]:
import os
import pickle
from itertools import combinations

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
# ResNet-101 retrieval results as plain arrays. Both are indexed as [i, rank]
# by `extract_match`, which treats column 0 as the top candidate.
dist = pd.read_csv('./resnet101_matches_distance.csv').to_numpy()
fname = pd.read_csv('./resnet101_matches_filename.csv').to_numpy()

In [3]:
# SIFT matching results as plain arrays. The main loop reads row i of each for
# test image i; `good` is compared against MIN_MATCHES there, so it presumably
# holds match counts despite the "distance" file name -- TODO confirm.
good = pd.read_csv('./sift_matches_distance.csv').to_numpy()
fgood = pd.read_csv('./sift_matches_filename.csv').to_numpy()

In [4]:
# Training table indexed by its first CSV column (looked up by image id later on),
# and the ids of the test images in submission order.
train_xy = pd.read_csv('train.csv', index_col=0)
test_path = pd.read_csv('imagenames.csv')['id'].to_numpy()

In [5]:
def extract_match(i, thresh):
    """Return the candidate file names for row ``i`` that are close to the top one.

    A candidate is kept when its entry in the module-level ``dist`` array is
    strictly below ``dist[i, 0] + thresh``. The result is a column-shaped
    array (one row per kept candidate) taken from the module-level ``fname``
    array; callers flatten it.
    """
    cutoff = dist[i, 0] + thresh
    kept_columns = np.argwhere(dist[i] < cutoff)
    return fname[i, kept_columns]

In [None]:
# Pinhole camera intrinsics built from the field of view (given in degrees and
# converted to radians) and the principal point.
FOV_X, FOV_Y = (degrees*np.pi/180 for degrees in (73.3, 53.1))

# Principal point at half of 680 x 490 -- presumably the image size in pixels.
cx, cy = 680/2, 490/2

# Focal lengths from the half-angle relation f = c / tan(FOV / 2).
fx, fy = cx/np.tan(FOV_X/2), cy/np.tan(FOV_Y/2)

K = np.eye(3)
K[0, 0], K[1, 1] = fx, fy
K[0, 2], K[1, 2] = cx, cy

In [None]:
# Step-through version of `displacement_calculation` for a single test image.
# Fill in the three placeholders below. While any of them is still `...` the
# cell only prints a notice, so Restart & Run All is not interrupted.
test = ...      # id of the test image (file name without '.jpg')
coords = ...    # 2-D position of every training image in `fnames`, indexable by position
fnames = ...    # ids of the candidate training images
cam_matrix = K
ratio = 0.6
max_range = 1.2

sift = cv2.SIFT_create()
# FLANN parameters and initialize
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50)   # or pass empty dictionary
flann = cv2.FlannBasedMatcher(index_params,search_params)

kp_train = []
des_train = []
train_vecs = []
cluster_centrals = None
cluster_count = []
biggest_cluster = None

if any(placeholder is ... for placeholder in (test, coords, fnames)):
    print('Scratch cell skipped: set `test`, `coords` and `fnames` first.')
else:
    test_img = cv2.imread('./test/' + test + '.jpg')
    kp_test,des_test = sift.detectAndCompute(test_img,None)

    for train in fnames:
        train_img = cv2.imread('./train/' + train + '.jpg')
        kp,des = sift.detectAndCompute(train_img,None)
        kp_train.append(kp)
        des_train.append(des)

        # Matching descriptor using KNN algorithm
        matches = flann.knnMatch(des,des_test,k=2)

        # Store all good matches as per Lowe's Ratio test.
        # Named `good_matches` so the module-level `good` array (SIFT results
        # read by the main loop) is not overwritten by this cell.
        good_matches = [m for m,n in matches if m.distance < ratio*n.distance]
        pts_train = np.float32([kp[m.queryIdx].pt for m in good_matches]).reshape(-1,1,2)
        pts_test = np.float32([kp_test[m.trainIdx].pt for m in good_matches]).reshape(-1,1,2)
        E,_ = cv2.findEssentialMat(pts_train,pts_test,cam_matrix,method=cv2.FM_LMEDS)
        _,_,T = cv2.decomposeEssentialMat(E)
        # Keep the x and z components of the translation as a (2, 1) column
        train_vecs.append(T[[0,2]])

    for pt1,pt2 in combinations(range(len(fnames)),2):
        # Solve a*v1 + b*v2 = p2 - p1; the estimate is then p1 + a*v1
        displacement = (coords[pt2] - coords[pt1]).reshape(2,1)
        unit_vectors = np.append(train_vecs[pt1],train_vecs[pt2], axis=1)
        const = np.linalg.solve(unit_vectors,displacement)[0,0]
        # Flatten both operands: adding a (2,) point to a (2, 1) column would
        # broadcast to a 2x2 array instead of a 2-D location.
        loc = np.ravel(coords[pt1]) + const * np.ravel(train_vecs[pt1])
        if cluster_centrals is None:
            # First estimate seeds the first cluster; nothing to compare against yet
            cluster_centrals = np.array([loc])
            cluster_count.append(1)
            continue
        # Get distance from point to each cluster
        distances = np.sum((cluster_centrals - loc)**2, axis=1)**0.5
        nearest = np.argmin(distances)
        # If point is far away from clusters, it's on its own cluster
        if distances[nearest] > max_range:
            cluster_centrals = np.append(cluster_centrals,[loc], axis=0)
            cluster_count.append(1)
        # If not, it belongs to the cluster with the nearest center. Update that one
        else:
            cluster_centrals[nearest] = (cluster_centrals[nearest]
                                        * cluster_count[nearest]
                                        + loc) / (cluster_count[nearest]+1)
            cluster_count[nearest] += 1

    # Pick the most populated cluster once every pair has been processed
    if cluster_count:
        biggest_cluster = np.argmax(cluster_count)

In [None]:
# Module-level SIFT detector and FLANN matcher; `displacement_calculation`
# below reads both as globals.
sift = cv2.SIFT_create()

# KD-tree index with 5 trees; each query performs 50 checks
# (an empty dictionary may be passed for the search parameters instead).
FLANN_INDEX_KDTREE = 1
index_params = {'algorithm': FLANN_INDEX_KDTREE, 'trees': 5}
search_params = {'checks': 50}
flann = cv2.FlannBasedMatcher(index_params, search_params)

def _ground_direction(train, kp_test, des_test, cam_matrix, ratio):
    """Direction (x and z translation components) relating one training image to the test image.

    Returns a length-2 float array, or ``None`` when the training image cannot
    be read or does not give enough ratio-test matches for an essential matrix.
    """
    train_img = cv2.imread('./train/' + train + '.jpg')
    if train_img is None:  # cv2.imread returns None instead of raising
        return None
    kp, des = sift.detectAndCompute(train_img, None)
    if des is None or len(des) < 2:  # k=2 matching needs at least two descriptors
        return None

    # Matching descriptor using KNN algorithm
    matches = flann.knnMatch(des, des_test, k=2)

    # Store all good matches as per Lowe's Ratio test.
    good = [pair[0] for pair in matches
            if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance]
    if len(good) < 5:  # essential-matrix estimation needs at least five correspondences
        return None

    pts_train = np.float32([kp[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    pts_test = np.float32([kp_test[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    E, _ = cv2.findEssentialMat(pts_train, pts_test, cam_matrix, method=cv2.FM_LMEDS)
    # Skip failed or ambiguous estimates (anything that is not a single 3x3 matrix)
    if E is None or np.shape(E) != (3, 3):
        return None
    _, _, T = cv2.decomposeEssentialMat(E)
    return np.asarray(T, dtype=float)[[0, 2]].reshape(2)


def displacement_calculation(test, coords, fnames, cam_matrix, ratio=0.6, max_range=1.2):
    """Estimate the 2-D position of a test image from training images with known positions.

    For every training image a direction towards the test camera is derived
    from SIFT matches and the essential matrix. Each pair of training images
    then yields one position estimate (the intersection of their two direction
    lines). The estimates are clustered online and the center of the most
    populated cluster is returned.

    NOTE(review): the directions come out of ``cv2.decomposeEssentialMat`` in
    camera coordinates and are used directly in the coordinate frame of
    ``coords`` -- confirm that the two frames are aligned.

    Parameters
    ----------
    test : str
        Test image id; the file read is ``'./test/' + test + '.jpg'``.
    coords : array-like
        One 2-D position per entry of ``fnames``, in the same order.
    fnames : sequence of str
        Training image ids; files are read from ``'./train/' + id + '.jpg'``.
    cam_matrix : array-like
        Camera matrix passed to ``cv2.findEssentialMat``.
    ratio : float
        Lowe ratio-test threshold for keeping a match.
    max_range : float
        Maximum distance from a cluster center for an estimate to join it.

    Returns
    -------
    numpy.ndarray or None
        Center of the largest cluster, or ``None`` when no pair of training
        images produced a usable estimate.

    Raises
    ------
    FileNotFoundError
        If the test image cannot be read.
    """
    coords = np.asarray(coords, dtype=float)

    test_img = cv2.imread('./test/' + test + '.jpg')
    if test_img is None:
        raise FileNotFoundError('cannot read test image ./test/' + test + '.jpg')
    kp_test, des_test = sift.detectAndCompute(test_img, None)
    if des_test is None or len(des_test) < 2:
        return None

    # One direction per training image, aligned with `coords`; None marks an unusable image
    train_vecs = [_ground_direction(train, kp_test, des_test, cam_matrix, ratio)
                  for train in fnames]

    cluster_centrals = None
    cluster_count = []
    for pt1, pt2 in combinations(range(len(fnames)), 2):
        v1, v2 = train_vecs[pt1], train_vecs[pt2]
        if v1 is None or v2 is None:
            continue
        # Solve a*v1 + b*v2 = p2 - p1. Only `a` is needed: the estimate is
        # p1 + a*v1, which is unaffected by the sign ambiguity of v1 or v2.
        displacement = np.ravel(coords[pt2]) - np.ravel(coords[pt1])
        try:
            const = np.linalg.solve(np.column_stack((v1, v2)), displacement)[0]
        except np.linalg.LinAlgError:
            continue  # parallel directions never intersect
        loc = np.ravel(coords[pt1]) + const * v1

        if cluster_centrals is None:
            # First estimate seeds the first cluster
            cluster_centrals = np.array([loc])
            cluster_count.append(1)
            continue
        # Get distance from point to each cluster
        distances = np.sum((cluster_centrals - loc)**2, axis=1)**0.5
        nearest = np.argmin(distances)
        # If point is far away from clusters, it's on its own cluster
        if distances[nearest] > max_range:
            cluster_centrals = np.append(cluster_centrals, [loc], axis=0)
            cluster_count.append(1)
        # If not, fold it into the running mean of the nearest cluster
        else:
            cluster_centrals[nearest] = (cluster_centrals[nearest]
                                         * cluster_count[nearest]
                                         + loc) / (cluster_count[nearest] + 1)
            cluster_count[nearest] += 1

    if cluster_centrals is None:
        return None
    # Decide only after every pair has contributed its estimate
    return cluster_centrals[np.argmax(cluster_count)]

In [25]:
# Sanity check of the 2x2 solve used for intersecting two direction lines:
# find a, b such that a*v1 - b*v2 = v0 and display a.
v0 = np.array([[5], [6]])
v1 = np.array([[3], [4]])
v2 = np.array([[1], [2]])
v12 = np.hstack((v1, -v2))
np.linalg.solve(v12, v0)[0, 0]

2.0

In [None]:
# Similar Histograms Online Clustered K-Means (SHOCK)
def hist_onl_kmeans(data, hist, fnames, max_clusters, max_range, min_size = 1, take_best_hist = False):
    """Cluster points online and select one cluster by vote.

    Points are visited in order. A point joins the nearest cluster when it
    lies within ``max_range`` of that cluster's center (which is updated as a
    running mean); otherwise it opens a new cluster. Every cluster also keeps
    the best histogram-intersection score between ``hist`` and channel 0 of
    its members' training images.

    Parameters
    ----------
    data : sequence of 2-D points
        Coordinates to cluster, in priority order.
    hist : histogram
        Reference histogram handed to ``cv2.compareHist``.
    fnames : sequence of str
        Training image id of each point; images are read from
        ``'./train/' + id + '.jpg'``.
    max_clusters : int
        Upper bound on the number of clusters; values <= 0 disable the bound.
    max_range : float
        Maximum distance to a cluster center for a point to join it.
    min_size : int
        Cluster size required to accept the result once ``max_clusters`` is hit.
    take_best_hist : bool
        When true, always select the cluster with the highest histogram score.

    Returns
    -------
    tuple
        ``(points, filenames, center)`` of the selected cluster. Cluster 0
        (seeded by the first point) is selected when it is also the most
        populated one and ``take_best_hist`` is false; otherwise the cluster
        with the highest histogram score is selected. ``(None, None, None)``
        is returned when the cluster limit is reached and no cluster has
        ``min_size`` members.

    Raises
    ------
    ValueError
        If ``data`` is empty.
    """
    def get_hist(fn):
        # Intersection score between `hist` and the channel-0 histogram of training image `fn`
        curr_img = cv2.imread('./train/' + fn + '.jpg')
        d_hist = cv2.calcHist([curr_img],[0],None,[256],[0,256])
        return cv2.compareHist(hist,d_hist,cv2.HISTCMP_INTERSECT)

    cluster_centrals = None
    cluster_elems = []
    cluster_filename = []
    cluster_hist = []
    cluster_count = []
    for i,coord in enumerate(data):
        if cluster_centrals is not None:
            # Get distance from point to each cluster
            distances = np.sum((cluster_centrals - coord)**2, axis=1)**0.5
            nearest = np.argmin(distances)
            if not distances[nearest] > max_range:
                # Close enough: fold the point into the running mean of the nearest cluster
                cluster_centrals[nearest] = (cluster_centrals[nearest]
                                           * cluster_count[nearest]
                                           + coord) / (cluster_count[nearest]+1)
                cluster_elems[nearest].append(coord)
                cluster_filename[nearest].append(fnames[i])
                compared_hist = get_hist(fnames[i])
                if compared_hist > cluster_hist[nearest]: cluster_hist[nearest] = compared_hist
                cluster_count[nearest] += 1
                continue
            # Stop when max number of clusters reached and have a big enough cluster
            if cluster_centrals.shape[0] >= max_clusters and max_clusters > 0:
                if np.max(cluster_count) >= min_size: break
                # Not big enough clusters means that the CNN is messed up
                return None,None,None
        # First point, or a point far from every cluster: open a new cluster.
        # Centers are stored as floats so the running mean is not truncated
        # when the input coordinates are integers.
        new_center = np.array([coord], dtype=float)
        if cluster_centrals is None:
            cluster_centrals = new_center
        else:
            cluster_centrals = np.append(cluster_centrals, new_center, axis=0)
        cluster_elems.append([coord])
        cluster_filename.append([fnames[i]])
        cluster_hist.append(get_hist(fnames[i]))
        cluster_count.append(1)

    if cluster_centrals is None:
        raise ValueError('hist_onl_kmeans: `data` is empty, nothing to cluster')

    # Vote: keep cluster 0 when it is also the biggest one (unless the caller
    # asked for the best histogram), otherwise take the most similar histogram.
    biggest_cluster = np.argmax(cluster_count)
    similar_hist_cluster = np.argmax(cluster_hist)
    best_cluster = 0 if biggest_cluster == 0 and not take_best_hist else similar_hist_cluster
    return cluster_elems[best_cluster], \
           cluster_filename[best_cluster], \
           cluster_centrals[best_cluster]

In [6]:
# Similar Histograms Online Clustered K-Means (SHOCK)
def sift_onl_kmeans(data, hist, fnames, max_clusters, max_range, min_size = 1, take_best_hist = False):
    """Cluster points online and select one cluster by vote.

    NOTE(review): this function has the same signature and logic as
    ``hist_onl_kmeans`` and is not called by the main loop in this notebook;
    consider keeping only one of the two.

    Points are visited in order. A point joins the nearest cluster when it
    lies within ``max_range`` of that cluster's center (which is updated as a
    running mean); otherwise it opens a new cluster. Every cluster also keeps
    the best histogram-intersection score between ``hist`` and channel 0 of
    its members' training images.

    Parameters
    ----------
    data : sequence of 2-D points
        Coordinates to cluster, in priority order.
    hist : histogram
        Reference histogram handed to ``cv2.compareHist``.
    fnames : sequence of str
        Training image id of each point; images are read from
        ``'./train/' + id + '.jpg'``.
    max_clusters : int
        Upper bound on the number of clusters; values <= 0 disable the bound.
    max_range : float
        Maximum distance to a cluster center for a point to join it.
    min_size : int
        Cluster size required to accept the result once ``max_clusters`` is hit.
    take_best_hist : bool
        When true, always select the cluster with the highest histogram score.

    Returns
    -------
    tuple
        ``(points, filenames, center)`` of the selected cluster. Cluster 0
        (seeded by the first point) is selected when it is also the most
        populated one and ``take_best_hist`` is false; otherwise the cluster
        with the highest histogram score is selected. ``(None, None, None)``
        is returned when the cluster limit is reached and no cluster has
        ``min_size`` members.

    Raises
    ------
    ValueError
        If ``data`` is empty.
    """
    def get_hist(fn):
        # Intersection score between `hist` and the channel-0 histogram of training image `fn`
        curr_img = cv2.imread('./train/' + fn + '.jpg')
        d_hist = cv2.calcHist([curr_img],[0],None,[256],[0,256])
        return cv2.compareHist(hist,d_hist,cv2.HISTCMP_INTERSECT)

    cluster_centrals = None
    cluster_elems = []
    cluster_filename = []
    cluster_hist = []
    cluster_count = []
    for i,coord in enumerate(data):
        if cluster_centrals is not None:
            # Get distance from point to each cluster
            distances = np.sum((cluster_centrals - coord)**2, axis=1)**0.5
            nearest = np.argmin(distances)
            if not distances[nearest] > max_range:
                # Close enough: fold the point into the running mean of the nearest cluster
                cluster_centrals[nearest] = (cluster_centrals[nearest]
                                           * cluster_count[nearest]
                                           + coord) / (cluster_count[nearest]+1)
                cluster_elems[nearest].append(coord)
                cluster_filename[nearest].append(fnames[i])
                compared_hist = get_hist(fnames[i])
                if compared_hist > cluster_hist[nearest]: cluster_hist[nearest] = compared_hist
                cluster_count[nearest] += 1
                continue
            # Stop when max number of clusters reached and have a big enough cluster
            if cluster_centrals.shape[0] >= max_clusters and max_clusters > 0:
                if np.max(cluster_count) >= min_size: break
                # Not big enough clusters means that the CNN is messed up
                return None,None,None
        # First point, or a point far from every cluster: open a new cluster.
        # Centers are stored as floats so the running mean is not truncated
        # when the input coordinates are integers.
        new_center = np.array([coord], dtype=float)
        if cluster_centrals is None:
            cluster_centrals = new_center
        else:
            cluster_centrals = np.append(cluster_centrals, new_center, axis=0)
        cluster_elems.append([coord])
        cluster_filename.append([fnames[i]])
        cluster_hist.append(get_hist(fnames[i]))
        cluster_count.append(1)

    if cluster_centrals is None:
        raise ValueError('sift_onl_kmeans: `data` is empty, nothing to cluster')

    # Vote: keep cluster 0 when it is also the biggest one (unless the caller
    # asked for the best histogram), otherwise take the most similar histogram.
    biggest_cluster = np.argmax(cluster_count)
    similar_hist_cluster = np.argmax(cluster_hist)
    best_cluster = 0 if biggest_cluster == 0 and not take_best_hist else similar_hist_cluster
    return cluster_elems[best_cluster], \
           cluster_filename[best_cluster], \
           cluster_centrals[best_cluster]

## Exhaustive SIFT, vote between best match, biggest cluster and most similar color spectrum

In [9]:
# COCK params for images with few features
MIN_MATCHES = 5          # best SIFT score below this falls back to CNN-only clustering
threshold = 5            # slack over the top CNN distance when picking candidates
max_clusters = 5
max_radius = 15#7
min_size = 1
max_match_keep = 0.4     # keep SIFT matches scoring above this fraction of the best one

# FLANN specs
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params,search_params)

centroids = []
for i,test in enumerate(tqdm(test_path)):
    test_img = cv2.imread('./test/' + test + '.jpg')
    t_hist = cv2.calcHist([test_img],[0],None,[256],[0,256])

    img_idx = extract_match(i,threshold).flatten()

    sift_fname = fgood[i,:]
    sift_match = good[i,:]

    # Basically get all cnn indices in order of best SIFT matches
    # (assumes each row of `fgood`/`good` is sorted best match first -- TODO confirm).
    matchings = []
    good_m = []
    # pd.isna instead of `is np.nan`: the identity test only recognises the
    # np.nan singleton and misses any other NaN value.
    if not pd.isna(sift_fname[0]):
        for match_idx,m in enumerate(sift_fname):
            if m in img_idx:
                matchings.append(m)
                good_m.append(sift_match[match_idx])

    # Weak finding: Do COCK instead. This covers a row without SIFT matches,
    # no SIFT match among the CNN candidates (good_m empty, which previously
    # raised IndexError), and a best score below MIN_MATCHES.
    if not good_m or good_m[0] < MIN_MATCHES:
        coords = train_xy.loc[img_idx].values
        _, _, centroid = hist_onl_kmeans(coords, t_hist, img_idx, -1, max_radius, min_size, take_best_hist=True)
    else:
        # Once again do thresholding
        good_match = [m for m,score in zip(matchings, good_m)
                      if score > good_m[0]*max_match_keep]
        coords = train_xy.loc[good_match].values
        _, _, centroid = hist_onl_kmeans(coords, t_hist, good_match, max_clusters, max_radius, min_size)

    centroids.append(centroid)

100%|██████████| 1200/1200 [01:22<00:00, 14.52it/s]


## File export

In [10]:
# One row per test image id, written with the columns 'id', 'x', 'y'.
out = pd.DataFrame(centroids,index=test_path)
out_path = 'Results/COCK_SIFT_voted_5_5_5_15_1_04.csv'
# to_csv does not create missing folders; make sure the target exists so the
# results of the long loop above are not lost to an OSError.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
out.to_csv(out_path,index_label='id',header=['x','y'])