## Import statements

In [None]:
import os
import sys 
import cv2
import time
import glob
import shutil
import pickle
import sqlite3
import IPython
import fnmatch
import copyreg
import subprocess
from math import exp
from PIL import Image
import multiprocessing
from pprint import pprint
import concurrent.futures
from tqdm.notebook import tqdm

## Helper functions

In [None]:
def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of at most ``n`` items.

    The last slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    offsets = range(0, len(lst), n)
    yield from (lst[start:start + n] for start in offsets)


def thread_it(thread_function, my_list, tq=True, WORKERS=None):
    """Run ``thread_function`` once per item of ``my_list`` on a thread pool.

    Return values are discarded. An exception raised by ``thread_function``
    is printed and does not stop the remaining items.

    Args:
        thread_function: Callable invoked with a single item.
        my_list: Items to process.
        tq: When truthy, show a tqdm progress bar.
        WORKERS: Maximum number of worker threads; defaults to the CPU count.
    """
    # Set worker number to CPU count
    if not WORKERS:
        WORKERS = multiprocessing.cpu_count()

    items = list(my_list)
    # Only build (and later close) a progress bar when one was requested;
    # calling ``close`` on the plain ``False`` flag would raise AttributeError.
    progress = tqdm(total=len(items)) if tq else None

    try:
        # A single pool already caps concurrency at WORKERS, so the input does
        # not have to be cut into chunks with a fresh pool for each of them.
        with concurrent.futures.ThreadPoolExecutor(max_workers=WORKERS) as executor:
            pending = [executor.submit(thread_function, item) for item in items]
            # Tick the bar when work finishes rather than when it is queued.
            for future in concurrent.futures.as_completed(pending):
                error = future.exception()
                if error is not None:
                    print(error)
                if progress is not None:
                    progress.update(1)
    finally:
        if progress is not None:
            progress.close()



def thread_it_return(thread_function, my_list, tq=True, WORKERS=None):
    """Run ``thread_function`` over ``my_list`` concurrently and collect results.

    Args:
        thread_function: Callable invoked with a single item.
        my_list: Items to process.
        tq: When truthy, show a tqdm progress bar.
        WORKERS: Maximum number of worker threads; defaults to the CPU count.

    Returns:
        The non-``None`` return values, in the order of the input items.

    Raises:
        Whatever ``thread_function`` raises; the first failing item (in input
        order) propagates to the caller.
    """
    # Set worker number to CPU count
    if not WORKERS:
        WORKERS = multiprocessing.cpu_count()

    items = list(my_list)
    # Only build (and later close) a progress bar when one was requested.
    progress = tqdm(total=len(items)) if tq else None

    results = []
    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=WORKERS) as executor:
            # ``map`` queues every item up front and yields results in input
            # order. Waiting on each future right after submitting it would
            # run the items one at a time.
            for return_value in executor.map(thread_function, items):
                if return_value is not None:
                    results.append(return_value)
                if progress is not None:
                    progress.update(1)
    finally:
        if progress is not None:
            progress.close()

    return results
    

def show_img_by_path(a, resize=True, size=(320, 240)):
    """Display the image stored at path ``a`` inline in the notebook.

    When ``resize`` is truthy the image is first scaled to ``size``, which is
    handed straight to ``Image.resize``.
    """
    # TODO (original author): scale images beforehand, for network access.
    picture = Image.open(a)
    picture = picture.resize(size=size) if resize else picture
    IPython.display.display(picture)


def create_folder(path):
    """Create directory ``path`` (including parents) unless it already exists.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # ``exist_ok`` removes the window between a separate isdir() check and the
    # creation, in which another process could create the directory first.
    os.makedirs(path, exist_ok=True)


def move_to(file_list, dest):
    """Move every path in ``file_list`` to ``dest`` while showing a progress bar.

    A failing move is printed and the loop carries on with the next path.

    Returns:
        ``True`` if at least one move raised, otherwise ``False``.
    """
    progress = tqdm(total=len(file_list))
    had_failure = False
    for source in file_list:
        try:
            shutil.move(source, dest)
        except Exception as err:
            print(err)
            had_failure = True
        progress.update(1)
    progress.close()
    return had_failure

## Define various variables
#### This includes all paths for image folders.

In [None]:
# Input folders: these should already exist from the "Download"
# (presumably an earlier download step -- confirm).
compare_dir = "compare_set/"
data_dir = "images/"
# Passed to COLMAP as --SiftExtraction.max_num_features.
points_num = 1

# Folder that receives the data images rated below the threshold.
consider_dir = "consider/"
# Folder holding the pickled id / pair-id caches.
pickles = "pickles/"

# Create the output folders if they are missing.
dirs = [consider_dir, pickles]
for path in dirs:
    create_folder(path)

## Feature Extraction (Comparison set)

In [None]:
# Extract SIFT features of the comparison set into the COLMAP database.
cmd = r"colmap feature_extractor --database_path ./colmap_folder/colmap.db --image_path compare_set/ --SiftExtraction.max_num_features {}".format(points_num)
# stderr has to be piped too, otherwise communicate() always returns None for it.
process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = process.communicate()
# The tool may write nothing to stdout; indexing [-1] on an empty list would raise.
if output.splitlines():
    print(output.splitlines()[-1])
print("ERROR:", error)
print("Finished extracting features from Comparison set")

## Feature Matching (Comparison set)

In [None]:
# Match every comparison image against every other comparison image.
cmd = r"colmap exhaustive_matcher --database_path ./colmap_folder/colmap.db"
# stderr has to be piped too, otherwise communicate() always returns None for it.
process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = process.communicate()
# The tool may write nothing to stdout; indexing [-1] on an empty list would raise.
if output.splitlines():
    print(output.splitlines()[-1])
print("ERROR:", error)
print("Finished matching features from Comparison set")

## Database Functions

In [None]:
def create_connection(db_file):
    """Open a SQLite connection to ``db_file``.

    Returns:
        The ``sqlite3.Connection``, or ``None`` when the database could not be
        opened (the error is printed).
    """
    conn = None
    try:
        # NOTE: sqlite3 interprets ``timeout`` in seconds.
        conn = sqlite3.connect(db_file, timeout=30000)
    except sqlite3.Error as e:
        # The bare name ``Error`` is not defined in this file; the exception
        # class lives in the sqlite3 module.
        print(e)

    return conn

def delete_from(conn, table, column_name, where_value):
    """Delete every row of ``table`` whose ``column_name`` equals ``where_value``.

    ``table`` and ``column_name`` are interpolated as SQL identifiers and must
    come from trusted code. ``where_value`` is bound as a parameter, so values
    containing quotes (e.g. file names) are handled correctly.
    """
    sql = "DELETE FROM {} WHERE {}=?".format(table, column_name)
    cur = conn.cursor()
    cur.execute(sql, (where_value,))
    conn.commit()

def select_what_from_where(conn, what, table, where_name, where_value):
    """Return the first row of ``SELECT what FROM table WHERE where_name = where_value``.

    ``what``, ``table`` and ``where_name`` are interpolated as SQL and must come
    from trusted code; ``where_value`` is bound as a parameter.

    Returns:
        The first matching row as a tuple, or ``None`` when nothing matches.
    """
    cur = conn.cursor()
    # The filter column is the ``where_name`` parameter.
    sql = "SELECT {} FROM {} WHERE {}=?".format(what, table, where_name)
    row = cur.execute(sql, (where_value,)).fetchone()
    conn.commit()
    return row

def select_what_from(conn, what, table):
    """Return every row of ``SELECT what FROM table`` as a list of tuples."""
    query = f"SELECT {what} FROM {table}"
    cursor = conn.cursor()
    cursor.execute(query)
    # Commit once before and once after pulling the rows.
    conn.commit()
    fetched = cursor.fetchall()
    conn.commit()
    return fetched

def decrement_cameras(conn):
    """Lower the ``sqlite_sequence`` counter stored for ``cameras`` by one."""
    cursor = conn.cursor()
    cursor.execute("UPDATE sqlite_sequence SET seq = seq - 1 WHERE name='cameras'")
    conn.commit()

def decrement_images(conn):
    """Lower the ``sqlite_sequence`` counter stored for ``images`` by one."""
    cursor = conn.cursor()
    cursor.execute("UPDATE sqlite_sequence SET seq = seq - 1 WHERE name='images'")
    conn.commit()

def remove_img_from_db(conn, filename, compare_images, delete=False):
    """Remove one data image and the rows derived from it from the COLMAP database.

    Deletes the image's ``images``, ``descriptors`` and ``keypoints`` rows, its
    ``matches`` / ``two_view_geometries`` rows against every comparison image,
    and its camera when no other image references that camera. The
    ``sqlite_sequence`` counters are decremented accordingly.

    Args:
        conn: Open connection to the COLMAP database.
        filename: Value of ``images.name`` for the image to remove.
        compare_images: Paths (or names) of the comparison images.
        delete: Unused; kept so existing calls keep working.
    """
    cur = conn.cursor()

    # Retrieve img_id and cam_id for image to delete
    images_row = cur.execute(
        "SELECT image_id, camera_id FROM images WHERE name=?", (filename,)
    ).fetchone()
    if images_row is None:
        # Nothing to remove; subscripting None would raise a TypeError.
        print(filename, "is not in the database")
        return
    img_id, cam_id = images_row

    # Check if that's the only image referencing that camera.
    other_refs = cur.execute(
        "SELECT COUNT(*) FROM images WHERE camera_id=? AND image_id!=?",
        (cam_id, img_id),
    ).fetchone()[0]
    if other_refs == 0:
        delete_from(conn, "cameras", "camera_id", cam_id)
        decrement_cameras(conn)

    # Delete any images, descriptors and keypoints for one data image record.
    delete_from(conn, "images", "name", filename)
    delete_from(conn, "descriptors", "image_id", img_id)
    delete_from(conn, "keypoints", "image_id", img_id)

    # Delete all matches and two_view_geometries for one data image to all comparison images
    for compare_img in compare_images:
        compare_filename = os.path.basename(compare_img)
        compare_row = cur.execute(
            "SELECT image_id FROM images WHERE name=?", (compare_filename,)
        ).fetchone()
        if compare_row is None:
            # Comparison image is not in the database, so it has no pairs.
            # Skip it instead of reusing the id from an earlier iteration.
            continue

        pair_id = image_ids_to_pair_id(img_id, compare_row[0])

        delete_from(conn, "matches", "pair_id", pair_id)
        delete_from(conn, "two_view_geometries", "pair_id", pair_id)

    decrement_images(conn)


In [None]:
#https://colmap.github.io/database.html
#https://github.com/colmap/colmap/blob/dev/scripts/python/database.py
MAX_IMAGE_ID = 2**31 - 1

def pair_id_to_image_ids(pair_id):
    """Split a COLMAP ``pair_id`` back into its two image ids.

    Returns:
        ``(image_id1, image_id2)`` as integers, where
        ``pair_id == image_id1 * MAX_IMAGE_ID + image_id2``.
    """
    image_id2 = pair_id % MAX_IMAGE_ID
    # Floor division keeps the id an int; ``/`` would yield a float in Python 3.
    image_id1 = (pair_id - image_id2) // MAX_IMAGE_ID
    return image_id1, image_id2

def load_matches(conn):
    """Build a symmetric ``{image: {other_image: num_matches}}`` map from the database.

    Every image in the ``images`` table gets an entry (possibly empty). The
    count is the ``rows`` column of the ``matches`` table. The connection is
    closed before returning.
    """
    names_by_id = {
        row[0]: row[1] for row in select_what_from(conn, "image_id, name", "images")
    }
    matches = {name: {} for name in names_by_id.values()}

    for record in select_what_from(conn, "pair_id, rows", "matches"):
        first_id, second_id = pair_id_to_image_ids(record[0])
        count = record[1]
        first_name = names_by_id[first_id]
        second_name = names_by_id[second_id]
        # Store the count in both directions.
        matches[first_name][second_name] = count
        matches[second_name][first_name] = count

    conn.close()

    return matches

# Load the pairwise match counts of the comparison set from the COLMAP database.
conn = create_connection("./colmap_folder/colmap.db")
compare_matches = load_matches(conn)
# load_matches() closes the connection itself; closing it again here is harmless.
conn.close()

In [None]:
def total_matches(matches):
    """Sum, for each image, its match counts against all of its partners.

    ``matches`` is a nested mapping ``{image: {other_image: num_matches}}``;
    the result maps each image to the sum of its inner values (0 when empty).
    """
    return {image: sum(partners.values()) for image, partners in matches.items()}

compare_total_matches = total_matches(compare_matches)
# If one image doesn't match at all, either eliminate it (and rerun) or find more images for the set.
pprint(compare_total_matches)

# Point out comparison images that matched nothing.
for key, val in compare_total_matches.items():
    if val == 0:
        print(key, "has no matches.")
        #show_img_by_path(compare_dir+key)

## Calculate threshold from comparison set 

In [None]:
def get_thr_from_compare(totals, multipler=1):
    """Derive a per-image match threshold from the comparison set.

    Args:
        totals: Nested mapping ``{img1: {img2: num_matches}}``.
        multipler: Factor applied to every match count before averaging.

    Returns:
        Dict mapping each inner-key image to the mean of its scaled match
        counts over all outer images.
    """
    scaled_per_image = {}

    tq = tqdm(total=len(totals))
    try:
        for item in totals.values():
            for img2, val in item.items():
                scaled_per_image.setdefault(img2, []).append(val * multipler)
            tq.update(1)
    finally:
        # Close the bar like the other helpers in this file do.
        tq.close()

    # Get the average feature match for a valid image for each image in the
    # reference set to every other image
    return {img: sum(vals) / len(vals) for img, vals in scaled_per_image.items()}

In [None]:
# Threshold per comparison image: the mean of its match counts scaled by 0.65.
thr_per_image = get_thr_from_compare(compare_matches, multipler=0.65)
pprint(thr_per_image)

## Apply threshold to data directory set function

In [None]:
def get_threshold_items(totals, thr_per_image, at_least_img_num=5, show=False):
    """Rate each data image by how many comparison images it matches well.

    Args:
        totals: Nested mapping ``{data_image: {comparison_image: num_matches}}``.
        thr_per_image: Threshold per comparison image.
        at_least_img_num: Number of comparison images whose threshold must be
            exceeded for a data image to be rated 1.
        show: Unused; kept so existing calls keep working.

    Returns:
        Dict mapping each data image to 1 (enough thresholds exceeded) or 0.

    Raises:
        KeyError: If a comparison image in ``totals`` has no entry in
            ``thr_per_image``.
    """
    values = {}
    tq = tqdm(total=len(totals))
    try:
        for img1, item in totals.items():
            # Count the comparison images whose threshold this image exceeds.
            above = sum(1 for img2, val in item.items() if val > thr_per_image[img2])
            values[img1] = 1 if above >= at_least_img_num else 0
            tq.update(1)
    finally:
        # Close the bar like the other helpers in this file do.
        tq.close()

    return values

## Calculating number of matches for each image (Data set) to entire comparison set

In [None]:
#https://colmap.github.io/database.html
#https://github.com/colmap/colmap/blob/dev/scripts/python/database.py
MAX_IMAGE_ID = 2**31 - 1

def image_ids_to_pair_id(image_id1, image_id2):
    """Combine two image ids into a COLMAP ``pair_id`` (order-independent)."""
    # The smaller id always goes into the high part.
    if image_id1 > image_id2:
        image_id1, image_id2 = image_id2, image_id1
    return image_id1 * MAX_IMAGE_ID + image_id2

def pair_id_to_image_ids(pair_id):
    """Split a COLMAP ``pair_id`` back into ``(image_id1, image_id2)`` integers."""
    image_id2 = pair_id % MAX_IMAGE_ID
    # Floor division keeps the id an int; ``/`` would yield a float in Python 3.
    image_id1 = (pair_id - image_id2) // MAX_IMAGE_ID
    return image_id1, image_id2

# NOTE(review): data_matches is not referenced again in the visible code.
data_matches = {}
conn = create_connection("./colmap_folder/colmap.db")
# Every .jpg directly inside the data folder.
data_images = glob.glob(data_dir+"*.jpg")
# Pair list file written further down (one "data_image comparison_image" pair per line).
data_pair_matching = "./colmap_folder/pairs_to_match.txt"
# Pickle caches for the comparison set's id -> filename map and its pair ids.
compare_ids_to_img_path = pickles+"compare_ids_to_img.pickle"
comparison_pair_ids_path = pickles+"comparison_pair_ids.pickle"


# Retrieve all img_ids and filenames for comparison set
if os.path.isfile(compare_ids_to_img_path):
    # The cached mapping has to land in compare_ids_to_img, which the code
    # below reads.
    # NOTE: only unpickle files this notebook wrote itself.
    with open(compare_ids_to_img_path, "rb") as pickle_file:
        compare_ids_to_img = pickle.load(pickle_file)
    print("LOADED compare_ids_to_img FROM PICKLE")
else:
    compare_ids_to_img = {}
    return_val = select_what_from(conn, "image_id, name", "images")
    for row in return_val:
        compare_img_id = str(int(row[0]))
        compare_img_filename = row[1]
        compare_ids_to_img[compare_img_id] = compare_img_filename
    with open(compare_ids_to_img_path, "wb") as pickle_file:
        pickle.dump(compare_ids_to_img, pickle_file)

print("Length of 'compare_ids_to_img':", len(compare_ids_to_img.keys()))

# Get all comparison only pair_ids
if os.path.isfile(comparison_pair_ids_path):
    # NOTE: only unpickle files this notebook wrote itself.
    with open(comparison_pair_ids_path, "rb") as pickle_file:
        comparison_only_pair_ids = pickle.load(pickle_file)
    print("LOADED comparison_only_pair_ids FROM PICKLE")
else:
    return_val = select_what_from(conn, "pair_id", "matches")
    # Pair ids are kept as strings, matching how they are compared later on.
    comparison_only_pair_ids = [str(int(row[0])) for row in return_val]
    # The with-block closes the cache file as soon as it has been written.
    with open(comparison_pair_ids_path, "wb") as pickle_file:
        pickle.dump(comparison_only_pair_ids, pickle_file)

print("Length of 'comparison_only_pair_ids':", len(comparison_only_pair_ids))

# Write all image pairings for each data image to each comparison image but not to other data images.
# The list file is opened once (still in append mode) and closed by the
# with-block, rather than being reopened for every line and never closed.
with open(data_pair_matching, "a") as pair_file:
    for data_img in data_images:
        data_filename = data_img.split("/")[-1]

        for compare_img in compare_ids_to_img.values():
            to_write = data_filename + " " + compare_img + "\n"
            pair_file.write(to_write)
# Disabled step, kept as an inert string literal: feature extraction for the data images.
"""
cmd = r"colmap feature_extractor --database_path ./colmap_folder/colmap.db --image_path ./images --SiftExtraction.max_num_features {}".format(points_num)
process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
p_status = process.wait()
print("EXTRACTION ERROR: ", error)
"""

# Retrieve all img_ids and filenames for data set
data_ids_to_img = {}
return_val = select_what_from(conn, "image_id, name", "images")
for row in return_val:
    # Ids are kept as strings, matching the keys of compare_ids_to_img.
    data_img_id = str(int(row[0]))
    data_img_filename = row[1]
    # If not a comparison image
    if data_img_id not in compare_ids_to_img.keys():
        data_ids_to_img[data_img_id] = data_img_filename

# The original author found the database had to be closed and reopened at this
# point, around the (currently disabled) external matching step.
conn.close()

# Disabled step, kept as an inert string literal: matching of the listed image pairs.
"""
# Match all image pairs for data set.
cmd = r"colmap matches_importer --database_path ./colmap_folder/colmap.db --match_list_path ./colmap_folder/pairs_to_match.txt --SiftMatching.max_num_matches 16384"
process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
p_status = process.wait()
print("MATCHING ERROR: ", error)
#print(output)
"""
# Needed before a repeat run: the pair list is opened in append mode.
#os.remove(data_pair_matching)

# Reopen the database for the queries that follow.
conn = create_connection("./colmap_folder/colmap.db")

from numba import jit

def get_matches_from_db(return_val):
    """Collect match counts between data images and comparison images.

    Reads the module-level ``data_images``, ``comparison_only_pair_ids``,
    ``compare_ids_to_img`` and ``data_ids_to_img``.

    Args:
        return_val: Rows of ``(pair_id, num_matches)`` from the ``matches`` table.

    Returns:
        ``{data_image_name: {comparison_image_name: num_matches}}`` with an
        entry (possibly empty) for every file in ``data_images``.
    """
    # The numba ``@jit(parallel=True)`` decorator was dropped: this function
    # works on dicts of strings, module globals and a tqdm bar, which numba
    # cannot compile, so it could not speed anything up.

    # Prefill matches
    matches = {image.split("/")[-1]: {} for image in data_images}

    # A set makes the per-row membership test O(1) instead of scanning a list.
    comparison_pairs = set(comparison_only_pair_ids)

    tq = tqdm(total=len(return_val))
    try:
        for row in return_val:
            # Count every row, including skipped ones, so the bar reaches its total.
            tq.update(1)
            pair_id = str(int(row[0]))
            num_matches = row[1]
            if pair_id in comparison_pairs:
                continue

            img1, img2 = pair_id_to_image_ids(int(pair_id))
            img1, img2 = str(int(img1)), str(int(img2))
            # img1 is comparison image
            if img1 in compare_ids_to_img:
                img1 = compare_ids_to_img[img1]
                img2 = data_ids_to_img[img2]
                matches[img2][img1] = num_matches
            # img2 is comparison image
            elif img2 in compare_ids_to_img:
                img2 = compare_ids_to_img[img2]
                img1 = data_ids_to_img[img1]
                matches[img1][img2] = num_matches
    finally:
        tq.close()
    return matches

#pprint(matches)
# Fetch (pair_id, match count) for every stored pair and keep the data-vs-comparison ones.
return_val = select_what_from(conn, "pair_id, rows", "matches")
matches = get_matches_from_db(return_val)
conn.close()

In [None]:
# A data image is rated 1 when it exceeds the threshold of at least 10 comparison images.
ratings = get_threshold_items(matches, thr_per_image, at_least_img_num=10, show=False)
pprint(ratings)

In [None]:
#pprint(ratings)
under_confidence = []
# Mean rating; guarded so an empty ``ratings`` dict does not divide by zero.
confidence = sum(ratings.values()) / len(ratings) if ratings else 0
print("CONFIDENCE: {}".format(confidence))
# NOTE(review): ratings are 0 or 1, so when every image is rated 0 the mean is
# 0 and nothing is flagged here -- confirm that this is intended.
for key, val in ratings.items():
    if val < confidence:
        print(key, "@", val)
        under_confidence.append(key)
        #show_img_by_path(key, size=(75,75))

print("{} out of {} images are under confident".format(len(under_confidence), len(ratings)))

In [None]:
def move_threshold_items(under, consider_folder, do_print=False):
    """Move the named data images from ``data_dir`` into ``consider_folder``.

    Names that do not exist as files under the module-level ``data_dir`` are
    reported and skipped; a failing move is printed and does not stop the loop.

    Args:
        under: File names relative to ``data_dir``.
        consider_folder: Destination passed to ``shutil.move``.
        do_print: Unused; kept so existing calls keep working.
    """
    for name in under:
        source = data_dir + name
        if not os.path.isfile(source):
            print(source, "doesn't exist")
            continue
        try:
            shutil.move(source, consider_folder)
        except Exception as e:
            print(e)

# Move every image flagged above into the "consider" folder.
move_threshold_items(under_confidence, consider_dir)

# Idea: possibly merge the feature data of all comparison images into one
# object; that would give a single percentage as an overall match.