In [2]:
import collections
import glob
import hashlib
import os
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np

In [3]:
def matchCrop(img, crop, threshold=0.8):
    """Locate `crop` inside `img` using multi-scale template matching.

    The crop is first resized (keeping its aspect ratio) to the largest size
    that fits inside the image, then repeatedly shrunk. At each size the
    normalized correlation coefficient (cv2.TM_CCOEFF_NORMED) is computed and
    the best location is taken. The search stops as soon as a score is not
    greater than the last accepted score, when the crop has become too small
    relative to the image, or after a fixed number of steps.

    Parameters
    ----------
    img : image array as returned by cv2.imread, or None.
    crop : image array as returned by cv2.imread, or None.
    threshold : float in [0.0, 1.0]; a score must exceed it to be accepted.

    Returns
    -------
    [score, top_left, bottom_right] for the last accepted match, where
    top_left is the location reported by cv2.minMaxLoc and bottom_right is
    that location offset by the crop's width and height at that scale.
    Returns -1 when the threshold is out of range, when either input is
    missing or empty, or when no score exceeded the threshold.
    """
    max_size_ratio = 8    # give up once the image is this many times larger than the crop
    shrink_factor = 0.86  # crop side lengths are multiplied by this after every step
    max_steps = 20        # upper bound on the number of scales tried

    if threshold > 1.0 or threshold < 0.0:
        return -1

    # cv2.imread returns None for unreadable files; report "no match"
    # instead of failing inside OpenCV.
    if img is None or crop is None:
        return -1

    ih, iw = img.shape[:2]
    h, w = crop.shape[:2]

    # Empty arrays would cause a division by zero below.
    if min(ih, iw, h, w) < 1:
        return -1

    if ih // h > max_size_ratio or iw // w > max_size_ratio:
        return -1

    img = cv2.GaussianBlur(img, (5, 5), 0)
    crop = cv2.GaussianBlur(crop, (5, 5), 0)

    # Fit the crop inside the image while preserving its aspect ratio.
    if iw / ih > w / h:
        fit_w, fit_h = int(w * ih / h), ih
    else:
        fit_w, fit_h = iw, int(h * iw / w)

    # An extreme aspect ratio can truncate one side to 0 pixels.
    if fit_w < 1 or fit_h < 1:
        return -1

    crop = cv2.resize(crop, (fit_w, fit_h))

    best = None  # last accepted [score, top_left, bottom_right]

    for _step in range(max_steps):

        h, w = crop.shape[:2]

        if ih // h > max_size_ratio or iw // w > max_size_ratio:
            break

        res = cv2.matchTemplate(img, crop, cv2.TM_CCOEFF_NORMED)

        _min_val, max_val, _min_loc, max_loc = cv2.minMaxLoc(res)

        # Until something is accepted, compare against -2, which is below
        # any normalized score, so the search keeps going.
        reference = best[0] if best is not None else -2
        if not max_val > reference:
            break

        if max_val > threshold:
            best = [max_val, max_loc, (max_loc[0] + w, max_loc[1] + h)]

        h = int(h * shrink_factor)
        w = int(w * shrink_factor)

        # Stop before asking OpenCV to resize to a zero-sized template.
        if h < 1 or w < 1:
            break

        crop = cv2.resize(crop, (w, h))

    return best if best is not None else -1

In [4]:
def md5(fname):
    """Return the hexadecimal MD5 digest of the file at `fname`.

    The file is opened in binary mode and consumed in 4096-byte pieces so
    that it never has to be held in memory all at once.
    """
    digest = hashlib.md5()
    with open(fname, "rb") as stream:
        while True:
            piece = stream.read(4096)
            if piece == b"":
                # read() yields an empty bytes object at end of file
                break
            digest.update(piece)
    return digest.hexdigest()

In [56]:
def getUniqueCrops(crops_path):
    """Group the files matching the glob pattern `crops_path` by content.

    Each matching path is hashed with md5(); paths whose checksums are equal
    end up in the same list.

    Returns a dict mapping checksum -> list of paths, in the order glob
    yielded them.
    """
    by_checksum = {}
    for path in glob.glob(crops_path):
        by_checksum.setdefault(md5(path), []).append(path)
    return by_checksum

def getCropsAssociation(im_path, keys):
    """Find which crops appear in the image stored at `im_path`.

    For every checksum in `keys`, the first file listed under that checksum
    in the module-level `unique_crops` mapping is matched against the image
    with matchCrop(). On a match, the checksum is recorded in the
    module-level `found_crops` list (once), and one entry is produced for
    every file sharing that checksum.

    Parameters
    ----------
    im_path : path of the image to search in.
    keys : iterable of checksums that are keys of `unique_crops`.

    Returns
    -------
    List of (crop_file_name, [x1, y1, x2, y2]) tuples, where the four numbers
    are the top-left and bottom-right coordinates returned by matchCrop().
    The list is empty when nothing matched or the image could not be read.
    """
    matches = []
    im = cv2.imread(im_path)

    # cv2.imread returns None when the file cannot be decoded as an image.
    if im is None:
        return matches

    for key in keys:
        crop_paths = unique_crops[key]
        cr = cv2.imread(crop_paths[0])
        if cr is None:
            # Unreadable crop: it cannot match, leave it for the caller's
            # "not found" bookkeeping.
            continue

        result = matchCrop(im, cr)
        if result == -1:
            continue

        _score, top_left, bottom_right = result

        if key not in found_crops:
            found_crops.append(key)

        for crop_path in crop_paths:
            # basename works for nested directories and OS-specific
            # separators, unlike indexing the result of split('/').
            crop_name = os.path.basename(crop_path)
            matches.append((crop_name, [top_left[0], top_left[1], bottom_right[0], bottom_right[1]]))

    return matches
    

In [62]:
CROPS_PATH = 'crops/'
IMAGES_PATH = 'images/'

# Checksums of crops matched in at least one image; getCropsAssociation appends to it.
found_crops = []
# (crop file name, []) entries for crops that matched no image.
not_found = []
# image file name -> list of (crop file name, [x1, y1, x2, y2]); 'na' -> not_found.
match_dict = {}

unique_crops = getUniqueCrops(CROPS_PATH+'*')
keys = unique_crops.keys()

t1 = time.time()

# basename instead of split('/')[1]: independent of directory depth and path separator.
images = [os.path.basename(path) for path in glob.glob(IMAGES_PATH+'*')]

for image_name in images:
    print(IMAGES_PATH+image_name)
    match_dict[image_name] = getCropsAssociation(IMAGES_PATH+image_name,keys)

for key in keys:
    if key not in found_crops:
        for path in unique_crops[key]:
            not_found.append((os.path.basename(path),[]))

match_dict['na'] = not_found

t2 = time.time()

print(t2-t1," secs")

images/ad4d275c-f960-4ff6-8557-3f2fc8a2b938.jpg
images/53dc69fe-dec1-4318-b76a-1a91840dec80.jpg
images/ce0eadb1-f1ba-4bd6-aad1-b5a8ea9c9533.jpg
images/2cb5ebc9-b1e9-4890-a477-691530b865e2.jpg
images/0cdc518f-d4af-4a93-9fff-1f095c619a40.jpg
images/b2f34e6a-047f-4b6f-b974-4758cfcb9786.jpg
images/a0d5345b-2a5d-4457-87c9-7ad69d092d6b.jpg
images/f467dfe6-ef73-4bb2-a370-72984c1180a9.jpg
images/933685e4-afd9-4499-b72e-da060b5e64e4.jpg
images/6c606013-344a-4cf9-8220-26e7d21681cf.jpg
images/96ce811a-b2ad-4c5c-974f-453b3efc77a4.jpg
images/1b7afaaf-992d-40ae-907f-d4deaa7adf9f.jpg
images/8c74e428-da51-4446-897c-86071042752e.jpg
images/0313d744-6a9f-4ce5-bad8-8987c7686861.jpg
images/4a56bd57-1cdd-4ae0-85a7-208dad0ecac1.jpg
images/9253badc-8b45-451e-875a-e0612eec5d12.jpg
images/2b90572d-1d13-4237-991b-b163340c5d04.jpg
images/4e43756d-4033-498b-b9f7-a6546c412f2c.jpg
images/5c773ad9-4174-4b96-9bec-e0838b806cbf.jpg
images/bcd944a7-3eda-4837-a15e-867d62a4febb.jpg
images/71038f21-2854-4ccb-963b-0492d1ac7

images/957882b0-68ee-435e-8e62-e0c5d1d53217.jpg
images/2b780208-9d9f-4251-adae-84918240529e.jpg
images/befb5fe7-4614-4d05-bbab-b77d12cd10db.jpg
images/b9677b09-ac72-45d9-a2c2-457d84df7b9b.jpg
images/10d5a9e7-27cb-407c-bfb6-f569002248fe.jpg
images/666b6eb3-cb69-4c2d-9f57-581a8b77b759.jpg
images/beee6dbf-6a90-4088-9705-a74781552dfc.jpg
images/622367a8-1ccb-4cb5-8968-6a66ccb83046.jpg
images/e1502824-a1bd-4e8a-8dd0-62d7d3106156.jpg
images/d106508e-efea-40d8-9be5-e128c16ea281.jpg
images/f39c8db5-883a-4206-9f04-3dd6d5f605c4.jpg
images/cfd1a6e4-61f2-4e90-92ae-0f2683ec7ddd.jpg
images/837933cf-9d07-4665-a987-5e78ac952d30.jpg
images/339c9e70-d0a3-47d8-bf80-e94e75f54b90.jpg
images/2a226b01-9c27-49cb-ab33-2659d9520c33.jpg
images/feb85275-b148-47f1-a159-e27257bf3705.jpg
images/041b2165-e923-46ea-ae3d-aab954aa0fb3.jpg
images/5b5a2ac8-9de5-4f5c-a79c-e7f5fe5ce0d4.jpg
images/ed0cf422-9e83-4586-b2a1-65829e52224d.jpg
images/61fb2c2c-6823-4927-b88f-0c2c7053b9f5.jpg
images/d3be4bd0-a475-4a65-b6a8-518133c38

In [63]:
# Display the mapping: image file name -> list of (crop file name, [x1, y1, x2, y2]);
# the 'na' key lists the crops that were not matched in any image.
match_dict

{'ad4d275c-f960-4ff6-8557-3f2fc8a2b938.jpg': [],
 '53dc69fe-dec1-4318-b76a-1a91840dec80.jpg': [('95a16ec5-3968-4f4d-87d7-49969f9141ba.jpg',
   [56, 0, 101, 95]),
  ('1b1a3b4b-a468-4e8f-ab0c-e4e495154bf6.jpg', [260, 0, 327, 206])],
 'ce0eadb1-f1ba-4bd6-aad1-b5a8ea9c9533.jpg': [],
 '2cb5ebc9-b1e9-4890-a477-691530b865e2.jpg': [],
 '0cdc518f-d4af-4a93-9fff-1f095c619a40.jpg': [('8986f73d-753e-406b-a43c-7dc285cd2c73.jpg',
   [56, 156, 277, 319])],
 'b2f34e6a-047f-4b6f-b974-4758cfcb9786.jpg': [('3ee562a4-e1a6-4e17-8f30-38d2746a5d32.jpg',
   [908, 78, 1085, 255]),
  ('95a16ec5-3968-4f4d-87d7-49969f9141ba.jpg', [523, 145, 658, 425]),
  ('838848e1-ce44-4608-8b9f-7398ad16ec22.jpg', [13, 156, 357, 536])],
 'a0d5345b-2a5d-4457-87c9-7ad69d092d6b.jpg': [('95a16ec5-3968-4f4d-87d7-49969f9141ba.jpg',
   [13, 304, 82, 451])],
 'f467dfe6-ef73-4bb2-a370-72984c1180a9.jpg': [('95a16ec5-3968-4f4d-87d7-49969f9141ba.jpg',
   [71, 90, 126, 210]),
  ('838848e1-ce44-4608-8b9f-7398ad16ec22.jpg', [17, 137, 429, 593]