## Preparación de datos para el juego Spot It!

Parcialmente basado en: [Medium post](https://towardsdatascience.com/how-i-learned-my-computer-to-play-spot-it-using-opencv-and-deep-learning-ad1f017a3ec3)

### Preparación de datos

In [None]:
import os
import glob
import cv2
import imutils
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Working directories for the data-preparation pipeline.
# NOTE(review): these are absolute paths on one developer's machine; adjust
# them before running the notebook elsewhere.

# Input: original card photos (only files ending in ".jpg" are read).
raw_image_directory = '/Users/velascoluis/dl_summer_day/spotit/input_data/raw'
# Intermediate: contrast-enhanced, 800x800 versions of the raw photos.
processed_image_directory = '/Users/velascoluis/dl_summer_day/spotit/input_data/processed'
# Output: one 400x400 image per symbol cut out of each processed card.
symbols_image_directory = '/Users/velascoluis/dl_summer_day/spotit/input_data/symbols'

In [None]:
# Normalise every raw photo: equalise local contrast (CLAHE applied to the
# L channel in LAB space), resize to 800x800 and store the result in
# processed_image_directory as "<original name>_processed.jpg".

# cv2.imwrite does not create missing directories (it just returns False),
# so make sure the destination exists before writing anything.
os.makedirs(processed_image_directory, exist_ok=True)

# The CLAHE parameters never change between images, so build the operator once.
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))

for filename in os.listdir(raw_image_directory):
    if not filename.endswith(".jpg"):
        continue

    image = cv2.imread(os.path.join(raw_image_directory, filename))
    if image is None:
        # cv2.imread reports an unreadable or corrupt file by returning None
        # instead of raising; skip it rather than crash inside cvtColor.
        print('Skipping unreadable image:', filename)
        continue

    # Equalise only the lightness channel so the colours are left untouched.
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    limg = cv2.merge((clahe.apply(l), a, b))
    final = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)

    resized = cv2.resize(final, (800, 800))
    target = os.path.join(processed_image_directory, filename + '_processed.jpg')
    if not cv2.imwrite(target, resized):
        print('Could not write processed image:', target)

In [None]:
# Cut every processed card photo into individual symbol images.
#
# For each ".jpg" in processed_image_directory:
#   1. threshold the photo and keep the largest bright contour (the card),
#      painting everything outside it white;
#   2. threshold again (inverted) to find the dark blobs on the card, keep the
#      10 largest with an area above 1000 px, and write each one as a 400x400
#      image "<file>_icon<i>.jpg" into symbols_image_directory.


def _isolate_on_white(image, contour):
    """Return a copy of *image* in which everything outside *contour* is white."""
    mask = np.zeros(image.shape[:2], np.uint8)
    cv2.drawContours(mask, [contour], -1, 255, cv2.FILLED)
    foreground = cv2.bitwise_and(image, image, mask=mask)
    white = np.full(image.shape, 255, dtype=np.uint8)
    background = cv2.bitwise_and(white, white, mask=cv2.bitwise_not(mask))
    return cv2.bitwise_or(foreground, background)


def _square_crop(isolated, contour):
    """Return the bounding box of *contour* centred on a white square canvas.

    *isolated* must already be white outside the contour. Pasting the bounding
    box onto a fresh canvas gives the same pixels as widening the crop window
    inside the image, but it cannot run off the image border (which previously
    produced negative slice indices and silently dropped or distorted symbols
    lying close to an edge).
    """
    x, y, w, h = cv2.boundingRect(contour)
    side = max(w, h)
    canvas = np.full((side, side, isolated.shape[2]), 255, dtype=np.uint8)
    # int() truncates toward zero, so an odd difference puts the extra white
    # pixel on the right/bottom side.
    if w < h:
        off_x, off_y = int((h - w) / 2), 0
    else:
        off_x, off_y = 0, int((w - h) / 2)
    canvas[off_y:off_y + h, off_x:off_x + w] = isolated[y:y + h, x:x + w]
    return canvas


# cv2.imwrite does not create missing directories; make sure the target exists.
os.makedirs(symbols_image_directory, exist_ok=True)

for filename in os.listdir(processed_image_directory):
    if not filename.endswith(".jpg"):
        continue

    image = cv2.imread(os.path.join(processed_image_directory, filename))
    if image is None:
        # cv2.imread returns None for files it cannot decode.
        print('Skipping unreadable image:', filename)
        continue

    # --- Stage 1: locate the card and blank out the surroundings ---------
    # NOTE(review): cv2.imread yields BGR data, so COLOR_RGB2GRAY weights the
    # red and blue channels swapped. It is kept as-is because the 190/195
    # thresholds were presumably tuned against this conversion - confirm
    # before switching to COLOR_BGR2GRAY.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    thresh = cv2.threshold(gray, 190, 255, cv2.THRESH_BINARY)[1]
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    if not cnts:
        # Nothing brighter than the threshold: there is no card to cut out.
        print('No card contour found in:', filename)
        continue
    card = max(cnts, key=cv2.contourArea)
    final = _isolate_on_white(image, card)

    # --- Stage 2: find the symbols (dark blobs on the white card) --------
    gray = cv2.cvtColor(final, cv2.COLOR_RGB2GRAY)
    thresh = cv2.threshold(gray, 195, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.bitwise_not(thresh)
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]

    i = 0
    for c in cnts:
        # Ignore specks that are too small to be a symbol.
        if cv2.contourArea(c) <= 1000:
            continue
        finalcont = _isolate_on_white(image, c)
        roi = cv2.resize(_square_crop(finalcont, c), (400, 400))
        target = os.path.join(
            symbols_image_directory, filename + '_icon' + str(i) + '.jpg'
        )
        if not cv2.imwrite(target, roi):
            print('Could not write symbol image:', target)
        i += 1

### Clusterización para el etiquetado (labeling)

In [None]:
# Load every symbol image and flatten it into one feature row per image.
#
# The rows follow the order returned by os.listdir; the label-assignment step
# further down walks the same directory listing and indexes the cluster labels
# by position, so the order must not be changed here (no sorting, no skipping).
x_train = []
for filename in os.listdir(symbols_image_directory):
    if not filename.endswith(".jpg"):
        continue
    image = cv2.imread(os.path.join(symbols_image_directory, filename))
    if image is None:
        # cv2.imread returns None for undecodable files. Appending it would
        # break the array construction below with an obscure error, and
        # skipping it would shift every later label by one - so stop here.
        raise ValueError('Cannot read symbol image: ' + filename)
    x_train.append(image)

if not x_train:
    raise ValueError('No .jpg symbol images found in ' + symbols_image_directory)

# Stack once and reuse the array instead of rebuilding it for every statement.
images = np.array(x_train)
print('x_train shape:', images.shape)
# One row per image, all pixels/channels flattened into columns.
X_train = images.reshape(len(x_train), -1)
print('X_train shape:', X_train.shape)

In [None]:
from sklearn.cluster import MiniBatchKMeans

# Number of clusters to form - presumably one per distinct symbol in the
# deck; confirm against the data set.
total_clusters = 57

# Build the mini-batch K-Means estimator and fit it on the flattened symbol
# images in one chained call (fit() returns the fitted estimator itself).
kmeans = MiniBatchKMeans(n_clusters=total_clusters).fit(np.array(X_train))

In [None]:
# Prefix every symbol file with the id of the cluster it was assigned to,
# e.g. "card.jpg_icon0.jpg" -> "12_card.jpg_icon0.jpg".
#
# kmeans.labels_ is indexed by position, so this relies on os.listdir
# returning the ".jpg" files in the same order as when the training matrix
# was built. Do not add or remove files in between, and do not run this cell
# twice on the same fit (the files would get a second prefix).
symbol_files = [
    name for name in os.listdir(symbols_image_directory) if name.endswith(".jpg")
]
labels = kmeans.labels_

# Refuse to rename anything when files and labels cannot be paired one-to-one;
# failing midway would leave the directory half renamed.
if len(symbol_files) != len(labels):
    raise RuntimeError(
        'Found %d symbol files but %d cluster labels; rebuild X_train and '
        'refit before renaming.' % (len(symbol_files), len(labels))
    )

# The images themselves are not needed here, so they are no longer decoded.
for filename, cluster in zip(symbol_files, labels):
    print(filename)
    os.rename(
        os.path.join(symbols_image_directory, filename),
        os.path.join(symbols_image_directory, str(cluster) + '_' + filename),
    )