In [None]:
import glob
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np


# glob returns paths in arbitrary order; sorting makes every later loop over
# `files` (and anything seeded that depends on their order) reproducible.
files = sorted(glob.glob("dataset/*/*.jpg"))

### Create dictionaries mapping pokemon name -> id and id -> name
# ! remember to manually change the names of Farfetch'd and MrMime for formatting reasons
name_id = {}
id_name = {}
# Each usable line is tab-separated with the name in column 0 and the numeric
# id in column 2; lines that do not match that shape are skipped.
with open("pokemon_id_name") as id_file:
    for line in id_file:
        # Drop the line terminator first so an id sitting in the last column
        # still passes the isdigit() check.
        fields = line.rstrip("\r\n").split("\t")
        if len(fields) > 2 and fields[2].isdigit():
            pokemon_name = fields[0]
            pokemon_id = int(fields[2])
            name_id[pokemon_name] = pokemon_id
            id_name[pokemon_id] = pokemon_name
        


In [None]:
def read_img_src(src, color=True):
    """Load an image from disk with OpenCV.

    Parameters
    ----------
    src : str
        Path of the image file to read.
    color : bool, default True
        When truthy the image is read with ``cv2.IMREAD_COLOR``; otherwise it
        is read with ``cv2.IMREAD_GRAYSCALE`` (the flag value ``0``).

    Returns
    -------
    numpy.ndarray
        The decoded image as returned by ``cv2.imread``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` returns ``None``, which is how OpenCV reports a
        missing or undecodable file instead of raising.
    """
    flag = cv2.IMREAD_COLOR if color else cv2.IMREAD_GRAYSCALE
    img = cv2.imread(src, flag)
    if img is None:
        # Fail here with the offending path rather than later with an opaque
        # error inside resize()/cvtColor().
        raise FileNotFoundError("cv2.imread could not read image: {!r}".format(src))
    return img

In [None]:
def showMultiImage(imgs, descs=None, figsize=None, cmap=None, suptitle=""):
    """Show several images side by side on a single row.

    Parameters
    ----------
    imgs : sequence of numpy.ndarray
        Images to display. Multi-channel images are converted from BGR to RGB
        before plotting; single-channel (2-D) images are plotted as they are.
    descs : sequence, optional
        One title per image. Defaults to no titles.
    figsize : int, float or tuple, optional
        Figure size passed to ``plt.figure``; a single number is expanded to a
        square ``(figsize, figsize)``.
    cmap : optional
        Colormap forwarded to ``plt.imshow``. For 2-D images ``"gray"`` is used
        when no colormap is given.
    suptitle : str, default ""
        Title drawn above the whole row; skipped when empty.
    """
    if descs is None:
        descs = [None] * len(imgs)
    if isinstance(figsize, (int, float)):
        figsize = (figsize, figsize)
    plt.figure(figsize=figsize)
    for i, img in enumerate(imgs):
        plt.subplot(1, len(imgs), i + 1)
        if img.ndim == 2:
            # BGR->RGB conversion only makes sense for multi-channel input;
            # cvtColor would raise on a single-channel image.
            plt.imshow(img, cmap=cmap if cmap is not None else "gray")
        else:
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), cmap=cmap)
        plt.title(descs[i])
        plt.axis('off')
    if suptitle:
        plt.suptitle(suptitle)
    plt.show()


In [None]:
def resize(img, size=100, interpolation=cv2.INTER_LINEAR):
    """Resize ``img`` to a ``size`` x ``size`` square.

    The aspect ratio is not preserved: both output dimensions are set to
    ``size``.

    Parameters
    ----------
    img : numpy.ndarray
        Image to resize.
    size : int, default 100
        Width and height of the output, in pixels.
    interpolation : int, default cv2.INTER_LINEAR
        OpenCV interpolation flag forwarded to ``cv2.resize``.

    Returns
    -------
    numpy.ndarray
        The resized image.
    """
    # Forward the caller's choice of interpolation to cv2.resize.
    return cv2.resize(img, (size, size), interpolation=interpolation)

In [None]:
# DATA Augmentation 
def rotateImage(img, angle):
    """Rotate ``img`` around its centre, keeping the original canvas size.

    Parameters
    ----------
    img : numpy.ndarray
        Image with shape ``(rows, cols)`` or ``(rows, cols, channels)``.
    angle : float
        Rotation angle passed to ``cv2.getRotationMatrix2D`` (scale is 1).

    Returns
    -------
    numpy.ndarray
        The rotated image, warped onto a ``cols`` x ``rows`` output.
    """
    # Only the first two dimensions matter, so grayscale images work too.
    rows, cols = img.shape[:2]
    center = ((cols - 1) / 2.0, (rows - 1) / 2.0)
    M = cv2.getRotationMatrix2D(center, angle, 1)
    return cv2.warpAffine(img, M, (cols, rows))

def save_with_rotate(src, angles=(5, 10, 15, 20), prefix="no_background/"):
    """Write rotated copies of the image at ``src`` for data augmentation.

    One file is written per angle, named ``<stem>_rot<angle><ext>``, inside
    ``<prefix>/<parent folder of src>/``. The output folder is created if
    needed.

    Parameters
    ----------
    src : str
        Path of the source image; its parent folder name is reused as the
        output sub-folder.
    angles : iterable of numbers, default (5, 10, 15, 20)
        Rotation angles handed to ``rotateImage``.
    prefix : str, default "no_background/"
        Root folder of the written files.

    Returns
    -------
    list of str
        Paths of the files handed to ``cv2.imwrite``, in ``angles`` order.

    Raises
    ------
    FileNotFoundError
        If the source image could not be read.
    """
    img = read_img_src(src)
    if img is None:
        raise FileNotFoundError("could not read image: {!r}".format(src))

    # The folder containing the image is reused as the output sub-folder.
    directory = os.path.join(prefix, os.path.basename(os.path.dirname(src)))
    os.makedirs(directory, exist_ok=True)

    stem, ext = os.path.splitext(os.path.basename(src))
    written = []
    for angle in angles:
        # The angle is part of the file name so each rotation gets its own
        # file instead of overwriting the previous one.
        out_path = os.path.join(directory, "{}_rot{}{}".format(stem, angle, ext))
        cv2.imwrite(out_path, rotateImage(img=img, angle=angle))
        written.append(out_path)
    return written


In [None]:
def similarity(h1, h2):
    """Compare two per-channel histogram triples.

    ``h1`` and ``h2`` are indexed at positions 0, 1 and 2; each pair of
    entries is compared with ``cv2.compareHist`` using
    ``cv2.HISTCMP_BHATTACHARYYA`` and the three results are added up.

    NOTE(review): per the OpenCV documentation the Bhattacharyya method is a
    distance (0 means identical histograms), so despite this function's name
    a smaller return value means a closer match - confirm callers agree.
    """
    return sum(
        cv2.compareHist(h1[channel], h2[channel], cv2.HISTCMP_BHATTACHARYYA)
        for channel in range(3)
    )

In [None]:
def kmeans_seg(img, k=4):
    """Quantise the colours of ``img`` into ``k`` clusters with k-means.

    Every pixel is replaced by the (uint8-cast) centre of the cluster it was
    assigned to, so the result has the same shape as ``img`` but at most ``k``
    distinct colours.

    Parameters
    ----------
    img : numpy.ndarray
        Image of shape ``(rows, cols, channels)`` or ``(rows, cols)``.
    k : int, default 4
        Number of clusters.

    Returns
    -------
    numpy.ndarray
        uint8 array with ``img.shape``.

    Notes
    -----
    Cluster centres are initialised with ``cv2.KMEANS_RANDOM_CENTERS``, so the
    segmentation is not guaranteed to be identical between runs.
    """
    # One row per pixel, one column per channel (a 2-D image has one channel).
    channels = 1 if img.ndim == 2 else img.shape[-1]
    samples = np.float32(img.reshape((-1, channels)))

    # Stop after 10 iterations or once the requested accuracy (1.0) is
    # reached; the 10 passed to cv2.kmeans is the number of attempts.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, labels, centers = cv2.kmeans(
        samples, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
    )

    # Map every pixel to its cluster centre and restore the image layout.
    centers = np.uint8(centers)
    return centers[labels.flatten()].reshape(img.shape)

def remove_background(img, seg_img):
    """Black out the pixels of ``img`` that belong to the background.

    The background is defined as every pixel of ``seg_img`` whose colour
    equals the colour of one of the four corners of ``seg_img``.

    Parameters
    ----------
    img : numpy.ndarray
        Image to clean; it is not modified.
    seg_img : numpy.ndarray
        Segmented version of ``img`` with the same height and width, either
        ``(rows, cols)`` or ``(rows, cols, channels)``.

    Returns
    -------
    numpy.ndarray
        Copy of ``img`` with background pixels set to 0.
    """
    res = img.copy()
    corners = (seg_img[0, 0], seg_img[-1, -1], seg_img[0, -1], seg_img[-1, 0])
    for corner in corners:
        mask = seg_img == corner
        if mask.ndim == 3:
            # A pixel is background only when ALL its channels match the
            # corner colour; matching a single channel value is not enough.
            mask = mask.all(axis=-1)
        res[mask] = 0
    return res

def histogramme(img, mask=None):
    """Compute one 256-bin histogram per channel of ``img`` in HSV space.

    ``img`` is converted with ``cv2.COLOR_BGR2HSV`` and ``cv2.calcHist`` is
    run on channels 0, 1 and 2 over the range [0, 256). ``mask`` is forwarded
    to ``cv2.calcHist`` unchanged.

    Returns a list of three histograms, in channel order.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    return [
        cv2.calcHist([hsv], [channel], mask, [256], [0, 256])
        for channel in range(3)
    ]

In [None]:
# Reference sprites: keep files named "<number>.png" with number < 152.
train_files = glob.glob("dataset_train/pokemon-a/*.png")
id_and_path = []
for f in train_files:
    num = f.split("/")[-1].split(".")[0]
    if str.isdigit(num) and int(num) < 152:
        id_and_path.append((int(num), f))
# glob returns paths in arbitrary order (and a plain string sort would put
# "10" before "2"), so order numerically: code that indexes by pokemon id
# relies on train_imgs being sorted by that id.
train_imgs = [path for _, path in sorted(id_and_path)]

In [None]:
# Reference histograms, one per path in train_imgs and in the same order.
# NOTE(review): the lookups below treat index i as pokemon id i + 1 (151
# entries); that only holds if train_imgs is ordered by id - confirm.
base_hist = [histogramme(read_img_src(src)) for src in train_imgs]
len(base_hist)

In [None]:
def closest_img(src):
    """Predict the pokemon name for the image stored at ``src``.

    The image is resized, its background is removed using the k-means
    segmentation, and its HSV histograms are compared with every entry of
    ``base_hist``. The entry with the smallest summed Bhattacharyya distance
    wins, and its index + 1 is looked up in ``id_name``.

    Parameters
    ----------
    src : str
        Path of the image to classify.

    Returns
    -------
    str
        Name of the closest reference pokemon.

    Raises
    ------
    ValueError
        If ``base_hist`` is empty.
    """
    img = resize(read_img_src(src))
    # remove background
    img = remove_background(img, kmeans_seg(img))
    # create hist
    hist = histogramme(img)
    # compare hist to all the base histograms
    distances = [similarity(base, hist) for base in base_hist]
    # similarity() sums Bhattacharyya distances, where 0 means identical
    # histograms, so the best match is the SMALLEST value.
    best_index = distances.index(min(distances))
    return id_name[best_index + 1]


# Count how many dataset images are assigned their own folder name.
score = 0
for f in files:
    actual = f.split('/')[1]
    predicted = closest_img(f)
    print("predicted", predicted, "actual", actual)
    if predicted == actual:
        score += 1

In [None]:
# ML approach 

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# Build the dataset: Y holds the pokemon id of each file (looked up from its
# folder name), X the matching grayscale image resized to 50x50 and flattened
# into a single pixel vector.
Y = []
X = []
for f in files:
    Y.append(name_id[f.split('/')[1]])
    X.append(resize(read_img_src(f, color=False), 50).flatten())

# PCA projection of the pixel vectors (all components kept).
# NOTE(review): this result is not referenced by the cells visible here.
reudced_X = PCA().fit(X).transform(X)

In [None]:
X = np.array(X)
# Hold out 40% of the samples for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(X, np.array(Y), test_size=0.4, random_state=0)


clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
# Fit on the training split only: fitting on the full X/Y would let the model
# see the test samples and make the count below meaningless.
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

# Number of correctly classified held-out samples.
print(sum(y_pred == y_test))