In [None]:
import numpy as np
import cv2
import glob
import os
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

In [None]:
class processing_data():
  """Turn one BGR image into a flat feature vector.

  The wrapped image lives in ``self.image``; every preprocessing step
  overwrites that attribute with its own result.
  """

  def __init__(self, image):
    # Only a reference is stored here; the caller's array is not copied.
    self.image = image

  def resize_image(self, size = (64, 64)):
    """Replace ``self.image`` with a copy resized to ``size`` (given to ``cv2.resize`` as ``dsize``)."""
    resized = cv2.resize(src = self.image, dsize = size)
    self.image = resized

  def extract_color_histogram(self):
    """Equalize the histogram of the luma (Y) channel of ``self.image``.

    Despite the name, no histogram is returned: the image is converted from
    BGR to YUV, channel 0 is passed through ``cv2.equalizeHist``, and the
    result is converted back to BGR and stored in ``self.image``.
    """
    yuv = cv2.cvtColor(self.image, cv2.COLOR_BGR2YUV)
    yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])
    self.image = cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR)

  def image_to_feature_vector(self):
    """Run resize + equalization, then return the pixels as a 1-D array."""
    for step in (self.resize_image, self.extract_color_histogram):
      step()
    self.image = np.array(self.image)
    return self.image.flatten()

In [None]:
def load_img_from_folder(path_folder):
  """Load every readable image in ``path_folder`` as a flat feature vector.

  Entries are visited in ``os.listdir`` order. Anything ``cv2.imread`` cannot
  decode (it returns ``None``) is skipped silently, so the result may be
  shorter than the directory listing.

  Returns a list with one ``processing_data.image_to_feature_vector()``
  result per decoded image.
  """
  feature_vectors = []
  for entry in os.listdir(path_folder):
    raw = cv2.imread(os.path.join(path_folder, entry))
    if raw is None:
      continue
    feature_vectors.append(processing_data(raw).image_to_feature_vector())
  return feature_vectors

In [None]:
train_path = '/content/drive/MyDrive/Colab Notebooks/content/Train'

# One sub-folder per class. Sorting makes the folder -> integer id mapping
# reproducible across runs (os.listdir order is arbitrary), and stray files
# are ignored so they are not mistaken for class folders.
data_labels = sorted(
  name for name in os.listdir(train_path)
  if os.path.isdir(os.path.join(train_path, name))
)

# `class_names` was previously used without ever being assigned (it only
# existed as leftover kernel state). Bind it to the same list so both names
# always describe the same id -> folder mapping.
class_names = data_labels
print(class_names)

['00035', '00011', '00033', '00034', '00007', '00038', '00032', '00029', '00009', '00036', '00001', '00005', '00003', '00006']


In [None]:
X_train = []
y_train = []

# Iterate over `data_labels` (the class-folder list assigned together with
# `train_path`) rather than the never-assigned `class_names`: the classifier
# later decodes integer ids with `data_labels`, so ids must be positions in
# that exact list.
for label, namefolder in enumerate(data_labels):
  img_dir = os.path.join(train_path, namefolder)
  print(img_dir)
  images = load_img_from_folder(img_dir)
  print(len(images))
  X_train.extend(images)
  # Every image found in this folder gets the folder's integer id.
  y_train.extend([label] * len(images))

/content/drive/MyDrive/Colab Notebooks/content/Train/00035
739
/content/drive/MyDrive/Colab Notebooks/content/Train/00011
829
/content/drive/MyDrive/Colab Notebooks/content/Train/00033
409
/content/drive/MyDrive/Colab Notebooks/content/Train/00034
229
/content/drive/MyDrive/Colab Notebooks/content/Train/00007
889
/content/drive/MyDrive/Colab Notebooks/content/Train/00038
1309
/content/drive/MyDrive/Colab Notebooks/content/Train/00032
109
/content/drive/MyDrive/Colab Notebooks/content/Train/00029
109
/content/drive/MyDrive/Colab Notebooks/content/Train/00009
919
/content/drive/MyDrive/Colab Notebooks/content/Train/00036
199
/content/drive/MyDrive/Colab Notebooks/content/Train/00001
1429
/content/drive/MyDrive/Colab Notebooks/content/Train/00005
1189
/content/drive/MyDrive/Colab Notebooks/content/Train/00003
889
/content/drive/MyDrive/Colab Notebooks/content/Train/00006
229


In [None]:
# Freeze the accumulated Python lists into NumPy arrays (one row per loaded
# image, one integer class id per row), then print shapes and contents as a
# quick sanity check.
X_train, y_train = np.array(X_train), np.array(y_train)

print("X_train shape: ", X_train.shape)
print(X_train)
print("y_train shape: ", y_train.shape)
print(y_train)

X_train shape:  (9476, 12288)
[[ 29  44  61 ...  50  48  67]
 [227 226 228 ...  49  50  48]
 [ 68  63  64 ... 106 101  95]
 ...
 [171 174 178 ...  44  46  46]
 [101  99 104 ...  40  38  38]
 [ 17  21  22 ...  61  63  70]]
y_train shape:  (9476,)
[ 0  0  0 ... 13 13 13]


In [None]:
# load test
link_test = '/content/drive/MyDrive/Colab Notebooks/public_save'

# The submission table built further down needs `image_names`, which was never
# assigned anywhere in the notebook. Collect the file names in the same pass
# that builds the feature vectors so that unreadable files (cv2.imread returns
# None) are dropped from both lists and row i of X_test always belongs to
# image_names[i].
# NOTE(review): ids are the raw directory entries (extension included) --
# confirm this matches the expected submission format.
image_names = []
X_test = []
for filename in os.listdir(link_test):
  img = cv2.imread(os.path.join(link_test, filename))
  if img is None:
    continue
  image_names.append(filename)
  X_test.append(processing_data(img).image_to_feature_vector())

X_test = np.array(X_test)
print("X_test shape: ", X_test.shape)
print(X_test)

X_test shape:  (202, 12288)
[[134 129 144 ...  81  75  92]
 [ 71  72  76 ...  12  14  15]
 [ 72  87 110 ...  73  89 102]
 ...
 [ 39  44  43 ...  71  71  78]
 [151 151 158 ...  60  67  59]
 [ 45  49  58 ...  49  51  51]]


In [None]:
class KNearestNeighbors():
  """Brute-force k-nearest-neighbours classifier.

  Parameters
  ----------
  n : int
    Number of neighbours that vote on each prediction (must be >= 1).
  labels : sequence
    Decodes the integer class ids stored in ``y``: ``predict`` returns
    ``labels[class_id]``.
  metric : callable, optional
    ``metric(train_sample, query)`` returning a distance. When omitted, the
    module-level ``Eudists`` function is looked up at prediction time.
  """

  def __init__(self, n, labels, metric = None):
    if n < 1:
      raise ValueError('n must be at least 1')
    self.n = n
    self.labels = labels
    self.metric = metric

  def fit(self, X, y):
    """Memorize the training samples ``X`` and their integer class ids ``y``.

    Raises ``ValueError`` when ``X`` and ``y`` differ in number of samples.
    """
    # asarray leaves existing arrays untouched and lets plain lists be passed.
    X = np.asarray(X)
    y = np.asarray(y)
    if(X.shape[0] != y.shape[0]):
      raise ValueError('Failed')
    self.X = X
    self.y = y

  def get_neighbors(self, X_test):
    """Return the class ids of the ``n`` training samples closest to ``X_test``.

    Ids are ordered from nearest to farthest. If fewer than ``n`` training
    samples exist, all of them are returned.
    """
    metric = self.metric if self.metric is not None else Eudists
    dists = np.array([metric(sample, X_test) for sample in self.X])
    # A stable sort keeps training order among equidistant samples.
    nearest = np.argsort(dists, kind = 'stable')[:self.n]
    # Read ids from the fitted data, not from a notebook-level global.
    return self.y[nearest]

  def predict(self, X_test):
    """Return the label winning the majority vote among the nearest neighbours.

    Ties go to the smallest class id (first maximum of the vote counts).
    """
    votes = np.bincount(self.get_neighbors(X_test))
    return self.labels[np.argmax(votes)]

In [None]:
def Eudists(image1, image2):
    """Return the Euclidean (L2) distance between two equally shaped arrays.

    Both inputs are converted to float64 before subtracting. Image vectors
    produced by OpenCV are ``uint8``; subtracting or squaring them in that
    dtype wraps around modulo 256 and yields meaningless distances.
    """
    diff = np.asarray(image1, dtype = np.float64) - np.asarray(image2, dtype = np.float64)
    return np.sqrt(np.sum(diff * diff))

In [None]:
# 1-nearest-neighbour classifier; `data_labels` turns predicted class ids back
# into folder names.
model = KNearestNeighbors(labels = data_labels, n = 1)
model.fit(X = X_train, y = y_train)

In [None]:
# Classify each test vector independently; results keep the row order of X_test.
y_pred = [model.predict(test) for test in X_test]

In [None]:
# Show the predicted label for every test vector, in the row order of X_test.
print(y_pred)

['00036', '00006', '00001', '00029', '00006', '00029', '00034', '00035', '00035', '00035', '00035', '00032', '00007', '00005', '00001', '00032', '00005', '00006', '00035', '00035', '00035', '00001', '00001', '00003', '00003', '00003', '00003', '00029', '00007', '00001', '00006', '00006', '00006', '00005', '00006', '00036', '00011', '00006', '00009', '00005', '00011', '00038', '00035', '00034', '00005', '00032', '00038', '00009', '00034', '00038', '00003', '00011', '00033', '00036', '00006', '00009', '00032', '00011', '00034', '00029', '00038', '00033', '00035', '00009', '00038', '00007', '00011', '00035', '00038', '00001', '00011', '00007', '00003', '00032', '00003', '00033', '00001', '00011', '00038', '00011', '00007', '00003', '00036', '00009', '00036', '00038', '00001', '00029', '00038', '00001', '00003', '00035', '00003', '00005', '00011', '00032', '00032', '00005', '00001', '00007', '00036', '00038', '00034', '00038', '00005', '00036', '00007', '00005', '00033', '00011', '00034', 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Two-column submission table: one row per test image, pairing its id with the
# predicted class label.
res = pd.DataFrame(data = dict(Id = image_names, Category = y_pred))

In [None]:
# Write the submission without the DataFrame index column, then print the two
# list lengths (they should match) and the table itself as a sanity check.
res.to_csv(path_or_buf = './KNNClassification.csv', index = False)
print(len(y_pred), len(image_names))
print(res)

202 202
                                                    Id Category
0    6669f7b30dd90ba177243365413e01b253affe741888f4...    00036
1    9abc917a77bed75e97909a27cbb5ff36b030b0b0b80c55...    00006
2    71d9853d188a668d5f2817d3be98db3854ef0d90307264...    00001
3    16cfbe0d023f157063ab1b62ada0c76a63e0a9daf681ce...    00029
4    e9a116735bcdbcdd503853b6351bd6b7942de214aa3ad1...    00006
..                                                 ...      ...
197  3f726940c7b19d6723ac1e30fa164c5748cc1c1a370c26...    00033
198  4c952f0e21cd70c54282af8dab6ea98e44c6fa8369b764...    00003
199  7263b0a0e2e89197ef9a4f7b6945c9ce3e73c884c41161...    00003
200  0daf40fdb0e182dc9ba0b3dfb9b9c09a4102709647b338...    00009
201  4a50facbf50745591ffaef484de1259d97d15f9dcbf8a0...    00005

[202 rows x 2 columns]
