In [0]:
# Mount Google Drive so files under /content/drive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Show kaggle.json in the current working directory; it is copied into ~/.kaggle below.
!ls -lha kaggle.json

# Replace whatever kaggle client is installed with the pinned 1.5.6 release.
!pip uninstall -y kaggle
!pip install --upgrade pip
!pip install kaggle==1.5.6

# Install the API token used for the Kaggle integration
! mkdir -p ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json


# The notebook cannot proceed unless the reported version is 1.5.6
! kaggle -v

# Swap the installed OpenCV wheels for 3.4.2.16 (both the base and contrib packages).
# NOTE(review): presumably required because the notebook calls cv2.xfeatures2d_SIFT,
# which lives in the contrib build — confirm before changing the pin.
! yes | pip3 uninstall opencv-python
! yes | pip3 uninstall opencv-contrib-python
! yes | pip3 install opencv-python==3.4.2.16
! yes | pip3 install opencv-contrib-python==3.4.2.16

In [0]:
# Pin opencv-python / opencv-contrib-python to 3.4.2.16.
# These are the same four commands that end the previous cell; `yes |` answers
# pip's uninstall confirmation prompt.
! yes | pip3 uninstall opencv-python
! yes | pip3 uninstall opencv-contrib-python
! yes | pip3 install opencv-python==3.4.2.16
! yes | pip3 install opencv-contrib-python==3.4.2.16

In [0]:
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
import pandas as pd
import cv2
import numpy as np
from scipy.cluster.vq import vq
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import zipfile
import os
import pickle
import torch
import numpy as np
import random
import sys
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [0]:
## Load the codebook
############################################

# Expected number of codewords in the codebook loaded below.
n_cluster = 2048

# NOTE(review): despite the .npy extension this file is read with pickle, and
# unpickling can execute arbitrary code — only load a codebook you produced yourself.
# The context manager closes the file handle (the bare open() call never did).
with open("/content/drive/My Drive/패턴인식/2048codebook.npy", 'rb') as codebook_file:
  codebook = pickle.load(codebook_file)

############################################

def dense_sift_each(imgGray):
   """Compute SIFT descriptors on a regular grid instead of at detected keypoints.

   Keypoints of size 8 are placed every 8 pixels. They are generated x-major:
   all y positions for the first x, then all y positions for the next x, and
   so on, which fixes the order of the returned descriptors.

   Args:
      imgGray: 2-D (single-channel) image; its shape is unpacked as (rows, cols).

   Returns:
      The (keypoints, descriptors) pair produced by ``sift.compute``.
   """
   extractor = cv2.xfeatures2d_SIFT.create()

   # A NumPy image shape is (rows, cols); cv2.KeyPoint takes (x, y, size),
   # so x walks the columns and y walks the rows.
   n_rows, n_cols = np.array(imgGray).shape
   grid = [
      cv2.KeyPoint(x, y, 8)
      for x in range(0, n_cols, 8)
      for y in range(0, n_rows, 8)
   ]
   kp, des = extractor.compute(imgGray, grid)

   return kp, des


def weak_sift_each(imgGray):
  """Detect SIFT keypoints in an image and compute their descriptors.

  Args:
    imgGray: image handed to ``detectAndCompute`` (no mask is used).

  Returns:
    The (keypoints, descriptors) pair from ``detectAndCompute``.
  """
  detector = cv2.xfeatures2d_SIFT.create()
  keypoints, descriptors = detector.detectAndCompute(imgGray, None)
  return keypoints, descriptors


def HistogramIntersection(X, Y):
    """Pairwise histogram-intersection kernel between the rows of X and Y.

    Args:
        X: array whose first axis indexes samples.
        Y: array whose first axis indexes samples; rows must be comparable
           element-wise with the rows of X.

    Returns:
        Array of shape (X.shape[0], Y.shape[0]) where entry (i, j) is
        sum(minimum(X[i], Y[j])).
    """
    gram = np.zeros((X.shape[0], Y.shape[0]))
    for row, x_hist in enumerate(X):
        for col, y_hist in enumerate(Y):
            # Overlap of the two histograms: element-wise minimum, summed.
            gram[row, col] = np.minimum(x_hist, y_hist).sum()
    return gram


def HI_SVM(x_train, x_test, y_train):
  """Fit an SVC on a precomputed histogram-intersection kernel and predict x_test.

  Args:
    x_train: training feature rows.
    x_test: feature rows to classify.
    y_train: labels aligned with x_train.

  Returns:
    Predicted labels for x_test, as returned by ``SVC.predict``.
  """
  classifier = SVC(kernel='precomputed')
  # Training Gram matrix: every training sample against every training sample.
  classifier.fit(HistogramIntersection(x_train, x_train), y_train)

  # Prediction kernel: rows are test samples, columns are training samples.
  test_vs_train = HistogramIntersection(x_test, x_train)
  return classifier.predict(test_vs_train)
 

def LSVM(x_train, x_test, y_train):
  """Grid-search C for a class-balanced LinearSVC, then predict x_test.

  The chosen parameters are printed after the search finishes.

  Args:
    x_train: training feature rows.
    x_test: feature rows to classify.
    y_train: labels aligned with x_train.

  Returns:
    Predicted labels for x_test from the fitted grid-search object.
  """
  candidate_C = [ 0.0001, 0.001, 0.01, 0.1, 1 ,10 ,100 ]
  search = GridSearchCV(
      LinearSVC(class_weight='balanced'),
      {'C': candidate_C},
      verbose = 3,
  )
  search.fit(x_train, y_train)

  print(search.best_params_)

  return search.predict(x_test)

def histogram(des, codebook):
  """Quantize descriptors against a codebook and count hits per codeword.

  Args:
    des: 2-D array of descriptors, one per row.
    codebook: 2-D array of codewords, one per row, with the same feature
      length as the descriptors.

  Returns:
    1-D integer array of length ``len(codebook)``; entry k is the number of
    descriptors whose nearest codeword is k.
  """
  # vq returns, for every descriptor, the index of its closest codeword.
  codeword, _ = vq(des, codebook)

  # Size the histogram from the codebook that was actually passed in rather
  # than from a module-level constant, so the two can never disagree.
  his = np.bincount(codeword, minlength=len(codebook))

  return his


def build(imgs, codebook):
  """Turn each image into a list of per-tile bag-of-visual-words histograms.

  For every image: dense SIFT descriptors are forced onto a 32x32x128 grid,
  the grid is split with ``cutted(..., 2)``, and each resulting tile is
  flattened to 64 descriptors and histogrammed against the codebook.

  Args:
    imgs: iterable of images accepted by ``dense_sift_each``.
    codebook: codeword array passed through to ``histogram``.

  Returns:
    A list with one entry per image; each entry is a list of tile histograms.
  """
  per_image = []

  for image in tqdm(imgs):
    _, descriptors = dense_sift_each(image)
    # np.resize repeats or truncates data to reach the requested shape, so a
    # descriptor count other than 32*32 is NOT reported as an error here.
    grid = np.resize(np.asarray(descriptors), (32, 32, 128))
    tiles = np.asarray(cutted(grid, 2))
    n_tiles, _, _, _ = tiles.shape

    per_image.append([
        histogram(np.resize(tiles[k], (64 ,128)), codebook)
        for k in range(n_tiles)
    ])

  return per_image


def cutted(src, level=2):
  """Split an array into a (2**level) x (2**level) grid of equally sized tiles.

  Tile boundaries are computed directly from the row/column index. The
  previous running-offset version advanced the column offset by the tile
  height, so it only produced correct tiles for square, evenly divisible
  inputs.

  Args:
    src: array with exactly three axes (rows, cols, channels).
    level: pyramid level; the grid has 2**level cells per side.

  Returns:
    For ``level != 0``: a list of 4**level slices of ``src`` in row-major
    order (left to right, then top to bottom). Each tile has
    ``rows // 2**level`` rows and ``cols // 2**level`` columns; leftover rows
    or columns that do not fill a whole tile are dropped.
    For ``level == 0``: ``src`` itself, not wrapped in a list.
  """
  h_end, w_end, _ = src.shape

  if level == 0:
    return src

  cells = 2 ** level
  h = h_end // cells
  w = w_end // cells

  return [
      src[row * h:(row + 1) * h, col * w:(col + 1) * w]
      for row in range(cells)
      for col in range(cells)
  ]


def pyramid(his, level):
  """Build a weighted spatial-pyramid feature vector from 16 cell histograms.

  Args:
    his: sequence of 16 equal-length histograms. The quadrant grouping below
      treats them as a 4x4 grid with index = 4 * row + col.
    level: 0, 1 or 2.

  Returns:
    1-D array:
      level 0 -> the unweighted sum of all 16 histograms;
      level 1 -> [0.25 * whole image, 0.25 * four quadrants];
      level 2 -> the level-1 vector followed by 0.5 * all 16 cells.

  Raises:
    ValueError: if ``level`` is not 0, 1 or 2 (the function previously fell
      off the end and returned None for such values).
  """
  if level not in (0, 1, 2):
    raise ValueError("pyramid level must be 0, 1 or 2, got %r" % (level,))

  # Convert once so the sums below are element-wise even when the cells are
  # given as plain Python lists.
  cells = np.asarray(his)

  # Level 1: each 2x2 group of neighbouring cells forms one quadrant.
  his1 = np.array([
      cells[0] + cells[1] + cells[4] + cells[5],
      cells[2] + cells[3] + cells[6] + cells[7],
      cells[8] + cells[9] + cells[12] + cells[13],
      cells[10] + cells[11] + cells[14] + cells[15],
  ])
  # Level 0: the whole image is the sum of the four quadrants.
  his0 = np.ravel(his1[0] + his1[1] + his1[2] + his1[3], order='C')
  his1 = np.ravel(his1, order='C')
  his2 = np.ravel(cells, order='C')

  if level == 0:
    return np.asarray(his0)

  py = np.hstack((his0 * 0.25, his1 * 0.25))

  if level == 1:
    return py

  return np.hstack((py, his2 * 0.5))

## 데이터 로드 및 가공

In [0]:
! kaggle competitions download -c 2019-ml-finalproject

zip_ref = zipfile.ZipFile("/content/2019-ml-finalproject.zip", 'r')
zip_ref.extractall("/content")
zip_ref.close()

df_data = pd.read_csv('/content/Label2Names.csv', header=None)

DATA_ROOT_TRAIN = "./train"

train_imgs = []
y_train = []
i = 0

for cls in tqdm(os.listdir(DATA_ROOT_TRAIN)):
  img_list = os.listdir(DATA_ROOT_TRAIN + '/' + cls)
  img_list.sort()

  if cls == 'BACKGROUND_Google' :
    label = 102
    continue
  else :
    label = (df_data.index[df_data[1]==cls]+1).tolist()[0]

  for img in img_list :
    image = cv2.imread(DATA_ROOT_TRAIN + '/' + cls + '/' + img)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (256, 256))
    train_imgs.append(gray)
    y_train.append(label)


DATA_ROOT_TEST = "./testAll_v2"

test_imgs = []
test_des = list()

img_list = os.listdir(DATA_ROOT_TEST)
img_list.sort()

y_test = []

for cls in tqdm(img_list):
  image = cv2.imread(DATA_ROOT_TEST + '/' + cls)
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  gray = cv2.resize(gray, (256, 256))
  test_imgs.append(gray)
  y_test.append(cls)

## K-means codebook training (disabled; kmc2 seeding + MiniBatchKMeans, no GPU code)

In [0]:
# Disabled experiment: train a codebook from the training images instead of
# loading the pickled one. Steps: collect descriptors, seed centres with kmc2,
# refine with MiniBatchKMeans, and take the cluster centres as the codebook.
# NOTE(review): dense_sift_PCA is not defined anywhere in the visible notebook,
# and the reshape assumes 96-dimensional descriptors, so this cell cannot be
# re-enabled as-is.

# ! pip install kmc2

# train_des = list()

# for img in tqdm(train_imgs):
#   _, des = dense_sift_PCA(img)
#   train_des.append(des)

# import kmc2

# np.array(train_des).shape
# train_codebook = np.array(train_des).reshape(-1,96)
# print(train_codebook.shape)

# codebooksize = 1024

# seeding = kmc2.kmc2(train_codebook, codebooksize) 
# from sklearn.cluster import MiniBatchKMeans
# Kmeans = MiniBatchKMeans(codebooksize, init=seeding).fit(train_codebook)
# codebook = Kmeans.cluster_centers_


## Pyramid

In [0]:
# Training set: per-image tile histograms, then one level-2 pyramid vector each.
his = build(train_imgs, codebook)
x_data = [pyramid(cell_hists, 2) for cell_hists in his]

# Test set: identical processing with the same codebook.
his_test = build(test_imgs, codebook)
x_test = [pyramid(cell_hists, 2) for cell_hists in his_test]

# Stack into float32 matrices, one row per image.
x_train = np.asarray(x_data, dtype=np.float32)
x_test = np.asarray(x_test, dtype=np.float32)


## 히스토그램 스케일러

In [0]:
# Estimate the scaling statistics on the training features only, then apply
# that same transform to both the training and the test features.
scaler = StandardScaler()
scaler.fit(x_train)

x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

## 성능 평가

In [0]:
# Hold out part of the labelled data for a local performance estimate.
# A fixed random_state makes the split — and therefore the report below —
# identical on every run.
x_train_t, x_test_t, y_train_t, y_test_t = train_test_split(
    x_train, y_train, random_state=42)

result = HI_SVM(x_train_t, x_test_t, y_train_t)
#result = LSVM(x_train_t, x_test_t, y_train_t)

print(classification_report(y_test_t, result))

## 제출용 코드

In [0]:
# Train on the full training set and predict the competition test images;
# predictions are kept as a single column.
result = HI_SVM(x_train, x_test, y_train).reshape(-1, 1)
#result = LSVM(x_train, x_test, y_train).reshape(-1, 1)

# y_test holds the test file names; pair each one with its predicted label.
result_img_list = np.reshape(y_test, (-1, 1))
total_result = np.concatenate((result_img_list, result), axis=1)
print(total_result)

In [0]:
# Two-column submission table (file name, predicted label) written as CSV
# with a header row and without the DataFrame index.
df = pd.DataFrame(data=total_result, columns=["id", "Category"])
print(df)
df.to_csv(path_or_buf='results-tjkim-v3.csv', header=True, index=False)

In [0]:
# Submit results-tjkim-v3.csv to the 2019-ml-finalproject competition via the
# Kaggle CLI; -m sets the submission message.
! kaggle competitions submit -c 2019-ml-finalproject -f results-tjkim-v3.csv -m "Final_Term_Project"