In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from skimage.io import imread
from skimage.filters import prewitt_h,prewitt_v
import cv2
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.linear_model import Perceptron
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

### Read Dataset List

In [None]:
dir_path = './data/'


def read_split_list(list_name):
    """Parse one dataset index file into image paths and labels.

    Every non-blank line of ``dir_path + list_name`` must hold at least two
    whitespace-separated fields: a relative image path followed by its label.
    Fields after the second one are ignored.

    Args:
        list_name: File name of the index inside ``dir_path``, e.g. 'train.txt'.

    Returns:
        A ``(paths, labels)`` tuple of two equally long lists. Each path is
        prefixed with ``dir_path``; each label is kept as a string.

    Raises:
        ValueError: If a non-blank line has fewer than two fields.
    """
    paths, labels = [], []
    with open(dir_path + list_name, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            # split() without arguments also drops '\n', '\r' and repeated
            # spaces, so labels never carry stray whitespace.
            fields = line.split()
            if not fields:
                # Tolerate blank lines (commonly a trailing one at end of file).
                continue
            if len(fields) < 2:
                raise ValueError(
                    f'{dir_path + list_name}:{line_no}: expected "<path> <label>", got {line!r}'
                )
            paths.append(dir_path + fields[0])
            labels.append(fields[1])
    return paths, labels


train, train_labels = read_split_list('train.txt')
val, val_labels = read_split_list('val.txt')
test, test_labels = read_split_list('test.txt')

print(f'訓練資料共{len(train)}筆')
print(f'驗證資料共{len(val)}筆')
print(f'測試資料共{len(test)}筆')

### Feature Extraction (硬體關係跑不動，因此後來沒使用)
- [bovw](https://tigercosmos.xyz/post/2020/06/cv/bag-of-visual-words/)
- [bovw_github](https://gist.github.com/tigercosmos/a5af5359b81b99669ef59e82839aed60)

In [None]:
def get_clusters(paths, cluster_size):
    """Fit K-Means on the SIFT descriptors collected from a set of images.

    Args:
        paths: Iterable of image file paths to load with ``cv2.imread``.
        cluster_size: Number of clusters passed to ``KMeans``.

    Returns:
        The fitted ``KMeans`` estimator.

    Raises:
        OSError: If ``cv2.imread`` cannot load one of the images.
        ValueError: If no image produced any SIFT descriptor.
    """
    # The detector does not depend on the image, so build it once instead of
    # once per iteration.
    sift = cv2.SIFT_create()

    descriptor_blocks = []
    for path in tqdm(paths):
        # Flag 0 makes OpenCV load the image as a single-channel grayscale image.
        img = cv2.imread(path, 0)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f'cv2.imread could not read image: {path}')

        _keypoints, descriptors = sift.detectAndCompute(img, None)

        # Images without keypoints yield no descriptors and are simply skipped.
        if descriptors is not None:
            descriptor_blocks.append(descriptors)

    if not descriptor_blocks:
        raise ValueError('no SIFT descriptors were extracted from the given images')

    # Stack the per-image descriptor matrices into one (n_descriptors, dim) array.
    bag_of_features = np.vstack(descriptor_blocks).astype('float32')
    clusters = KMeans(cluster_size).fit(bag_of_features)

    return clusters

# labels共50個
# feature_clusters = get_clusters(train, 50)

[edgeDetection](https://kknews.cc/zh-tw/code/y5a5v3g.html)

In [None]:
def feature_extraction(paths, max_len=100000):
    """Build fixed-length Prewitt edge feature vectors for a list of images.

    Each image is read as grayscale, filtered, flattened to one dimension and
    then zero-padded or truncated so that every vector has ``max_len`` entries.

    Args:
        paths: Iterable of image file paths to load with ``skimage.io.imread``.
        max_len: Length of every returned feature vector. Defaults to 100000,
            the value that was previously hard-coded.

    Returns:
        A list with one 1-D NumPy array of length ``max_len`` per input path,
        in the same order as ``paths``.

    Raises:
        ValueError: If ``max_len`` is negative.
    """
    if max_len < 0:
        raise ValueError(f'max_len must be non-negative, got {max_len}')

    features = []
    for path in tqdm(paths):
        # Read the image and convert it to grayscale.
        image = imread(path, as_gray=True)
        edges_prewitt_horizontal = prewitt_h(image)
        # NOTE(review): the vertical filter runs on the horizontal-edge
        # response, not on the raw image — confirm this chaining is intended.
        edges_prewitt_vertical = prewitt_v(edges_prewitt_horizontal)

        # Flatten, cut to at most max_len values, then zero-pad if too short.
        edge_features = edges_prewitt_vertical.flatten()[:max_len]
        shortfall = max_len - len(edge_features)
        if shortfall > 0:
            edge_features = np.pad(edge_features, (0, shortfall), 'constant', constant_values=0)
        features.append(edge_features)

    return features

### 資料前處理


In [None]:
def data_preprocessing(paths, max_len=100000):
    """Turn images into fixed-length, scaled grayscale pixel vectors.

    Each image is loaded as grayscale, divided by 255, flattened to one
    dimension and then zero-padded or truncated to ``max_len`` entries.

    Args:
        paths: Iterable of image file paths to load with ``cv2.imread``.
        max_len: Length of every returned feature vector. Defaults to 100000,
            the value that was previously hard-coded.

    Returns:
        A list with one 1-D NumPy array of length ``max_len`` per input path,
        in the same order as ``paths``.

    Raises:
        ValueError: If ``max_len`` is negative.
        OSError: If ``cv2.imread`` cannot load one of the images.
    """
    if max_len < 0:
        raise ValueError(f'max_len must be non-negative, got {max_len}')

    features = []
    for path in tqdm(paths):
        # Flag 0 makes OpenCV load the image as a single-channel grayscale image.
        image = cv2.imread(path, 0)
        if image is None:
            # cv2.imread returns None on failure; without this check the
            # division below would fail with an unhelpful TypeError.
            raise OSError(f'cv2.imread could not read image: {path}')

        # Scale pixel values by 255 (maps 8-bit intensities into [0, 1]).
        image_normalize = image / 255

        # Flatten, cut to at most max_len values, then zero-pad if too short.
        image_features = image_normalize.flatten()[:max_len]
        shortfall = max_len - len(image_features)
        if shortfall > 0:
            image_features = np.pad(image_features, (0, shortfall), 'constant', constant_values=0)
        features.append(image_features)

    return features

# Convert each split's image paths into feature vectors (train, val, test in that order).
train_features, val_features, test_features = (
    data_preprocessing(split_paths) for split_paths in (train, val, test)
)

### 訓練模型、驗證模型與預測模型

### [Perceptron](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Perceptron.html#sklearn.linear_model.Perceptron)

In [None]:
%%time
# split the data 
# x_train, x_test, y_train, y_test = train_test_split(train_features[0:5000], train_labels[0:5000], test_size=0.2, random_state=0)

# train Perceptron classifier
clf = Perceptron()
clf.fit(train_features, train_labels)

In [None]:
%%time
print('Perceptron Classifier')
# valid Perceptron classifier
y_val = clf.predict(val_features)
val_accuracy = accuracy_score(val_labels, y_val)
print(f'validation_data_accuracy:{val_accuracy}')

# test Perceptron classifier
y_test = clf.predict(test_features)
test_accuracy = accuracy_score(test_labels, y_test)
print(f'test_data_accuracy:{test_accuracy}')

### [SVM](https://scikit-learn.org/stable/modules/svm.html)

In [None]:
%%time
# train SVM classifier
svm = SVC(kernel='linear', random_state=0)
svm.fit(train_features, train_labels)

In [None]:
%%time
print('SVM Classifier')
# valid SVM classifier
y_val = svm.predict(val_features)
val_accuracy = accuracy_score(val_labels, y_val)
print(f'validation_data_accuracy:{val_accuracy}')

# test SVM classifier
y_test = svm.predict(test_features)
test_accuracy = accuracy_score(test_labels, y_test)
print(f'test_data_accuracy:{test_accuracy}')

### [AdaBoost](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html)

In [None]:
%%time
# train AdaBoost classifier
Ada = AdaBoostClassifier(n_estimators=100, random_state=0)
Ada = Ada.fit(train_features, train_labels)

In [None]:
%%time
print('Adaoost Classifier')
# valid Adaoost classifier
y_val = Ada.predict(val_features)
val_accuracy = accuracy_score(val_labels, y_val)
print(f'validation_data_accuracy:{val_accuracy}')

# test Adaoost classifier
y_test = Ada.predict(test_features)
test_accuracy = accuracy_score(test_labels, y_test)
print(f'test_data_accuracy:{test_accuracy}')