In [1]:
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import cv2
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

RANDOM_STATE = 101

In [2]:
dir_path = '../data/'

train, train_labels = [], []
with open(dir_path + 'train.txt', 'r') as f:
  for line in f:
    data = line.split(" ")
    train.append(dir_path + data[0])
    train_labels.append(data[1].replace('\n', ''))


val, val_labels = [], []
with open(dir_path + 'val.txt', 'r') as f:
  for line in f:
    data = line.split(" ")
    val.append(dir_path + data[0])
    val_labels.append(data[1].replace('\n', ''))


test, test_labels = [], []
with open(dir_path + 'test.txt', 'r') as f:
  for line in f:
    data = line.split(" ")
    test.append(dir_path + data[0])
    test_labels.append(data[1].replace('\n', ''))

print(f'訓練資料共{len(train)}筆')
print(f'驗證資料共{len(val)}筆')
print(f'測試資料共{len(test)}筆')

訓練資料共63325筆
驗證資料共450筆
測試資料共450筆


### 特徵擷取
- [Color Histograms彩色直方图](https://blog.csdn.net/cliukai/article/details/101379638)

In [3]:
# PCA
# def data_preprocess(paths):
#     features = []
#     max_len = 256 * 5
#     for path in tqdm(paths):
#         # 讀取image，並轉為灰階影像
#         image = cv2.imread(path, 0)
#         img = cv2.resize(img, (256, 256))
#         # 灰階圖片數值介於0~255之間
#         image_normalize = image/255
#         # 使用pca降維，保留5個components
#         pca = PCA(n_components=5)
#         pca.fit(image_normalize)
#         image_pca = pca.transform(image_normalize)
#         image_fla = image_pca.flatten()
#         # 補0到最大長度
#         if len(image_fla) < max_len:
#             pad_feature = np.pad(image_fla, (0, max_len-len(image_fla)), 'constant', constant_values=0)
#             features.append(pad_feature.flatten())
#         else:
#             features.append(image_fla)

#     return features

def feature_extraction(paths):
    color_features = []
    for path in tqdm(paths):
        img = cv2.imread(path)
        img = cv2.resize(img, (256, 256))
        
        # color histograms彩色直方图
        # 0:blue 1:green 2:red
        color_channels = cv2.split(img)
        bg_features = cv2.calcHist([color_channels[0], color_channels[1]], [0, 1], None, [16, 16], [0, 256, 0, 256]).flatten()
        br_features = cv2.calcHist([color_channels[0], color_channels[2]], [0, 1], None, [16, 16], [0, 256, 0, 256]).flatten()
        gr_features = cv2.calcHist([color_channels[1], color_channels[2]], [0, 1], None, [16, 16], [0, 256, 0, 256]).flatten()
        # 標準化(MaxAbs)，雙色圖片 256*256
        total_features = np.concatenate([bg_features, br_features, gr_features]) / 65536
        color_features.append(total_features)
    return color_features

In [4]:
%%time
train_features = feature_extraction(train)
val_features = feature_extraction(val)
test_features = feature_extraction(test)

  0%|          | 0/63325 [00:00<?, ?it/s]

  0%|          | 0/450 [00:00<?, ?it/s]

  0%|          | 0/450 [00:00<?, ?it/s]

CPU times: user 3min 14s, sys: 10.2 s, total: 3min 24s
Wall time: 18min 47s


### 評估指標

In [5]:
def top1_acc(labels, pro):
    correct = []
    for i, p in enumerate(pro):
        pred_y = np.argsort(p)[-1]
        if int(labels[i]) == pred_y:
            correct.append(1)
        else:
            correct.append(0)
    
    return round(sum(correct) / len(correct), 4)

def top5_acc(labels, pro):
    correct = []
    for i, p in enumerate(pro):
        top5_y = np.argsort(p)[-5:]
        if int(labels[i]) in list(top5_y):
            correct.append(1)
        else:
            correct.append(0)
    
    return round(sum(correct) / len(correct), 4)

### [Single-Layer Perceptron](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Perceptron.html#sklearn.linear_model.Perceptron)

In [6]:
%%time
# train Perceptron classifier
print('Perceptron Classifier')
clf = Perceptron(random_state=RANDOM_STATE)
clf.fit(train_features, train_labels)

# valid Perceptron classifier
val_pro = clf.decision_function(val_features)
val_top1_accuracy = top1_acc(val_labels, val_pro)
print(f'validation_data_top1_acc: {val_top1_accuracy}')
val_top5_accuracy = top5_acc(val_labels, val_pro)
print(f'validation_data_top5_acc: {val_top5_accuracy}')


# test Perceptron classifier
test_pro = clf.decision_function(test_features)
test_top1_accuracy = top1_acc(test_labels, test_pro)
print(f'test_data_top1_acc: {test_top1_accuracy}')
test_top5_accuracy = top5_acc(test_labels, test_pro)
print(f'test_data_top5_acc: {test_top5_accuracy}')

Perceptron Classifier
validation_data_top1_acc: 0.0133
validation_data_top5_acc: 0.0911
test_data_top1_acc: 0.0222
test_data_top5_acc: 0.1133
CPU times: user 26.5 s, sys: 1.3 s, total: 27.8 s
Wall time: 1min 22s
