# 顔画像の識別

# 顔画像データセットの準備

In [None]:
# load modules

import numpy as np

import skimage.data

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
%matplotlib inline
plt.gray();

import sklearn
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import StratifiedShuffleSplit

from sklearn.svm import SVC as SVM
from sklearn.neighbors import KNeighborsClassifier as kNN
from sklearn.ensemble import AdaBoostClassifier as AdaBoost
from sklearn.ensemble import RandomForestClassifier as RandomForest


from ipywidgets import interact, interactive, fixed, RadioButtons
import ipywidgets as widgets
from IPython.display import display

## 顔画像データセットのダウンロード

ここではsklearnのデータセットとして準備されているOlivetti facesデータセットを用いる．
詳しくは[sklearnのマニュアル](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_olivetti_faces.html)を参照．

このデータセットには40人分・各10枚，計400枚の顔画像が含まれる．各画像は64x64画素を1列に並べた4096次元のベクトルとして格納されている．

In [None]:
# Fetch the Olivetti faces dataset and keep the image matrix.

dataset = fetch_olivetti_faces(
    shuffle=False,  # keep the dataset in its original order
)
faces = dataset.data  # one flattened image per row

In [None]:
# Each row of `faces` is one flattened image, so its shape is
# (number of samples, number of pixels per image).

nsamples, dim = np.shape(faces)
print("there are", nsamples, "samples of dimension", dim)

In [None]:
# Inspect the raw array: every row is one image stored as a flat vector
# of pixel values.

faces

In [None]:
# Display image 0 together with a colorbar (pixel values range from 0 to 1).

first_face = faces[0].reshape(64, 64)  # flat row vector -> 64x64 image

# Pin the color scale so that 0 is drawn black and 1 white; without
# vmin/vmax the smallest pixel value would be black and the largest white.
im = plt.imshow(first_face, vmin=0, vmax=1)

plt.colorbar(im)         # colorbar for the image just drawn
plt.axis('off')          # hide the border ticks
plt.title("0th image");  # the trailing ';' hides the returned Text repr

In [None]:
# Show every image of the dataset on a single grid.
# This draws one subplot per face, so it takes a while to render.

n_cols = 20
# Ceiling division: enough rows for all images (20 rows for 400 images),
# so the grid still fits if the number of images is not exactly 400.
n_rows = -(-len(faces) // n_cols)

plt.figure(figsize=(20, 20))
for i, face in enumerate(faces):
    plt.subplot(n_rows, n_cols, i + 1)  # subplot indices are 1-based
    plt.imshow(face.reshape(64, 64), vmin=0, vmax=1)
    plt.axis('off')

plt.show()

このデータセットの各画像には，被写体の人物を表すラベル（0〜39の整数）が付いている．

In [None]:
# Class label of every image in the dataset.
dataset.target

これを，顔画像からその人物を当てる40クラスの多クラス識別問題として扱う．

In [None]:
# Conventional scikit-learn names: X holds the feature vectors (one image
# per row) and y the corresponding class labels.
X, y = faces, dataset.target

In [None]:
# Stratified random splitter: class proportions are preserved in both the
# training part and the test part.
ss = StratifiedShuffleSplit(n_splits=1,      # generate a single split
                            train_size=0.5,  # half of the data for training
                            test_size=0.5,   # the other half for testing
                            random_state=0)  # fixed seed: same split on every run

In [None]:
# Take the first split from the splitter: two arrays of row indices.
train_index, test_index = next(ss.split(X, y))

X_train, y_train = X[train_index], y[train_index]  # training data and its labels
X_test, y_test = X[test_index], y[test_index]      # test data and its labels

では各識別器を適用してみる．

In [None]:
# Classifiers to compare, keyed by the name shown in the report.
# AdaBoost and RandomForest are randomized, so their seeds are fixed to make
# the reported accuracies repeatable for a given train/test split.
methods = {'SVM': SVM(kernel='linear'),
           'kNN': kNN(n_neighbors=5),
           'AdaBoost': AdaBoost(n_estimators=15, random_state=0),
           'RandomForest': RandomForest(n_estimators=50, random_state=0)}

# Fit every classifier on the training data and report its accuracy on the
# held-out test data.
for m, clf in methods.items():
    clf.fit(X_train, y_train)
    print('method {0}: test accuracy {1}%'.format(m, clf.score(X_test, y_test) * 100))

In [None]:
@interact(
    sample=(0, len(y) - 1, 1),
    method=RadioButtons(options=list(methods)),
)
def g(sample=0, method='kNN'):
    """Show one face with its true label and a classifier's prediction.

    sample -- row index into X / y of the image to display
    method -- key of `methods` selecting the classifier to query

    The plot title also states whether the sample belongs to the test part
    or the training part of the split.
    """
    clf = methods[method]
    face = X[sample]

    imshow(face.reshape(64, 64), vmin=0, vmax=1)
    # predict() expects a 2-D batch, so wrap the single vector as one row.
    y_pred = clf.predict(face[np.newaxis])[0]
    print(clf)

    if sample in test_index:
        subset = 'test'
    else:
        subset = 'training'

    plt.axis('off')
    plt.title('class: true {0} predict {1}\n{2} sample'.format(y[sample], y_pred, subset))