In [None]:
from numpy import load
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd  
import os

import keras
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import *
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras.utils import plot_model

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn import metrics

In [None]:
# Load the pre-extracted arrays from ./data: X is the model input, Y the labels
# (Y is treated as one-hot rows further down the notebook).
X = np.load('./data/input_Female_gray20.npy')
Y = np.load('./data/output_Female_gray20.npy')

In [None]:
# Class names are the entries of ./Female, minus OS / Jupyter housekeeping entries.
# NOTE(review): os.listdir returns entries in arbitrary order; the plots below pair
# classes[i] with label index i, so confirm this order matches how Y was encoded.
YawDD = os.listdir("./Female")
classes = [
    entry for entry in YawDD
    if entry not in (".DS_Store", ".ipynb_checkpoints")
]

In [None]:
seq_len = 20  # number of frames in each video sequence
# Flatten every frame into a 1-D feature vector while keeping the (video, frame) axes.
X = X.reshape(len(X), seq_len, -1)
# X is now (number of videos, seq_len, features per frame); later cells reshape a
# frame back to 64x64, i.e. 4096 features.
features_len = X.shape[-1]

In [None]:
# Sanity check: show the reshaped dimensions (number of videos, seq_len, features_len).
X.shape

In [None]:
# Sequence autoencoder: an RNN encoder compresses each video into a single vector,
# and an RNN decoder tries to rebuild the full frame sequence from that vector.
inputs = Input(shape=(seq_len, features_len))

# Encoder: without return_sequences the RNN emits only its last output,
# so every video is summarised by one 200-dimensional code.
encoded_X = SimpleRNN(units=200, activation='relu')(inputs)

# Decoder: repeat the code once per time step, unroll it with a second RNN,
# then map every time step back to the original per-frame feature size.
decoded_X = encoded_X
for decoder_layer in (
    RepeatVector(seq_len),
    SimpleRNN(units=200, activation='relu', return_sequences=True),
    TimeDistributed(Dense(features_len)),
):
    decoded_X = decoder_layer(decoded_X)

# Full autoencoder, trained on mean absolute reconstruction error.
model = Model(inputs, decoded_X)
model.compile(loss='mae', optimizer='adam')

# Encoder-only view sharing the trained layers; used later to extract the codes.
Encoder = Model(inputs=inputs, outputs=encoded_X)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the autoencoder.
model.summary()

In [None]:
# Train the autoencoder to reproduce its own input (X is both input and target).
# No validation data is passed, so `history` only records the training loss.
history = model.fit(X, X, epochs=200, batch_size=8, verbose=1)

In [None]:
# Training-loss curve of the autoencoder, one point per epoch.
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['Loss'], loc='best')
plt.show()

In [None]:
# Run every video through the trained encoder to obtain its latent code.
x_test_encoded = Encoder.predict(X)

# Reduce the latent codes to two principal components so they can be plotted.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(x_test_encoded)

# Keep the 2-D projection in a DataFrame with one named column per component.
X_pca_frame = pd.DataFrame({'pca_1': X_pca[:, 0], 'pca_2': X_pca[:, 1]})

# Fraction of the latent-space variance captured by each of the two components.
print('PCA1 and PCA2:', pca.explained_variance_ratio_)

In [None]:
# Convert one-hot label rows to integer class indices.
# The cell overwrites Y, so it is guarded on the array rank: once Y is already a
# 1-D vector of class indices, re-running the cell is a no-op instead of raising
# IndexError / producing wrong labels.
Y = np.asarray(Y)
if Y.ndim == 2:
    hot = Y == 1
    # Fail loudly on rows without a hot entry rather than silently mapping them to class 0.
    if not hot.any(axis=1).all():
        raise ValueError("every row of Y must contain a 1 (one-hot encoding expected)")
    # argmax on the boolean mask returns the first position equal to 1 in each row.
    Y = hot.argmax(axis=1)

In [None]:
# Scatter the 2-D PCA projection coloured by the true class of each video.
fig, ax = plt.subplots()
colors = ['navy', 'turquoise', 'darkorange']
lw = 2
# One colour per class; label index i is paired with classes[i].
for label_idx, (color, target_name) in enumerate(zip(colors, classes)):
    members = Y == label_idx
    ax.scatter(
        X_pca[members, 0],
        X_pca[members, 1],
        color=color,
        alpha=.8,
        lw=lw,
        label=target_name,
    )

ax.legend(loc='best', shadow=False, scatterpoints=1)
ax.set_title('Ground Truth')
plt.show()

In [None]:
# K-Means on the 2-D PCA projection, scored with the Calinski-Harabasz index.
#
# Everything here reads the raw `X_pca` array rather than `X_pca_frame`: a later cell
# re-sorts that frame and adds a 'class' column, so using it on a re-run would
# misalign the points with `y_pred` and leak the old labels into the score.
d = {}  # k -> Calinski-Harabasz score
# Create the canvas once; every k gets its own panel on it.
fig_reduced_data = plt.figure(figsize=(12, 12))
for k in range(3, 4):
    est = KMeans(n_clusters=k, random_state=111)
    y_pred = est.fit_predict(X_pca)  # cluster the dimensionality-reduced data

    # Inputs: samples as (n_samples, n_features) and their cluster labels as (n_samples,).
    calinski_harabaz_score = metrics.calinski_harabasz_score(X_pca, y_pred)
    d[k] = calinski_harabaz_score
    # A higher Calinski-Harabasz score means better-separated clusters.
    print('calinski_harabaz_score with k={0} is {1}'.format(k, calinski_harabaz_score))

    # 2-D scatter: each sample is placed at its two principal-component values.
    # The canvas is a 4x3 grid and panel k-1 is used, so k must stay within 2..13.
    ax = fig_reduced_data.add_subplot(4, 3, k - 1)
    ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y_pred)  # colour = assigned cluster
    ax.set_title('K-means of Female Drivers from YawDD Dataset')

# Finalise once, after the loop: calling plt.show() inside the loop would close the
# sized figure after the first k and push later panels onto new default-size figures.
fig_reduced_data.tight_layout()
plt.show()

In [None]:
# Attach each video's K-Means cluster id and order the rows by cluster.
# `y_pred` is in the original row order, so the frame is first put back into index
# order; this keeps the cell correct when it is re-run on an already-sorted frame
# (a plain positional assignment would then label the wrong rows).
X_pca_frame = (
    X_pca_frame
    .sort_index()
    .assign(**{'class': y_pred})  # 'class' is a Python keyword, hence the dict unpacking
    .sort_values(by='class')
)

In [None]:
# Number of rows (videos) assigned to each K-Means cluster.
X_pca_frame.value_counts('class')

In [None]:
# Show the first frames of every video that K-Means assigned to cluster 0.
decoded_imgs = model.predict(X)
# Integer copy of the reconstructions (astype truncates the fractional part).
# Not drawn in this cell; the comparison cell below recomputes and uses it.
decoded_imgs_int = decoded_imgs.astype(np.int32)
n = 5  # how many frames of each video to display

# Select the cluster-0 rows up front instead of testing every row inside the loop.
for x in X_pca_frame[X_pca_frame['class'] == 0].index:
    # One figure per video: plt.show() closes the current figure, so a single figure
    # created before the loop would only give the first video the 20x4 size.
    plt.figure(figsize=(20, 4))
    for i in range(n):
        # Original frame; only the top row of the 2 x n grid is used here.
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(X[x][i].reshape(64, 64), cmap=plt.get_cmap('gray'))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

In [None]:
# Side-by-side check of the autoencoder on one video (index 20):
# top row = original frames, bottom row = reconstructed frames.
decoded_imgs = model.predict(X)
# Integer copy of the reconstructions for display (astype truncates the fractional part).
decoded_imgs_int = decoded_imgs.astype(np.int32)
n = 5  # how many frames to display

plt.figure(figsize=(20, 4))
gray = plt.get_cmap('gray')

for i in range(n):
    # row 0 -> original frame i, row 1 -> its reconstruction
    for row, frames in enumerate((X[20], decoded_imgs_int[20])):
        ax = plt.subplot(2, n, row * n + i + 1)
        ax.imshow(frames[i].reshape(64, 64), cmap=gray)
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)

plt.show()