In [18]:
import numpy as np
import pandas as pd
import os, sys
import matplotlib.pyplot as plt

import tensorflow as tf
from IPython.display import display
from IPython.display import Image as _Imgdis
from PIL import Image
import cv2
from keras.preprocessing.image import load_img
from keras.preprocessing.image import save_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import ImageDataGenerator, array_to_img
from tensorflow.image import resize

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers.core import Dense,Dropout, Activation
import keras.backend as K
from keras.wrappers.scikit_learn import KerasClassifier
from keras import optimizers
import sys
import warnings
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import *

if not sys.warnoptions:
    warnings.simplefilter("ignore")

In [19]:
# reading data
df_train = pd.read_csv('train.csv')
df_train_DG = pd.read_csv('train_DG.csv')
df_test = pd.read_csv('test.csv')

In [20]:
X_train = df_train.iloc[: , 1:-1]
y_train = df_train.iloc[: , -1:].values.ravel()
print(X_train.shape)
print(y_train.shape)

X_train_DG = df_train_DG.iloc[: , 1:-1]
y_train_DG = df_train_DG.iloc[: , -1:].values.ravel()
print(X_train_DG.shape)
print(y_train_DG.shape)

X_test = df_test.iloc[: , 1:-1]
y_test = df_test.iloc[: , -1:].values.ravel()
print(X_test.shape)
print(y_test.shape)

(251, 65536)
(251,)
(333, 65536)
(333,)
(66, 65536)
(66,)


In [21]:
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import *
label_encoder = LabelEncoder()
y_train = to_categorical(label_encoder.fit_transform(y_train),3)
y_train_DG = to_categorical(label_encoder.fit_transform(y_train_DG),3)
y_test = to_categorical(label_encoder.fit_transform(y_test),3)
print(y_train.shape)
print(y_train_DG.shape)
print(y_test.shape)

(251, 3)
(333, 3)
(66, 3)


In [4]:
def create_model(optimizer):
    model = Sequential()
    model.add(Dense(256,input_dim=X_train.shape[1],kernel_initializer="uniform"))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(128,kernel_initializer="uniform"))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(64,kernel_initializer="uniform"))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(3))
    model.add(Activation('softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])
    return model

In [26]:
def predict(x,y,test_x,test_y):
    model = KerasClassifier(build_fn=create_model, verbose=0)
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
    X_train, X_val, y_train, y_val = train_test_split(x,y, test_size=0.25, random_state=1) # 0.25 
    #利用grid search調參數
    batch_size = [32, 128]
    epochs = [30]
    opt = ['adam']
    param_grid = dict(optimizer=opt,batch_size=batch_size, epochs=epochs)
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=10)
    grid_result = grid.fit(X_train, y_train,validation_data=(X_val,y_val),callbacks=[callback])
    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    optimizer_best = grid_result.best_params_['optimizer']
    batch_best = grid_result.best_params_['batch_size']
    epochs_best = grid_result.best_params_['epochs']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))
    model_best =  Sequential()
    model_best.add(Dense(256,input_dim=x.shape[1],kernel_initializer="uniform"))
    model_best.add(Activation('relu'))
    model_best.add(Dropout(0.4))
    model_best.add(Dense(128,kernel_initializer="uniform"))
    model_best.add(Activation('relu'))
    model_best.add(Dropout(0.4))
    model_best.add(Dense(64,kernel_initializer="uniform"))
    model_best.add(Activation('relu'))
    model_best.add(Dropout(0.4))
    model_best.add(Dense(3))
    model_best.add(Activation('softmax'))
    model_best.compile(optimizer=optimizer_best, loss='categorical_crossentropy', metrics=["accuracy"])
    history = model_best.fit(x, y,batch_size=batch_best,epochs=epochs_best,callbacks=[callback])
    Y_pred = model_best.predict_generator(test_x)
    y_pred = np.argmax(Y_pred, axis=1)
    y_test_argmax = np.argmax(test_y, axis=1)
    accuracy = accuracy_score(y_test_argmax, y_pred)
    print('accuracy:', accuracy)
    precision = precision_score(y_test_argmax, y_pred, average = 'macro')
    print('precision:', precision)
    recall = recall_score(y_test_argmax, y_pred, average = 'macro')
    print('recall:', recall)
    f1 = f1_score(y_test_argmax, y_pred, average = 'macro')
    print('f1:',f1)
    cm = confusion_matrix(y_test_argmax, y_pred)
    print('confusion matrix')
    print(cm)

In [10]:
predict(X_train,y_train,X_test,y_test)

Best: 0.877778 using {'batch_size': 32, 'epochs': 30, 'optimizer': 'rmsprop'}
0.676608 (0.130353) with: {'batch_size': 32, 'epochs': 10, 'optimizer': 'rmsprop'}
0.851462 (0.069610) with: {'batch_size': 32, 'epochs': 10, 'optimizer': 'adam'}
0.751170 (0.127805) with: {'batch_size': 32, 'epochs': 20, 'optimizer': 'rmsprop'}
0.866959 (0.079102) with: {'batch_size': 32, 'epochs': 20, 'optimizer': 'adam'}
0.877778 (0.071033) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'rmsprop'}
0.868129 (0.097526) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'adam'}
0.755263 (0.100493) with: {'batch_size': 128, 'epochs': 10, 'optimizer': 'rmsprop'}
0.792982 (0.092027) with: {'batch_size': 128, 'epochs': 10, 'optimizer': 'adam'}
0.745614 (0.166805) with: {'batch_size': 128, 'epochs': 20, 'optimizer': 'rmsprop'}
0.851462 (0.080277) with: {'batch_size': 128, 'epochs': 20, 'optimizer': 'adam'}
0.797661 (0.098141) with: {'batch_size': 128, 'epochs': 30, 'optimizer': 'rmsprop'}
0.857310 (0.107512)

In [11]:
predict(X_train_DG,y_train_DG,X_test,y_test)

Best: 0.831500 using {'batch_size': 32, 'epochs': 20, 'optimizer': 'adam'}
0.699000 (0.096172) with: {'batch_size': 32, 'epochs': 10, 'optimizer': 'rmsprop'}
0.807333 (0.097454) with: {'batch_size': 32, 'epochs': 10, 'optimizer': 'adam'}
0.798833 (0.108254) with: {'batch_size': 32, 'epochs': 20, 'optimizer': 'rmsprop'}
0.831500 (0.092359) with: {'batch_size': 32, 'epochs': 20, 'optimizer': 'adam'}
0.795333 (0.110677) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'rmsprop'}
0.819000 (0.097411) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'adam'}
0.659167 (0.098858) with: {'batch_size': 128, 'epochs': 10, 'optimizer': 'rmsprop'}
0.783000 (0.109366) with: {'batch_size': 128, 'epochs': 10, 'optimizer': 'adam'}
0.715333 (0.096243) with: {'batch_size': 128, 'epochs': 20, 'optimizer': 'rmsprop'}
0.823500 (0.085850) with: {'batch_size': 128, 'epochs': 20, 'optimizer': 'adam'}
0.803167 (0.093792) with: {'batch_size': 128, 'epochs': 30, 'optimizer': 'rmsprop'}
0.823500 (0.114587) wi

In [12]:
# result of original data with PCA = 27
pca = PCA(n_components=27)
pca.fit(X_train)
X_train_PCA = pca.transform(X_train)
X_test_PCA = pca.transform(X_test)

In [13]:
def create_model_for_PCA(optimizer):
    model = Sequential()
    model.add(Dense(256,input_dim=X_train_PCA.shape[1],kernel_initializer="uniform"))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(128,kernel_initializer="uniform"))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(64,kernel_initializer="uniform"))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(3))
    model.add(Activation('softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])
    return model
def predict_for_PCA(x,y,test_x,test_y):
    model = KerasClassifier(build_fn=create_model_for_PCA, verbose=0)
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
    X_train, X_val, y_train, y_val = train_test_split(x,y, test_size=0.25, random_state=1) # 0.25 
    #利用grid search調參數
    batch_size = [32, 128]
    epochs = [10,20,30]
    opt = ['rmsprop','adam']
    param_grid = dict(optimizer=opt,batch_size=batch_size, epochs=epochs)
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=10)
    grid_result = grid.fit(X_train, y_train,validation_data=(X_val,y_val),callbacks=[callback])
    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    optimizer_best = grid_result.best_params_['optimizer']
    batch_best = grid_result.best_params_['batch_size']
    epochs_best = grid_result.best_params_['epochs']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))
    model_best =  Sequential()
    model_best.add(Dense(256,input_dim=x.shape[1],kernel_initializer="uniform"))
    model_best.add(Activation('relu'))
    model_best.add(Dropout(0.4))
    model_best.add(Dense(128,kernel_initializer="uniform"))
    model_best.add(Activation('relu'))
    model_best.add(Dropout(0.4))
    model_best.add(Dense(64,kernel_initializer="uniform"))
    model_best.add(Activation('relu'))
    model_best.add(Dropout(0.4))
    model_best.add(Dense(3))
    model_best.add(Activation('softmax'))
    model_best.compile(optimizer=optimizer_best, loss='categorical_crossentropy', metrics=["accuracy"])
    history = model_best.fit(x, y,batch_size=batch_best,epochs=epochs_best,callbacks=[callback])
    Y_pred = model_best.predict_generator(test_x)
    y_pred = np.argmax(Y_pred, axis=1)
    y_test_argmax = np.argmax(test_y, axis=1)
    accuracy = accuracy_score(y_test_argmax, y_pred)
    print('accuracy:', accuracy)
    precision = precision_score(y_test_argmax, y_pred, average = 'macro')
    print('precision:', precision)
    recall = recall_score(y_test_argmax, y_pred, average = 'macro')
    print('recall:', recall)
    f1 = f1_score(y_test_argmax, y_pred, average = 'macro')
    print('f1:',f1)
    cm = confusion_matrix(y_test_argmax, y_pred)
    print('confusion matrix')
    print(cm)

In [14]:
predict_for_PCA(X_train_PCA,y_train,X_test_PCA,y_test)

Best: 0.925731 using {'batch_size': 128, 'epochs': 30, 'optimizer': 'rmsprop'}
0.904678 (0.080949) with: {'batch_size': 32, 'epochs': 10, 'optimizer': 'rmsprop'}
0.920468 (0.063643) with: {'batch_size': 32, 'epochs': 10, 'optimizer': 'adam'}
0.909942 (0.066943) with: {'batch_size': 32, 'epochs': 20, 'optimizer': 'rmsprop'}
0.915205 (0.078947) with: {'batch_size': 32, 'epochs': 20, 'optimizer': 'adam'}
0.915205 (0.071587) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'rmsprop'}
0.920468 (0.079162) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'adam'}
0.904678 (0.080949) with: {'batch_size': 128, 'epochs': 10, 'optimizer': 'rmsprop'}
0.899708 (0.092372) with: {'batch_size': 128, 'epochs': 10, 'optimizer': 'adam'}
0.909942 (0.070961) with: {'batch_size': 128, 'epochs': 20, 'optimizer': 'rmsprop'}
0.909942 (0.078380) with: {'batch_size': 128, 'epochs': 20, 'optimizer': 'adam'}
0.925731 (0.067697) with: {'batch_size': 128, 'epochs': 30, 'optimizer': 'rmsprop'}
0.925731 (0.067697

In [50]:
# reading data
df_zca = pd.read_csv('zca_500.csv')
df_test_zca = pd.read_csv('zca_test.csv')

In [63]:
X_train_zca = df_zca.iloc[: , 1:-1]
X_train_zca = X_train_zca/255
y_train_zca = df_zca.iloc[: , -1:].values.ravel()

X_test_zca = df_test_zca.iloc[: , 1:-1]
X_test_zca = X_test_zca/255
y_test_zca = df_test_zca.iloc[: , -1:].values.ravel()

In [64]:
y_train_zca = to_categorical(label_encoder.fit_transform(y_train_zca),3)
y_test_zca = to_categorical(label_encoder.fit_transform(y_test_zca),3)
print(y_train_zca.shape)
print(y_test_zca.shape)

(1455, 3)
(66, 3)


In [91]:
def create_model_zca(optimizer):
    model = Sequential()
    model.add(Dense(64,input_dim=X_train_zca.shape[1],kernel_initializer="uniform"))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(32,kernel_initializer="uniform"))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(16,kernel_initializer="uniform"))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(3))
    model.add(Activation('softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])
    return model
def predict_ZCA(x,y,test_x,test_y):
    model = KerasClassifier(build_fn=create_model_zca, verbose=0)
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
    X_train, X_val, y_train, y_val = train_test_split(x,y, test_size=0.25, random_state=1) # 0.25 
    #利用grid search調參數
    batch_size = [32,128]
    opt = ['rmsprop','adam']
    param_grid = dict(optimizer=opt,batch_size=batch_size)
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=10)
    grid_result = grid.fit(X_train, y_train,validation_data=(X_val,y_val),callbacks=[callback])
    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    optimizer_best = grid_result.best_params_['optimizer']
    batch_best = grid_result.best_params_['batch_size']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))
    model_best =  Sequential()
    model_best.add(Dense(64,input_dim=x.shape[1],kernel_initializer="uniform"))
    model_best.add(Activation('relu'))
    model_best.add(Dropout(0.4))
    model_best.add(Dense(32,kernel_initializer="uniform"))
    model_best.add(Activation('relu'))
    model_best.add(Dropout(0.4))
    model_best.add(Dense(16,kernel_initializer="uniform"))
    model_best.add(Activation('relu'))
    model_best.add(Dropout(0.4))
    model_best.add(Dense(3))
    model_best.add(Activation('softmax'))
    model_best.compile(optimizer=optimizer_best, loss='categorical_crossentropy', metrics=["accuracy"])
    history = model_best.fit(X_train, y_train,validation_data=(X_val,y_val),batch_size=batch_best,epochs=30,callbacks=[callback])
    Y_pred = model_best.predict_generator(test_x)
    y_pred = np.argmax(Y_pred, axis=1)
    y_test_argmax = np.argmax(test_y, axis=1)
    accuracy = accuracy_score(y_test_argmax, y_pred)
    print('accuracy:', accuracy)
    precision = precision_score(y_test_argmax, y_pred, average = 'macro')
    print('precision:', precision)
    recall = recall_score(y_test_argmax, y_pred, average = 'macro')
    print('recall:', recall)
    f1 = f1_score(y_test_argmax, y_pred, average = 'macro')
    print('f1:',f1)
    cm = confusion_matrix(y_test_argmax, y_pred)
    print('confusion matrix')
    print(cm)

In [92]:
predict_ZCA(X_train_zca,y_train_zca,X_test_zca,y_test_zca)

Best: 0.348324 using {'batch_size': 128, 'optimizer': 'rmsprop'}
0.328123 (0.039587) with: {'batch_size': 32, 'optimizer': 'rmsprop'}
0.325413 (0.066416) with: {'batch_size': 32, 'optimizer': 'adam'}
0.348324 (0.057835) with: {'batch_size': 128, 'optimizer': 'rmsprop'}
0.305229 (0.030753) with: {'batch_size': 128, 'optimizer': 'adam'}
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
accuracy: 0.3484848484848485
precision: 0.26060606060606056
recall: 0.36410256410256414
f1: 0.25009009009009003
confusion matrix
[[ 5  0 21]
 [ 4  0 16]
 [ 2  0 18]]


In [14]:
df_500 = pd.read_csv('train_DG_500.csv')
X_train_500 = df_500.iloc[: , 1:-1]
X_train_500 = X_train_500/255
y_train_500 = df_500.iloc[: , -1:].values.ravel()

In [23]:
label_encoder = LabelEncoder()
y_train_500 = to_categorical(label_encoder.fit_transform(y_train_500),3)

In [27]:
predict(X_train_500,y_train_500,X_test,y_test)

Best: 0.951090 using {'batch_size': 128, 'epochs': 30, 'optimizer': 'adam'}
0.951082 (0.015140) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'adam'}
0.951090 (0.027647) with: {'batch_size': 128, 'epochs': 30, 'optimizer': 'adam'}
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Instructions for updating:
Please use Model.predict, which supports generators.
accuracy: 0.8333333333333334
precision: 0.8339542483660131
recall: 0.8282051282051283
f1: 0.8255855855855855
confusion matrix
[[23  2  1]
 [ 0 14  6]
 [ 1  1 18]]


In [36]:
X_train, X_val, y_train, y_val = train_test_split(X_train_500,y_train_500, test_size=0.25, random_state=1) # 0.25 
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5)
model_best =  Sequential()
model_best.add(Dense(256,input_dim=X_train_500.shape[1],kernel_initializer="uniform"))
model_best.add(Activation('relu'))
model_best.add(Dropout(0.5))
model_best.add(Dense(128,kernel_initializer="uniform"))
model_best.add(Activation('relu'))
model_best.add(Dropout(0.5))
model_best.add(Dense(64,kernel_initializer="uniform"))
model_best.add(Activation('relu'))
model_best.add(Dropout(0.5))
model_best.add(Dense(3))
model_best.add(Activation('softmax'))
model_best.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
history = model_best.fit(X_train, y_train,validation_data=(X_val,y_val),batch_size=32,epochs=20,callbacks=[callback])
Y_pred = model_best.predict_generator(X_test)
y_pred = np.argmax(Y_pred, axis=1)
y_test_argmax = np.argmax(y_test, axis=1)
accuracy = accuracy_score(y_test_argmax, y_pred)
print('accuracy:', accuracy)
precision = precision_score(y_test_argmax, y_pred, average = 'macro')
print('precision:', precision)
recall = recall_score(y_test_argmax, y_pred, average = 'macro')
print('recall:', recall)
f1 = f1_score(y_test_argmax, y_pred, average = 'macro')
print('f1:',f1)
cm = confusion_matrix(y_test_argmax, y_pred)
print('confusion matrix')
print(cm)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
accuracy: 0.8333333333333334
precision: 0.8383333333333333
recall: 0.8282051282051283
f1: 0.8265795206971678
confusion matrix
[[23  2  1]
 [ 0 14  6]
 [ 2  0 18]]
