In [None]:
import numpy as np
import keras
import pandas as pd
import os, sys
import matplotlib.pyplot as plt

import tensorflow as tf
from keras.models import Sequential  #用來啟動 NN
from keras.layers import Conv2D,Conv1D,Reshape # Convolution Operation
from keras.layers import MaxPooling2D,MaxPooling1D # Pooling
from keras.layers import Flatten
from keras.layers import Dense,Dropout

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers.core import Dense,Dropout, Activation
import keras.backend as K
from keras.wrappers.scikit_learn import KerasClassifier
from keras import optimizers
import sys
import warnings

if not sys.warnoptions:
    warnings.simplefilter("ignore")

In [None]:
# Load train.csv, train_DG.csv and test.csv (in that order) into data frames.
df_train, df_train_DG, df_test = (
    pd.read_csv(csv_name) for csv_name in ('train.csv', 'train_DG.csv', 'test.csv')
)

### reshape data

In [None]:
def _split_features_labels(frame):
    """Split a raw data frame into ``(features, labels)``.

    The first column is skipped, the last column is returned as a flat 1-D label
    array, and every column in between is returned as the feature frame.
    """
    features = frame.iloc[:, 1:-1]
    labels = frame.iloc[:, -1:].values.ravel()
    return features, labels


X_train, y_train = _split_features_labels(df_train)
print(X_train.shape)
print(y_train.shape)

X_train_DG, y_train_DG = _split_features_labels(df_train_DG)
print(X_train_DG.shape)
print(y_train_DG.shape)

X_test, y_test = _split_features_labels(df_test)

print(X_test.shape)
print(y_test.shape)

# 27-component PCA projection, fitted on the training features only and then applied
# to the test features. random_state keeps the projection reproducible between runs
# whenever scikit-learn selects a randomized SVD solver for this input size.
pca = PCA(n_components=27, random_state=1)
pca.fit(X_train)
X_train_PCA = pca.transform(X_train)
X_test_PCA = pca.transform(X_test)

# Turn each flat feature row into a (256, 256, 1) array for the 2D CNN.
# This must come after the PCA step, which needs the flat 2-D layout.
X_train = np.array(X_train).reshape(X_train.shape[0],256,256,1)
X_train_DG = np.array(X_train_DG).reshape(X_train_DG.shape[0],256,256,1)
X_test = np.array(X_test).reshape(X_test.shape[0],256,256,1)

(251, 65536)
(251,)
(333, 65536)
(333,)
(66, 65536)
(66,)


In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical  # explicit name instead of a star import

# Learn the label -> integer mapping once, from the training labels, and reuse that same
# mapping for the other splits. Refitting the encoder per split would silently change
# which one-hot column a class lands in whenever a split is missing a class.
# transform() raises ValueError if a split contains a label never seen in y_train.
label_encoder = LabelEncoder()
y_train = to_categorical(label_encoder.fit_transform(y_train),3)
y_train_DG = to_categorical(label_encoder.transform(y_train_DG),3)
y_test = to_categorical(label_encoder.transform(y_test),3)
print(y_train.shape)
print(y_train_DG.shape)
print(y_test.shape)

(251, 3)
(333, 3)
(66, 3)


### Create CNN 2D model

In [None]:
def create_model(optimizer, input_shape=None):
    """Build and compile the 2D CNN classifier (two conv/pool stages, 3-way softmax).

    Parameters
    ----------
    optimizer : str or keras optimizer
        Passed unchanged to ``model.compile``.
    input_shape : tuple, optional
        Shape of one sample, ``(rows, cols, channels)``. When omitted it is derived from
        the notebook-level ``X_train`` array as ``(X_train.shape[1], X_train.shape[2], 1)``,
        which is what this function always did before the parameter existed.

    Returns
    -------
    keras.models.Sequential
        Model compiled with categorical cross-entropy loss and an accuracy metric.
    """
    if input_shape is None:
        input_shape = (X_train.shape[1], X_train.shape[2], 1)

    # NOTE(review): these layers were originally written as ``Conv2D(64, 3, 3, ...)``.
    # With the Keras 2 / tf.keras signature ``Conv2D(filters, kernel_size, strides, ...)``
    # the second ``3`` is the stride, not the kernel width. The keywords below spell out
    # that interpretation explicitly; if a stride-1 3x3 convolution was intended, remove
    # ``strides`` (and expect a much larger Flatten output).
    model = Sequential()
    model.add(Conv2D(64, kernel_size=3, strides=3, input_shape=input_shape, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Second convolutional layer
    model.add(Conv2D(32, kernel_size=3, strides=3, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head: dropout on both sides of a 128-unit dense layer.
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
def predict(x,y,test_x,test_y):
    """Grid-search the 2D CNN on ``(x, y)``, refit the best setting, print test metrics.

    Steps:
      1. Hold out 25% of ``(x, y)`` as a validation split (``random_state=1``).
      2. Run a 10-fold ``GridSearchCV`` over optimizer / batch size / epochs on the
         remaining 75%, with ``create_model`` as the network factory.
      3. Print the best score and the mean/std score of every configuration.
      4. Build a fresh network via ``create_model`` with the winning optimizer and fit
         it on the 75% split using the winning batch size and epoch count.
      5. Print accuracy, macro precision / recall / F1 and the confusion matrix for
         ``(test_x, test_y)``.

    Parameters
    ----------
    x, y : training samples and their one-hot labels.
    test_x, test_y : test samples and their one-hot labels.

    Returns
    -------
    None. All results are printed.
    """
    # Early stopping watches the *training* loss; the validation split passed to fit()
    # below is therefore only reported in the logs, it does not drive stopping.
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

    # Local names intentionally differ from the notebook-level X_train / y_train so the
    # globals that create_model reads are not shadowed here.
    x_fit, x_val, y_fit, y_val = train_test_split(x, y, test_size=0.25, random_state=1)

    # Tune hyperparameters with grid search.
    param_grid = dict(optimizer=['rmsprop'], batch_size=[32, 128], epochs=[30])
    grid = GridSearchCV(
        estimator=KerasClassifier(build_fn=create_model, verbose=0),
        param_grid=param_grid,
        n_jobs=1,
        cv=10,
    )
    grid_result = grid.fit(x_fit, y_fit, validation_data=(x_val, y_val), callbacks=[callback])

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    cv_results = grid_result.cv_results_
    for mean, stdev, param in zip(cv_results['mean_test_score'],
                                  cv_results['std_test_score'],
                                  cv_results['params']):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # Rebuild through create_model so the final network is the same architecture that
    # was cross-validated, instead of a hand-copied duplicate that can drift from it.
    best = grid_result.best_params_
    model_best = create_model(best['optimizer'])
    model_best.fit(
        x_fit, y_fit,
        validation_data=(x_val, y_val),
        batch_size=best['batch_size'],
        epochs=best['epochs'],
        callbacks=[callback],
    )

    # predict() takes arrays directly; predict_generator is deprecated and intended for
    # generators. verbose=0 keeps the call silent, as predict_generator was by default.
    Y_pred = model_best.predict(test_x, verbose=0)
    y_pred = np.argmax(Y_pred, axis=1)
    y_test_argmax = np.argmax(test_y, axis=1)

    accuracy = accuracy_score(y_test_argmax, y_pred)
    print('accuracy:', accuracy)
    precision = precision_score(y_test_argmax, y_pred, average = 'macro')
    print('precision:', precision)
    recall = recall_score(y_test_argmax, y_pred, average = 'macro')
    print('recall:', recall)
    f1 = f1_score(y_test_argmax, y_pred, average = 'macro')
    print('f1:',f1)
    cm = confusion_matrix(y_test_argmax, y_pred)
    print('confusion matrix')
    print(cm)

#### Train the model on the original training data and evaluate it on the test data

In [None]:
# 2D CNN experiment: fit on the train.csv split, report metrics on the test.csv split.
predict(x=X_train, y=y_train, test_x=X_test, test_y=y_test)

Best: 0.920760 using {'batch_size': 32, 'epochs': 20, 'optimizer': 'adam'}
0.852047 (0.140443) with: {'batch_size': 32, 'epochs': 10, 'optimizer': 'rmsprop'}
0.867836 (0.122757) with: {'batch_size': 32, 'epochs': 10, 'optimizer': 'adam'}
0.899415 (0.075733) with: {'batch_size': 32, 'epochs': 20, 'optimizer': 'rmsprop'}
0.920760 (0.088770) with: {'batch_size': 32, 'epochs': 20, 'optimizer': 'adam'}
0.915205 (0.067137) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'rmsprop'}
0.920760 (0.053796) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'adam'}
0.787719 (0.101754) with: {'batch_size': 128, 'epochs': 10, 'optimizer': 'rmsprop'}
0.814327 (0.108489) with: {'batch_size': 128, 'epochs': 10, 'optimizer': 'adam'}
0.857310 (0.084419) with: {'batch_size': 128, 'epochs': 20, 'optimizer': 'rmsprop'}
0.883626 (0.093172) with: {'batch_size': 128, 'epochs': 20, 'optimizer': 'adam'}
0.909942 (0.081450) with: {'batch_size': 128, 'epochs': 30, 'optimizer': 'rmsprop'}
0.904386 (0.087388) wi

#### Train the model on the Data Generation (DG) training data and evaluate it on the test data

In [None]:
# 2D CNN experiment: fit on the train_DG.csv split, report metrics on the test.csv split.
predict(x=X_train_DG, y=y_train_DG, test_x=X_test, test_y=y_test)

Best: 0.883833 using {'batch_size': 32, 'epochs': 20, 'optimizer': 'adam'}
0.855833 (0.080185) with: {'batch_size': 32, 'epochs': 10, 'optimizer': 'rmsprop'}
0.863833 (0.059601) with: {'batch_size': 32, 'epochs': 10, 'optimizer': 'adam'}
0.871833 (0.083348) with: {'batch_size': 32, 'epochs': 20, 'optimizer': 'rmsprop'}
0.883833 (0.074558) with: {'batch_size': 32, 'epochs': 20, 'optimizer': 'adam'}
0.879833 (0.097845) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'rmsprop'}
0.883833 (0.072380) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'adam'}
0.815167 (0.088187) with: {'batch_size': 128, 'epochs': 10, 'optimizer': 'rmsprop'}
0.843667 (0.062724) with: {'batch_size': 128, 'epochs': 10, 'optimizer': 'adam'}
0.863833 (0.082172) with: {'batch_size': 128, 'epochs': 20, 'optimizer': 'rmsprop'}
0.855667 (0.080139) with: {'batch_size': 128, 'epochs': 20, 'optimizer': 'adam'}
0.867500 (0.075936) with: {'batch_size': 128, 'epochs': 30, 'optimizer': 'rmsprop'}
0.883833 (0.045154) wi

### Create CNN 1D model for PCA data

In [None]:
def get_model(num_features=27, num_classes=3, kernel_size=3):
    """Build the (uncompiled) 1D CNN used on the PCA-reduced features.

    Parameters
    ----------
    num_features : int, default 27
        Length of one input vector; the default matches the 27 PCA components
        computed earlier in the notebook.
    num_classes : int, default 3
        Number of units in the softmax output layer.
    kernel_size : int, default 3
        Kernel width of the first ``Conv1D`` layer.

    Returns
    -------
    keras.Model
        Functional model mapping ``(num_features,)`` inputs to ``num_classes``
        softmax probabilities. The caller is responsible for ``compile``.

    Notes
    -----
    The defaults reproduce the original hard-coded network. The two pool-by-4 stages
    shrink the sequence quickly, so much smaller ``num_features`` values may leave
    nothing to pool.
    """
    inp = keras.layers.Input((num_features,))
    # Conv1D needs an explicit channel axis: (num_features,) -> (num_features, 1).
    x = keras.layers.Reshape((num_features,1))(inp)
    x = keras.layers.Conv1D(256, kernel_size, activation='relu')(x)
    x = keras.layers.MaxPooling1D(4)(x)
    x = keras.layers.Dropout(0.4)(x)
    x = keras.layers.Conv1D(128,1, activation='relu')(x)
    x = keras.layers.MaxPooling1D(4)(x)
    x = keras.layers.Dropout(0.4)(x)
    x = keras.layers.Flatten()(x)
    x = keras.layers.Dropout(0.4)(x)
    out = keras.layers.Dense(num_classes, activation='softmax')(x)
    return keras.Model(inputs=inp, outputs=out)

In [None]:
# Hold out 25% of the PCA-projected training data as a validation split.
X_train_, X_val, y_train_, y_val = train_test_split(
    X_train_PCA, y_train, test_size=0.25, random_state=1
)  # 0.25

# Stop once validation accuracy has gone 10 epochs without improving.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10)

model = get_model()
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(
    X_train_, y_train_,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=128,
    verbose=1,
    callbacks=[callback],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30


<tensorflow.python.keras.callbacks.History at 0x7f9437200370>

In [None]:
# Class probabilities for the PCA-projected test set. predict() takes arrays directly;
# predict_generator is deprecated and intended for generators. verbose=0 keeps the call
# silent, matching predict_generator's default.
Y_pred = model.predict(X_test_PCA, verbose=0)
# Collapse probabilities / one-hot rows to integer class ids for the sklearn metrics.
y_pred = np.argmax(Y_pred, axis=1)
y_test_argmax = np.argmax(y_test, axis=1)
accuracy = accuracy_score(y_test_argmax, y_pred)
print('accuracy:', accuracy)
precision = precision_score(y_test_argmax, y_pred, average = 'macro')
print('precision:', precision)
recall = recall_score(y_test_argmax, y_pred, average = 'macro')
print('recall:', recall)
f1 = f1_score(y_test_argmax, y_pred, average = 'macro')
print('f1:',f1)
cm = confusion_matrix(y_test_argmax, y_pred)
print('confusion matrix')
print(cm)

accuracy: 0.6666666666666666
precision: 0.6638528138528138
recall: 0.6371794871794872
f1: 0.6286003878018686
confusion matrix
[[25  1  0]
 [ 3  8  9]
 [ 7  2 11]]


In [None]:
# Load zca_500.csv (training) and zca_test.csv (test) into data frames.
df_zca, df_test_zca = (
    pd.read_csv(csv_name) for csv_name in ('zca_500.csv', 'zca_test.csv')
)

In [None]:
# Labels: the last column of each frame, as a flat 1-D array.
y_train_zca = df_zca.iloc[:, -1].to_numpy()
y_test_zca = df_test_zca.iloc[:, -1].to_numpy()

# Features: every column except the first and last, divided by 255,
# then each row reshaped to an (80, 80, 1) array.
X_train_zca = (df_zca.iloc[:, 1:-1] / 255).to_numpy()
X_train_zca = X_train_zca.reshape(len(X_train_zca), 80, 80, 1)

X_test_zca = (df_test_zca.iloc[:, 1:-1] / 255).to_numpy()
X_test_zca = X_test_zca.reshape(len(X_test_zca), 80, 80, 1)

In [None]:
# Fit the label -> integer mapping on the ZCA training labels only and reuse it for the
# test labels, so a class always maps to the same one-hot column in both splits.
# transform() raises ValueError if the test set contains a label absent from training.
y_train_zca = to_categorical(label_encoder.fit_transform(y_train_zca),3)
y_test_zca = to_categorical(label_encoder.transform(y_test_zca),3)
print(y_train_zca.shape)
print(y_test_zca.shape)

(1455, 3)
(66, 3)


In [None]:
def create_model_zca(optimizer, input_shape=None):
    """Build and compile the 2D CNN classifier for the ZCA data (3-way softmax).

    Parameters
    ----------
    optimizer : str or keras optimizer
        Passed unchanged to ``model.compile``.
    input_shape : tuple, optional
        Shape of one sample, ``(rows, cols, channels)``. When omitted it is derived from
        the notebook-level ``X_train_zca`` array as
        ``(X_train_zca.shape[1], X_train_zca.shape[2], 1)``, which is what this function
        always did before the parameter existed.

    Returns
    -------
    keras.models.Sequential
        Model compiled with categorical cross-entropy loss and an accuracy metric.
    """
    if input_shape is None:
        input_shape = (X_train_zca.shape[1], X_train_zca.shape[2], 1)

    # NOTE(review): these layers were originally written as ``Conv2D(64, 3, 3, ...)``.
    # With the Keras 2 / tf.keras signature ``Conv2D(filters, kernel_size, strides, ...)``
    # the second ``3`` is the stride, not the kernel width. The keywords below spell out
    # that interpretation explicitly; if a stride-1 3x3 convolution was intended, remove
    # ``strides``.
    model = Sequential()
    model.add(Conv2D(64, kernel_size=3, strides=3, input_shape=input_shape, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Second convolutional layer
    model.add(Conv2D(32, kernel_size=3, strides=3, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head: dropout on both sides of a 128-unit dense layer.
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
def predict_ZCA(x,y,test_x,test_y):
    """Grid-search the ZCA 2D CNN, refit the best setting on all of ``(x, y)``, print test metrics.

    Steps:
      1. Hold out 25% of ``(x, y)`` as a validation split (``random_state=1``).
      2. Run a 10-fold ``GridSearchCV`` over optimizer / batch size / epochs on the
         remaining 75%, with ``create_model_zca`` as the network factory.
      3. Print the best score and the mean/std score of every configuration.
      4. Build a fresh network via ``create_model_zca`` with the winning optimizer and
         fit it on the *full* ``(x, y)`` (no validation split) using the winning batch
         size and epoch count.
      5. Print accuracy, macro precision / recall / F1 and the confusion matrix for
         ``(test_x, test_y)``.

    Parameters
    ----------
    x, y : training samples and their one-hot labels.
    test_x, test_y : test samples and their one-hot labels.

    Returns
    -------
    None. All results are printed.
    """
    # Early stopping watches the training loss, so it also works for the final fit,
    # which has no validation data.
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
    x_fit, x_val, y_fit, y_val = train_test_split(x, y, test_size=0.25, random_state=1)

    # Tune hyperparameters with grid search.
    param_grid = dict(optimizer=['rmsprop', 'adam'], batch_size=[32, 128], epochs=[30])
    grid = GridSearchCV(
        estimator=KerasClassifier(build_fn=create_model_zca, verbose=0),
        param_grid=param_grid,
        n_jobs=1,
        cv=10,
    )
    grid_result = grid.fit(x_fit, y_fit, validation_data=(x_val, y_val), callbacks=[callback])

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    cv_results = grid_result.cv_results_
    for mean, stdev, param in zip(cv_results['mean_test_score'],
                                  cv_results['std_test_score'],
                                  cv_results['params']):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # Rebuild through create_model_zca so the final network is the same architecture
    # that was cross-validated, instead of a hand-copied duplicate.
    best = grid_result.best_params_
    model_best = create_model_zca(best['optimizer'])
    model_best.fit(
        x, y,
        batch_size=best['batch_size'],
        epochs=best['epochs'],
        callbacks=[callback],
    )

    # predict() takes arrays directly; predict_generator is deprecated and intended for
    # generators. verbose=0 keeps the call silent, as predict_generator was by default.
    Y_pred = model_best.predict(test_x, verbose=0)
    y_pred = np.argmax(Y_pred, axis=1)
    y_test_argmax = np.argmax(test_y, axis=1)

    accuracy = accuracy_score(y_test_argmax, y_pred)
    print('accuracy:', accuracy)
    precision = precision_score(y_test_argmax, y_pred, average = 'macro')
    print('precision:', precision)
    recall = recall_score(y_test_argmax, y_pred, average = 'macro')
    print('recall:', recall)
    f1 = f1_score(y_test_argmax, y_pred, average = 'macro')
    print('f1:',f1)
    cm = confusion_matrix(y_test_argmax, y_pred)
    print('confusion matrix')
    print(cm)

In [None]:
# ZCA experiment: fit on the zca_500.csv data, report metrics on the zca_test.csv data.
predict_ZCA(x=X_train_zca, y=y_train_zca, test_x=X_test_zca, test_y=y_test_zca)

Best: 0.736038 using {'batch_size': 32, 'epochs': 30, 'optimizer': 'adam'}
0.725129 (0.088091) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'rmsprop'}
0.736038 (0.029074) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'adam'}
0.644379 (0.048205) with: {'batch_size': 128, 'epochs': 30, 'optimizer': 'rmsprop'}
0.645346 (0.056641) with: {'batch_size': 128, 'epochs': 30, 'optimizer': 'adam'}
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
accuracy: 0.6363636363636364
precision: 0.6416666666666667
recall: 0.6192307692307693
f1: 0.6125
confusion matrix
[[21  4  1]
 [ 2 14  4]
 [ 1 12  7]]


In [None]:
# Load train_DG_500.csv; the last column holds the labels, the first column is skipped.
df_500 = pd.read_csv('train_DG_500.csv')
y_train_500 = df_500.iloc[:, -1].to_numpy()

# Features: divide by 255, then reshape each row to a (256, 256, 1) array.
X_train_500 = (df_500.iloc[:, 1:-1] / 255).to_numpy()
X_train_500 = X_train_500.reshape(len(X_train_500), 256, 256, 1)

In [None]:
# Integer-encode the labels, then expand them to 3-column one-hot rows.
y_train_500 = to_categorical(label_encoder.fit_transform(y_train_500), num_classes=3)

In [None]:
# X_train_500 was divided by 255 when it was built, while X_test was reshaped without any
# scaling. Apply the same scaling to the test features so the network is evaluated on
# inputs in the range it was trained on.
# NOTE(review): assumes test.csv stores values on the same raw scale as
# train_DG_500.csv; confirm before comparing against earlier results.
predict(X_train_500, y_train_500, X_test / 255, y_test)

Best: 0.897819 using {'batch_size': 128, 'epochs': 30, 'optimizer': 'rmsprop'}
0.608225 (0.305329) with: {'batch_size': 32, 'epochs': 30, 'optimizer': 'rmsprop'}
0.897819 (0.031682) with: {'batch_size': 128, 'epochs': 30, 'optimizer': 'rmsprop'}
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
accuracy: 0.7878787878787878
precision: 0.8390804597701149
recall: 0.7743589743589744
f1: 0.7534941249226964
confusion matrix
[[24  2  0]
 [ 0 20  0]
 [ 5  7  8]]
