In [1]:
import keras
from keras.utils import np_utils
from keras import backend as K
from keras.layers import *
from keras.models import *
from keras.wrappers.scikit_learn import KerasClassifier

import numpy as np
import pickle

from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold, KFold
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, GradientBoostingClassifier
from sklearn.externals import joblib

from xgboost import XGBClassifier

Using TensorFlow backend.


In [2]:
# Load the feature matrix and the label vector from the pre-built .npy files.
# NOTE(review): the name `Input` appears to shadow the `Input` symbol pulled in
# by `from keras.layers import *` in the import cell -- confirm nothing later
# needs the Keras one.
Input, Output = (
    np.load(npy_path)
    for npy_path in (
        'numpy_array_data/Input5_data.npy',
        'numpy_array_data/labels5_data.npy',
    )
)

FileNotFoundError: [Errno 2] No such file or directory: 'numpy_array_data/Input5_data.npy'

In [32]:
from imblearn.combine import SMOTEENN

# Rebalance the classes with SMOTEENN; the seed is fixed so the resampled set
# is the same on every run.
# NOTE(review): resampling is applied to the full data set before the
# train/test split in the next cell, so the held-out split is drawn from
# resampled data -- confirm this is intended.
senn = SMOTEENN(random_state=0)
Oversampled_x, Oversampled_y = senn.fit_resample(X=Input, y=Output)

In [40]:
from sklearn.model_selection import train_test_split

# Hold out a test split (default test size). The seed makes the split
# reproducible across kernel restarts, and stratifying on the labels keeps the
# class proportions the same in both partitions.
# NOTE(review): Oversampled_x / Oversampled_y were resampled before this split,
# so the test partition is drawn from resampled data -- confirm this is
# acceptable for the reported scores.
train_x, test_x, train_y, test_y = train_test_split(
    Oversampled_x,
    Oversampled_y,
    random_state=0,
    stratify=Oversampled_y,
)
print("train_x's shape :", train_x.shape)
print("test_x's shape :", test_x.shape)
print("train_y's shape :", train_y.shape)
print("test_y's shape :", test_y.shape)

train_x's shape : (17523, 161)
test_x's shape : (5842, 161)
train_y's shape : (17523,)
test_y's shape : (5842,)


In [11]:
def recall(y_target, y_pred):
    """Recall computed with Keras backend ops: TP / (TP + FN).

    Both tensors are clipped to the range [0, 1] and rounded, so every entry
    is treated as 0 (negative) or 1 (positive) before counting.

    NOTE(review): because of the clip, any target value >= 1 counts as
    positive. If the targets are integer class ids (the model in this notebook
    ends in a 5-unit softmax), all non-zero classes are lumped together --
    confirm this is the intended metric.

    Args:
        y_target: tensor of ground-truth values.
        y_pred: tensor of predicted values.

    Returns:
        A single tensor value holding the recall.
    """
    target_binary = K.round(K.clip(y_target, 0, 1))
    pred_binary = K.round(K.clip(y_pred, 0, 1))

    # A true positive is an entry that is 1 in both target and prediction.
    true_positives = K.sum(target_binary * pred_binary)

    # True positives + false negatives = every entry whose target is 1.
    actual_positives = K.sum(target_binary)

    # K.epsilon() is a small constant added to avoid division by zero.
    return true_positives / (actual_positives + K.epsilon())


def precision(y_target, y_pred):
    """Precision computed with Keras backend ops: TP / (TP + FP).

    Both tensors are clipped to the range [0, 1] and rounded, so every entry
    is treated as 0 (negative) or 1 (positive) before counting.

    NOTE(review): because of the clip, any target value >= 1 counts as
    positive. If the targets are integer class ids (the model in this notebook
    ends in a 5-unit softmax), all non-zero classes are lumped together --
    confirm this is the intended metric.

    Args:
        y_target: tensor of ground-truth values.
        y_pred: tensor of predicted values.

    Returns:
        A single tensor value holding the precision.
    """
    pred_binary = K.round(K.clip(y_pred, 0, 1))
    target_binary = K.round(K.clip(y_target, 0, 1))

    # A true positive is an entry that is 1 in both target and prediction.
    true_positives = K.sum(target_binary * pred_binary)

    # True positives + false positives = every entry predicted as 1.
    predicted_positives = K.sum(pred_binary)

    # K.epsilon() is a small constant added to avoid division by zero.
    return true_positives / (predicted_positives + K.epsilon())

def f1score(y_target, y_pred):
    """F1 score: harmonic mean of `recall` and `precision` for the same inputs.

    Args:
        y_target: tensor of ground-truth values.
        y_pred: tensor of predicted values.

    Returns:
        A single tensor value holding 2 * R * P / (R + P + epsilon).
    """
    rec = recall(y_target, y_pred)
    prec = precision(y_target, y_pred)

    # K.epsilon() is a small constant added to avoid division by zero.
    denominator = rec + prec + K.epsilon()
    return (2 * rec * prec) / denominator


# Neural Network Classifier Grid Search

In [36]:

def create_NN_model(hidden_layer=(30, 30, 30), init='he_normal', input_dim=161, n_classes=5):
    """Build and compile a fully connected softmax classifier.

    Each hidden layer is Dense -> ReLU -> Dropout(0.2); the output layer is a
    Dense layer with `n_classes` units followed by softmax. The model is
    compiled with sparse categorical cross-entropy and the Adam optimizer.

    Args:
        hidden_layer: sequence of unit counts, one entry per hidden layer.
            Must contain at least one entry.
        init: kernel initializer applied to every Dense layer, including the
            first one, so that a search over `init` affects the whole network.
        input_dim: number of input features (defaults to 161, the width
            previously hard-coded here).
        n_classes: number of output classes (defaults to 5, as before).

    Returns:
        The compiled Keras `Sequential` model.

    Raises:
        ValueError: if `hidden_layer` is empty.
    """
    if len(hidden_layer) == 0:
        raise ValueError("hidden_layer must contain at least one layer size")

    model = Sequential()
    for position, units in enumerate(hidden_layer):
        if position == 0:
            # Only the first layer declares the input shape.
            model.add(Dense(units, input_shape=(input_dim,), kernel_initializer=init))
        else:
            model.add(Dense(units, kernel_initializer=init))
        model.add(Activation("relu"))
        model.add(Dropout(0.2))

    model.add(Dense(n_classes, kernel_initializer=init))
    model.add(Activation("softmax"))
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy", recall, precision, f1score],
    )
    return model


In [45]:
# Wrap the Keras model builder in the scikit-learn estimator interface so it
# can be handed to scikit-learn utilities; training output is silenced.
model = KerasClassifier(
    build_fn=create_NN_model,
    epochs=200,
    batch_size=10,
    verbose=0,
)

In [None]:
# Grid-search the network architecture (hidden layer sizes) and the kernel
# initializer for the wrapped Keras classifier.
init = ['glorot_uniform', 'normal', 'uniform']
hidden_layers = [[30, 30, 30], [50, 50, 50, 50], [70, 70, 70, 70], [128, 128, 128, 128]]
param_grid = dict(hidden_layer=hidden_layers, init=init)
NN_grid_model = GridSearchCV(estimator=model, param_grid=param_grid)
NN_grid_model.fit(train_x, train_y)
NN_best = NN_grid_model.best_estimator_
# The winning parameter combination is stored on the search object; the
# refitted estimator itself has no `best_params_` attribute.
NN_para = NN_grid_model.best_params_
with open("NN_best_para.pkl", 'wb') as f:
    pickle.dump(NN_para, f)

# Random Forest Classifier Grid Search

In [None]:
# 10-fold shuffled cross-validation with a fixed seed; this splitter is reused
# by the grid searches in the following cells.
k_fold = KFold(n_splits=10, shuffle=True, random_state=0)

RFC = RandomForestClassifier()
rf_param_grid = {"max_depth": [None],
                 # The original list contained 8 twice, which only repeated
                 # identical fits; the duplicate is dropped.
                 "max_features": [3, 8],
                 "min_samples_split": [2, 3, 8],
                 "bootstrap": [False],
                 "n_estimators": [100, 300],
                 "criterion": ['gini']}
# The plain "f1" scorer is binary-only. The labels here appear to be
# multi-class (the network in this notebook has a 5-unit softmax output), so
# the macro-averaged F1 is used instead -- TODO confirm the preferred average.
gsRFC = GridSearchCV(RFC, rf_param_grid, cv=k_fold, scoring="f1_macro", verbose=0)
gsRFC.fit(train_x, train_y)
RFC_best = gsRFC.best_estimator_

# Gradient Boosting Classifier Grid Search

In [5]:
GBC = GradientBoostingClassifier()
gb_param_grid = {"loss": ['deviance'],
                 "n_estimators": [100, 200, 300],
                 "learning_rate": [0.1, 0.05, 0.01],
                 "max_depth": [4, 8],
                 "min_samples_leaf": [100, 150],
                 "max_features": [0.3, 0.1]}
# The plain "f1" scorer is binary-only. The labels here appear to be
# multi-class (the network in this notebook has a 5-unit softmax output), so
# the macro-averaged F1 is used instead -- TODO confirm the preferred average.
gsGBC = GridSearchCV(GBC, gb_param_grid, cv=k_fold, scoring="f1_macro", verbose=0)
gsGBC.fit(train_x, train_y)
# Take the best estimator from the gradient-boosting search itself (the
# original read it from the random-forest search by mistake).
GBC_best = gsGBC.best_estimator_

# Support Vector classifier Grid Search

In [None]:
# probability=True so the fitted SVC exposes class probabilities, which the
# soft-voting ensemble at the end of the notebook relies on.
SVMC = SVC(probability=True)
svc_param_grid = {"kernel": ['rbf'],
                  "gamma": [0.001, 0.01, 0.1, 1],
                  "C": [1, 10, 50, 100, 200, 300, 1000]}
# The plain "f1" scorer is binary-only. The labels here appear to be
# multi-class (the network in this notebook has a 5-unit softmax output), so
# the macro-averaged F1 is used instead -- TODO confirm the preferred average.
gsSVMC = GridSearchCV(SVMC, svc_param_grid, cv=k_fold, scoring="f1_macro", verbose=0)
gsSVMC.fit(train_x, train_y)
# Take the best estimator from the SVC search itself (the original read it
# from the random-forest search by mistake).
SVMC_best = gsSVMC.best_estimator_

# XGBoost classifier Grid Search

In [None]:
XGBC = XGBClassifier()
xgb_param_grid = {"max_depth": [3, 5, 7],
                  "min_child_weight": [3, 5, 6],
                  "gamma": [0, 0.001, 0.01, 0.1, 1],
                  "learning_rate": [0.1, 0.05, 0.01]}
# The plain "f1" scorer is binary-only. The labels here appear to be
# multi-class (the network in this notebook has a 5-unit softmax output), so
# the macro-averaged F1 is used instead -- TODO confirm the preferred average.
gsXGBC = GridSearchCV(XGBC, xgb_param_grid, cv=k_fold, scoring="f1_macro", verbose=0)
gsXGBC.fit(train_x, train_y)
XGBC_best = gsXGBC.best_estimator_

# Voting classifier

In [None]:
# Soft-voting ensemble over the best estimator found by each grid search.
votingC = VotingClassifier(
    estimators=[
        ("nnc", NN_best),
        ('rfc', RFC_best),
        ("svc", SVMC_best),
        ('gbc', GBC_best),
        ("xgb", XGBC_best),
    ],
    voting="soft",
)
votingC = votingC.fit(train_x, train_y)
# Persist the fitted ensemble (the original passed an undefined name `obj`).
# NOTE(review): the ensemble contains a Keras-backed estimator; confirm it
# serializes correctly with joblib in the installed Keras version.
joblib.dump(votingC, "voting_classifier.pkl")