# 신경망 교차검증

In [None]:
#   교차검증, 하이퍼 파라미터 최적화
#   cross validation

#   학습용 : 검증용
#   8     :  2


#   샘플수가 적을때 안정적인 결과를 도출할때
#   교차검증 횟수 : fold
#   5 fold
#   1000건
#   200     200     200     200     200
#   test
#           test
#                   test
#                           test
#                                   test
#   88%     89%     91%     85%     99%

In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_classification

# Fix NumPy's global RNG seed.
np.random.seed(0)

# Number of independent variables (input features) per sample.
number_of_features = 100

# Synthetic binary-classification data: 10,000 samples, 3 informative
# features, no redundant features.
features, target = make_classification(
    n_samples=10000,
    n_features=number_of_features,
    n_informative=3,
    n_redundant=0,
    n_classes=2,
    weights=[0.5, 0.5],  # class weights for labels 0 and 1 (50:50)
    random_state=0,
)

In [6]:
from scikeras.wrappers import KerasClassifier

def create_network():
    """Build and compile a small feed-forward binary classifier.

    Layer widths: number_of_features -> 16 -> 16 -> 1 (sigmoid).

    The input width is read from the module-level ``number_of_features``
    when the function is called, so that name must match the number of
    columns in the data the model is fitted on.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Imported locally so this definition does not depend on another cell's
    # import list.
    from keras.layers import Input

    network = Sequential()
    # Declare the input shape with an explicit Input layer; passing
    # ``input_shape`` to a Dense layer is deprecated in current Keras.
    network.add(Input(shape=(number_of_features,)))
    network.add(Dense(units=16, activation="relu"))
    network.add(Dense(units=16, activation="relu"))
    network.add(Dense(units=1, activation="sigmoid"))
    network.compile(
        # Binary classification pairs a sigmoid output with binary
        # cross-entropy; a multiclass model would use softmax instead.
        loss="binary_crossentropy",
        optimizer="rmsprop",
        metrics=["accuracy"],
    )
    return network

#   input       h1      h2      output
#   100         16      16      1(sigmoid_이진분류(0/1))

In [7]:
# Wrap the model-building function so scikit-learn utilities can drive it.
neural_network = KerasClassifier(
    model=create_network,  # model-building function
    epochs=10,
    batch_size=100,  # mini-batch size
    verbose=0,
)

# Cross-validate the wrapped model; cv=3 sets the number of folds.
cross_val_score(neural_network, features, target, cv=3)

array([0.85902819, 0.90969097, 0.83378338])

In [9]:
# 2. 하이퍼 파라미터 튜닝
#시간이 오래 걸림

from sklearn.model_selection import GridSearchCV

# Fix NumPy's global RNG seed.
np.random.seed(0)

# Number of input features per sample for the tuning experiment.
number_of_features = 10

# Small synthetic binary-classification data set: 100 samples,
# 3 informative features, no redundant features.
features, target = make_classification(
    n_samples=100,
    n_features=number_of_features,
    n_informative=3,
    n_redundant=0,
    n_classes=2,
    weights=[0.5, 0.5],
    random_state=0,
)

In [10]:
def create_network(optimizer="rmsprop"):
    """Build and compile a small feed-forward binary classifier.

    Layer widths: number_of_features -> 16 -> 16 -> 1 (sigmoid).

    The input width is read from the module-level ``number_of_features``
    when the function is called, so that name must match the number of
    columns in the data the model is fitted on.

    Args:
        optimizer: Optimizer handed to ``compile``; defaults to "rmsprop".

    Returns:
        The compiled ``Sequential`` model.
    """
    # Imported locally so this definition does not depend on another cell's
    # import list.
    from keras.layers import Input

    network = Sequential()
    # Declare the input shape with an explicit Input layer; passing
    # ``input_shape`` to a Dense layer is deprecated in current Keras.
    network.add(Input(shape=(number_of_features,)))
    network.add(Dense(units=16, activation="relu"))
    network.add(Dense(units=16, activation="relu"))
    network.add(Dense(units=1, activation="sigmoid"))
    network.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )
    return network

In [11]:
# Wrapper with no fixed epochs or batch size.
neural_network = KerasClassifier(
    model=create_network,
    verbose=0,
)

In [12]:
# Candidate values for each hyperparameter.
epochs = [5, 10]
batches = [5, 10, 100]
optimizers = ["rmsprop", "adam"]
hyperparameters = {
    "optimizer": optimizers,
    "epochs": epochs,
    "batch_size": batches,
}

# Search the candidate grid, scoring each combination by cross-validation.
grid = GridSearchCV(estimator=neural_network, param_grid=hyperparameters)
grid_result = grid.fit(features, target)



In [13]:
# Hyperparameter combination selected by the grid search.
grid_result.best_params_

{'batch_size': 5, 'epochs': 5, 'optimizer': 'adam'}

In [14]:
# Mean cross-validated test score of each candidate combination.
grid_result.cv_results_['mean_test_score']

array([0.58, 0.69, 0.68, 0.69, 0.46, 0.53, 0.65, 0.6 , 0.52, 0.52, 0.56,
       0.54])

In [15]:
# Best mean cross-validated score. GridSearchCV exposes it directly, and
# unlike max() over cv_results_ it is not order-dependent when a failed fit
# leaves NaN among the scores.
grid_result.best_score_

0.6900000000000001

In [16]:
# Estimator for the best-scoring parameters. NOTE(review): with the default
# refit setting this should be refit on the full data; confirm in the docs.
grid_result.best_estimator_