In [2]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping, Callback
import pandas as pd # crosstab
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier 
from xgboost import XGBClassifier # pip install xgboost
from lightgbm import LGBMClassifier # pip install lightgbm
from sklearn.metrics import accuracy_score
import numpy as np

In [3]:
# 1. Data: load MNIST, add a trailing channel axis and divide pixel values by 255.0
from tensorflow.keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)  # shapes as loaded

# (N, 28, 28) -> (N, 28, 28, 1): the Conv2D layers below declare input_shape=(28, 28, 1)
X_train = np.expand_dims(X_train, axis=-1) / 255.0
X_test = np.expand_dims(X_test, axis=-1) / 255.0
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)  # shapes after adding the channel axis

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)
(60000, 28, 28, 1) (60000,) (10000, 28, 28, 1) (10000,)


In [4]:
# Subsample the data so the tree ensembles in later cells train in reasonable time.
# A seeded Generator makes the subset reproducible across kernel restarts, and
# replace=False guarantees no image is drawn twice (choice() samples WITH
# replacement by default, which silently duplicates rows).
subset_rng = np.random.default_rng(42)
train_idxs = subset_rng.choice(50000, 7000, replace=False)  # 7,000 indices out of the first 50,000 training rows
val_idxs = subset_rng.choice(10000, 3000, replace=False)    # 3,000 indices out of the 10,000 test rows

# NOTE: this overwrites the full arrays, so the cell is not idempotent --
# re-run the data-loading cell before executing this one a second time.
X_train = X_train[train_idxs]
y_train = y_train[train_idxs]
X_test = X_test[val_idxs]
y_test = y_test[val_idxs]
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((7000, 28, 28, 1), (7000,), (3000, 28, 28, 1), (3000,))

In [6]:
# Feature extractor: the convolutional stack only, ending in Flatten (no Dense head).
# Either this list-style definition or the add()-style one in the next cell can be used.
# NOTE(review): no compile()/fit() call on cnn_layer is visible in this notebook, so
# predict() on it appears to use the initial (untrained) weights -- confirm this is intended.
cnn_layer = Sequential(
    [
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
    ]
)

In [None]:
# Same feature extractor as the list-style definition, assembled incrementally with add().
cnn_layer = Sequential()
for layer in (
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
):
    cnn_layer.add(layer)

In [7]:
# Use the CNN as a feature extractor: run both image sets through cnn_layer;
# the flattened outputs are the inputs to the classifiers in the following cells.
features_train, features_test = (
    cnn_layer.predict(images) for images in (X_train, X_test)
)



In [8]:
%%time
rf = RandomForestClassifier(n_estimators=100)
rf.fit(features_train, y_train)
y_pred_rf = rf.predict(features_test)
print("Random Forest 정확도:", accuracy_score(y_test, y_pred_rf))

Random Forest 정확도: 0.9623333333333334
CPU times: total: 22.1 s
Wall time: 22.3 s


In [9]:
%%time
xgb_model = XGBClassifier(max_depth=10, # 트리의 최대 깊이
                          n_estimators=100, #트리 갯수
                          learning_rate=0.01, #학습률
                          eval_metric='logloss', #평가지표(이진분류에서 주류)
                         )
xgb_model.fit(features_train, y_train)
y_pred_xgb = rf.predict(features_test)
print("xgb_model정확도:", accuracy_score(y_test, y_pred_xgb))

Random Forest 정확도: 0.9623333333333334
CPU times: total: 13min 27s
Wall time: 3min 35s


In [10]:
%%time
lgb_model = LGBMClassifier(force_col_wise=True, verbose=-1)
lgb_model.fit(features_train, y_train)
y_pred_lgb = rf.predict(features_test)
print("Random Forest 정확도:", accuracy_score(y_test, y_pred_lgb))

Random Forest 정확도: 0.9623333333333334
CPU times: total: 2min 50s
Wall time: 49.6 s


In [11]:
%%time
cnn_model = Sequential()
cnn_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model.add(MaxPooling2D((2, 2)))
cnn_model.add(Conv2D(64, (3, 3), activation='relu'))
cnn_model.add(MaxPooling2D((2, 2)))
cnn_model.add(Flatten())
cnn_model.add(Dense(120, activation='relu', kernel_initializer='he_normal'))
cnn_model.add(Dense(84, activation='relu', kernel_initializer='he_normal'))
cnn_model.add(Dense(10, activation='softmax'))
cnn_model.compile(loss='sparse_categorical_crossentropy', # 원핫인코딩을 안 하고 분류분석
             optimizer='adam',
             metrics=['accuracy'])
cnn_model.fit(X_train, y_train,
                 validation_split=0.2, 
                  epochs=50,
                  verbose=0)
loss, accuracy = cnn_model.evaluate(X_test, y_test)
print("CNN DNN 정확도:",  accuracy)

CNN DNN 정확도: 0.9866666793823242
CPU times: total: 39 s
Wall time: 35.7 s


In [12]:
%%time
cnn_dnn_model = Sequential()
cnn_dnn_model.add(Dense(120, input_shape=(1600,), activation='relu', kernel_initializer='he_normal'))
cnn_dnn_model.add(Dense(84, activation='relu', kernel_initializer='he_normal'))
cnn_dnn_model.add(Dense(10, activation='softmax'))
cnn_dnn_model.compile(loss='sparse_categorical_crossentropy', # 원핫인코딩을 안 하고 분류분석
             optimizer='adam',
             metrics=['accuracy'])
cnn_dnn_model.fit(features_train, y_train,
                 validation_split=0.2, 
                  epochs=50,
                  verbose=0)
y_pred_cnn_dnn = cnn_dnn_model.predict(features_test).argmax(axis=1)
print("cnn_dnn_model 정확도:",  accuracy_score(y_test, y_pred_cnn_dnn))

cnn_dnn_model 정확도: 0.9813333333333333
CPU times: total: 31.7 s
Wall time: 27.7 s


In [27]:
%%time
from sklearn.ensemble import VotingClassifier
voting_model = VotingClassifier(estimators=[
                                            ('rf', rf),
                                            ('xgb_model', xgb_model),
                                            ('lgb_model', lgb_model),],
                               voting='soft')
voting_model.fit(features_train, y_train)
y_pred_voting = voting_model.predict(features_test)
print("cnn_dnn_model 정확도:",  accuracy_score(y_test, y_pred_voting))

cnn_dnn_model 정확도: 0.97
CPU times: total: 16min 32s
Wall time: 4min 43s


In [28]:
%%time
from sklearn.ensemble import VotingClassifier
voting_model = VotingClassifier(estimators=[
                                            ('rf', rf),
                                            ('xgb_model', xgb_model),
                                            ('lgb_model', lgb_model),],
                               voting='hard')
voting_model.fit(features_train, y_train)
y_pred_voting = voting_model.predict(features_test)
print("cnn_dnn_model 정확도:",  accuracy_score(y_test, y_pred_voting))

cnn_dnn_model 정확도: 0.9683333333333334
CPU times: total: 15min 33s
Wall time: 4min 52s
