In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import json
import os
import datetime
import time
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.python.keras.callbacks import TensorBoard


In [7]:
# Load data: pre-split feature/label arrays stored as .npy files.

train_x_path = "../../data/use_data/train_x.npy"
train_y_path = "../../data/use_data/train_y.npy"
test_x_path = "../../data/use_data/test_x.npy"
test_y_path = "../../data/use_data/test_y.npy"

# Training split first, then the held-out test split.
x_train, y_train = np.load(train_x_path), np.load(train_y_path)
x_test, y_test = np.load(test_x_path), np.load(test_y_path)


In [8]:
##### Hyperparameter settings
# NOTE: the "dropput" key is misspelled, but train() reads it under exactly
# this name, so it is kept as-is.
hp = dict(
    n_most_common_words=100000,     # first argument (input_dim) of the Embedding layer
    emb_dim=128,                    # embedding vector size
    dropput=0.2,                    # rate shared by SpatialDropout1D and the LSTM dropouts
    epochs=100,                     # upper bound; training also uses early stopping
    batch_size=64,
    activation_function="softmax",  # activation of the final Dense layer
    max_len=10,                     # input_length given to the Embedding layer
)

In [9]:
def train(train_x, train_y, hp,
          model_path="../../model/model.h5",
          log_path="../../model/logs"):
    """Build, compile and fit an Embedding -> SpatialDropout1D -> LSTM -> Dense model.

    Parameters
    ----------
    train_x : array-like
        Inputs passed to ``model.fit``. Presumably integer token ids padded to
        ``hp["max_len"]`` -- TODO confirm against the preprocessing step.
    train_y : array-like
        2-D targets; ``train_y.shape[1]`` sets the number of output units and
        the loss is ``categorical_crossentropy``.
    hp : dict
        Hyperparameters. Keys read here: ``n_most_common_words``, ``emb_dim``,
        ``max_len``, ``dropput`` (sic), ``activation_function``, ``epochs``,
        ``batch_size``.
    model_path : str, optional
        Where ``ModelCheckpoint`` writes the best model seen so far.
    log_path : str, optional
        TensorBoard log directory.

    Returns
    -------
    The fitted model. ``EarlyStopping`` is not asked to restore the best
    weights, so the returned model holds the weights of the last epoch that
    ran; the best-by-validation-accuracy model is the one at ``model_path``.
    """
    model = Sequential()
    model.add(Embedding(hp["n_most_common_words"], hp["emb_dim"], input_length=hp["max_len"]))
    model.add(SpatialDropout1D(hp["dropput"]))
    model.add(LSTM(64, dropout=hp["dropput"], recurrent_dropout=hp["dropput"]))
    model.add(Dense(train_y.shape[1], activation=hp['activation_function']))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # summary() prints the table itself and returns None; wrapping it in
    # print() only adds a stray "None" line to the output.
    model.summary()

    # Stop once validation loss has not improved for 10 consecutive epochs.
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    # The model is compiled with metrics=['accuracy'], so the validation metric
    # is logged as 'val_accuracy'. Monitoring 'val_acc' never matches a logged
    # key, and the checkpoint is then skipped every epoch.
    mc = ModelCheckpoint(model_path, monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
    tb = TensorBoard(log_dir=log_path)

    # validation_split holds out a quarter of the training data for the
    # val_loss / val_accuracy values the callbacks monitor.
    model.fit(train_x, train_y,
              epochs=hp["epochs"],
              batch_size=hp["batch_size"],
              validation_split=0.25,
              callbacks=[mc, es, tb],
              )
    return model

In [10]:
# Train the model, persist it, and report the finish time and elapsed duration.
model_path = "../../model/model.h5"
start_time = time.time()

model = train(x_train, y_train, hp)
# NOTE(review): this writes the last-epoch model to the same path that train()
# hands to ModelCheckpoint, so any best-model checkpoint there is overwritten.
model.save(model_path)

print(datetime.datetime.now())
elapsed = datetime.timedelta(seconds=time.time() - start_time)
# Drop the fractional seconds from the H:MM:SS.ffffff representation.
print(str(elapsed).split(".")[0])



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 10, 128)           12800000  
                                                                 
 spatial_dropout1d_1 (Spatia  (None, 10, 128)          0         
 lDropout1D)                                                     
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 dense_1 (Dense)             (None, 2)                 130       
                                                                 
Total params: 12,849,538
Trainable params: 12,849,538
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9

In [11]:
# Reduce each row of per-class values to the index of its largest entry,
# for both the reference labels and the model's predictions.
y_test_arg = y_test.argmax(axis=1)
y_pred = model.predict(x_test).argmax(axis=1)



In [12]:
# Import only the metric functions used below; a star import would pull every
# public sklearn.metrics name into the notebook namespace and hide where
# these functions come from.
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)

# Per-class precision / recall / F1 table.
print(classification_report(y_test_arg, y_pred))
print('acc:', round(accuracy_score(y_test_arg, y_pred), 3))

# Support-weighted averages across classes, rounded to three decimals.
for metric_label, metric_fn in (('precision:', precision_score),
                                ('recall:', recall_score),
                                ('f1-score:', f1_score)):
    print(metric_label, round(metric_fn(y_test_arg, y_pred, average='weighted'), 3))

              precision    recall  f1-score   support

           0       0.92      0.87      0.89      6913
           1       0.91      0.94      0.92      9409

    accuracy                           0.91     16322
   macro avg       0.91      0.91      0.91     16322
weighted avg       0.91      0.91      0.91     16322

acc: 0.911
precision: 0.911
recall: 0.911
f1-score: 0.911
