In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from keras.preprocessing import sequence

from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding
import tensorflow as tf

# Fix the NumPy and TensorFlow random seeds so repeated runs start from the same random state.
np.random.seed(0)
tf.random.set_seed(3)

In [2]:
# Load the Reuters newswire dataset bundled with Keras.
from keras.datasets import reuters

# num_words=1000 caps the vocabulary size; test_split=0.2 holds out 20% of the articles for testing.
(X_train, y_train), (X_test, y_test) = reuters.load_data(num_words=1000, test_split=0.2)

In [3]:
# Class count, taken as the largest training label id plus one (assumes label ids start at 0).
category = np.max(y_train) + 1

# Print the class count and the size of each split, one labelled line per value.
summary_rows = [
    ('카테고리: ', category),
    ('학습용 뉴스 기사: ', len(X_train)),
    ('테스트용 뉴스 기사: ', len(X_test)),
]
for caption, value in summary_rows:
    print(caption, value)

# Show the first training article as its sequence of integer word ids.
print(X_train[0])

카테고리:  46
학습용 뉴스 기사:  8982
테스트용 뉴스 기사:  2246
[1, 2, 2, 8, 43, 10, 447, 5, 25, 207, 270, 5, 2, 111, 16, 369, 186, 90, 67, 7, 89, 5, 19, 102, 6, 19, 124, 15, 90, 67, 84, 22, 482, 26, 7, 48, 4, 49, 8, 864, 39, 209, 154, 6, 151, 6, 83, 11, 15, 22, 155, 11, 15, 7, 48, 9, 2, 2, 504, 6, 258, 6, 272, 11, 15, 22, 134, 44, 11, 15, 16, 8, 197, 2, 90, 67, 52, 29, 209, 30, 32, 132, 6, 109, 15, 17, 12]


In [4]:
from tensorflow.keras.utils import to_categorical

In [5]:
# Pad or truncate every article to exactly 100 word ids so the inputs form a rectangular array.
X_train = sequence.pad_sequences(X_train, maxlen=100)
X_test = sequence.pad_sequences(X_test, maxlen=100)

# One-hot encode the labels with an explicit, shared width.
# Without `num_classes`, to_categorical infers the width from the largest label in each array
# separately, so a split that happens to lack the highest class id would get fewer columns than
# the other split (and than the 46-unit output layer). Pinning the width to `category`
# (max training label + 1, computed in an earlier cell) keeps both label matrices aligned.
num_classes = int(category)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)


In [6]:
# LSTM text classifier: word-id embedding -> LSTM -> softmax over 46 output units.
model = Sequential([
    Embedding(1000, 100),             # input dimension 1000 (matches num_words), 100-dim vectors
    LSTM(100, activation='tanh'),     # 100 recurrent units
    Dense(46, activation='softmax'),  # one probability per class
])

In [7]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [8]:
# Multi-class setup: categorical cross-entropy on one-hot labels, Adam optimizer, accuracy metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [9]:
# Train for 10 epochs in mini-batches of 100 and keep the per-epoch metrics in `history`.
# NOTE(review): the test split is passed as validation data here and is also the split used for
# the final evaluation, so it is not a fully held-out set — consider a separate validation split.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=100,
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 66ms/step - accuracy: 0.3369 - loss: 2.8823 - val_accuracy: 0.4835 - val_loss: 2.0852
Epoch 2/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 63ms/step - accuracy: 0.4821 - loss: 2.0581 - val_accuracy: 0.4871 - val_loss: 1.9604
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 63ms/step - accuracy: 0.5165 - loss: 1.8625 - val_accuracy: 0.5579 - val_loss: 1.7644
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 63ms/step - accuracy: 0.5450 - loss: 1.7515 - val_accuracy: 0.5730 - val_loss: 1.6724
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 65ms/step - accuracy: 0.5842 - loss: 1.6366 - val_accuracy: 0.5957 - val_loss: 1.6143
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 64ms/step - accuracy: 0.6100 - loss: 1.5422 - val_accuracy: 0.6167 - val_loss: 1.5414
Epoch 7/10
[1m90/90[0m [32m━━━━

In [10]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is the accuracy.
test_scores = model.evaluate(X_test, y_test)
test_scores[1]

[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.6812 - loss: 1.2939


0.6687444448471069

In [12]:
from keras.preprocessing import sequence

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Conv1D, MaxPooling1D

In [25]:
# Load the IMDB movie-review dataset bundled with Keras (not Reuters: this section switches datasets).
from keras.datasets import imdb

# num_words=5000 caps the vocabulary size; the labels are later used as binary targets.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=5000)

In [26]:
# Bring every review to a fixed length of 100 word ids, applying the same call to both splits.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=100)
    for split in (X_train, X_test)
)


In [27]:
# Sanity-check the array shapes after padding: (features, labels).
X_train.shape, y_train.shape

((25000, 100), (25000,))

In [28]:
# Conv1D + LSTM binary classifier, declared as a single ordered layer list.
model = Sequential([
    Embedding(5000, 100),   # input dimension 5000 (matches num_words), 100-dim vectors
    Dropout(0.5),           # drop half of the embedding activations during training
    Conv1D(64, 5, padding='valid', activation='relu', strides=1),  # 64 filters, kernel size 5
    MaxPooling1D(pool_size=4),  # downsample the sequence axis by 4
    LSTM(55),               # 55 recurrent units over the pooled sequence
    Dense(1),               # single logit ...
    Activation('sigmoid'),  # ... squashed to a probability
])

In [29]:
# Binary setup: binary cross-entropy on the sigmoid output, Adam optimizer, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [30]:
%%time
history = model.fit(X_train, y_train, batch_size=100, epochs=5, validation_data=(X_test, y_test))

Epoch 1/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.6762 - loss: 0.5525 - val_accuracy: 0.8350 - val_loss: 0.3751
Epoch 2/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 21ms/step - accuracy: 0.8695 - loss: 0.3094 - val_accuracy: 0.8508 - val_loss: 0.3401
Epoch 3/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - accuracy: 0.8930 - loss: 0.2625 - val_accuracy: 0.8515 - val_loss: 0.3365
Epoch 4/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 21ms/step - accuracy: 0.9071 - loss: 0.2329 - val_accuracy: 0.8511 - val_loss: 0.3456
Epoch 5/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - accuracy: 0.9186 - loss: 0.2078 - val_accuracy: 0.8156 - val_loss: 0.4306
CPU times: total: 2min 33s
Wall time: 27.2 s


In [31]:
# Print the test accuracy (index 1 of evaluate()'s result; index 0 is the loss).
test_scores = model.evaluate(X_test, y_test)
print("\n Test Accuracy: %.4f" % (test_scores[1]))

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8146 - loss: 0.4314

 Test Accuracy: 0.8156


In [35]:
# 예측 확률
pred_prob = model.predict(X_test)
pred = np.where(pred_prob > 0.5, 1, 0).flatten()
pred

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step


array([0, 1, 1, ..., 0, 0, 1])

In [36]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the thresholded predictions.
# zero_division is documented as taking "warn", 0, 1 or nan; the original passed the bool True,
# which only worked because True == 1. The numeric value 1 states the intent explicitly:
# report 1.0 for a metric whose denominator is zero.
report = classification_report(y_test, pred, zero_division=1)
print(report)

              precision    recall  f1-score   support

           0       0.76      0.93      0.83     12500
           1       0.91      0.70      0.79     12500

    accuracy                           0.82     25000
   macro avg       0.83      0.82      0.81     25000
weighted avg       0.83      0.82      0.81     25000

