# 로이터 뉴스 데이터로 다항분류
네트워크 구성을 4가지로 작성 후 비교해보기 : Dense, RNN + Dense, CNN + Dense,  CNN+RNN+Dense


In [None]:
from keras.datasets import reuters
from keras.models import Sequential
from keras.layers import Dense, Embedding, Flatten, LSTM, Conv1D, GlobalMaxPooling1D, Dropout
import tensorflow as tf
from keras.utils import pad_sequences, to_categorical
import matplotlib.pyplot as plt

In [None]:
# Load the Reuters newswire dataset, keeping only the 10,000 most frequent
# words. The labels cover 46 topic categories.
num_classes = 46
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=10000)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)  # (8982,) (2246,) (8982,) (2246,)
print(x_train[0])
print(y_train[0])

# Train/validation split: the first 7,000 samples train the model and the
# remainder is held out for validation.
x_val = x_train[7000:]
y_val = y_train[7000:]
x_train = x_train[:7000]
y_train = y_train[:7000]

# Pad/truncate every sequence to the same length.
text_max_words = 120
x_train = pad_sequences(x_train, maxlen=text_max_words)
x_val = pad_sequences(x_val, maxlen=text_max_words)
x_test = pad_sequences(x_test, maxlen=text_max_words)
print(x_train[0], len(x_train[0]))

# One-hot encode the labels. The width is fixed explicitly: without
# num_classes, to_categorical infers it from the largest label present in
# each array, so a split that lacks the highest class would get a narrower
# matrix than the 46-unit softmax output expects.
y_train = to_categorical(y_train, num_classes=num_classes)
y_val = to_categorical(y_val, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)
print(y_train[0])


## 모델 구성1 - 완전연결층만 사용

In [None]:
# Model 1: embedding followed by fully connected layers only.
model = Sequential()
model.add(Embedding(10000, 128, input_length=text_max_words))
# Flatten the (sequence, embedding) output into one vector for the Dense layers.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
# One softmax unit per category.
model.add(Dense(46, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit a stray "None" line).
model.summary()

hist = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(x_val, y_val),
    verbose=2,
)


In [None]:
def plot_func(history=None, net=None):
    """Plot loss/accuracy curves for a training run and print test metrics.

    Loss is drawn on the left y-axis and accuracy on a twin right y-axis of
    the same figure. After the figure is shown, the model is evaluated on the
    module-level ``x_test`` / ``y_test`` and the result is printed.

    Args:
        history: Object whose ``history`` mapping holds the 'loss',
            'val_loss', 'accuracy' and 'val_accuracy' series. Defaults to the
            module-level ``hist``.
        net: Model to evaluate. Defaults to the module-level ``model``.
    """
    # Fall back to the notebook globals so existing plot_func() calls keep
    # working unchanged.
    if history is None:
        history = hist
    if net is None:
        net = model

    fig, loss_ax = plt.subplots()
    acc_ax = loss_ax.twinx()  # second y-axis sharing the same x-axis

    loss_ax.plot(history.history['loss'], 'y', label='train loss')
    loss_ax.plot(history.history['val_loss'], 'r', label='val loss')
    loss_ax.set_ylim([0.0, 3.0])

    acc_ax.plot(history.history['accuracy'], 'b', label='train acc')
    acc_ax.plot(history.history['val_accuracy'], 'g', label='val acc')
    acc_ax.set_ylim([0.0, 1.0])

    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    acc_ax.set_ylabel('accuracy')

    loss_ax.legend(loc='upper left')
    acc_ax.legend(loc='lower left')

    plt.show()

    print('eval :', net.evaluate(x_test, y_test, batch_size=64, verbose=0))


plot_func()

# 모델 구성 2 : RNN+ 완전연결층

In [None]:
# Model 2: embedding -> LSTM -> fully connected layers.
model = Sequential()
model.add(Embedding(10000, 128))
model.add(LSTM(128))
model.add(Dense(256, activation='relu'))
# One softmax unit per category.
model.add(Dense(46, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit a stray "None" line).
model.summary()

hist = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(x_val, y_val),
    verbose=2,
)

# Plot the curves for this run and print the test-set evaluation.
plot_func()

# 모델 구성 3 - CNN + 완전연결층 : 컨볼루션 신경망

In [None]:
# Model 3: embedding -> 1D convolution -> fully connected layers.
model = Sequential()
model.add(Embedding(10000, 128, input_length=text_max_words))
# 256 filters sliding over windows of 3 consecutive tokens.
model.add(Conv1D(256, 3, padding='valid', activation='relu', strides=1))
# Keep the maximum activation of each filter across the whole sequence.
model.add(GlobalMaxPooling1D())
model.add(Dense(256, activation='relu'))
# One softmax unit per category.
model.add(Dense(46, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit a stray "None" line).
model.summary()

hist = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(x_val, y_val),
    verbose=2,
)

# Plot the curves for this run and print the test-set evaluation.
plot_func()

# 모델 구성 4 - CNN + RNN + 완전연결층 : 순환 컨볼루션 신경망

In [None]:
from keras.layers import MaxPool1D

# Model 4: embedding -> 1D convolution -> max pooling -> LSTM -> fully
# connected layers.
model = Sequential()

model.add(Embedding(10000, 128, input_length=text_max_words))
# 256 filters sliding over windows of 3 consecutive tokens.
model.add(Conv1D(256, 3, padding='valid', activation='relu', strides=1))
# Downsample along the sequence axis so the LSTM sees a shorter sequence.
model.add(MaxPool1D(pool_size=4))
model.add(LSTM(128))
model.add(Dense(256, activation='relu'))
# One softmax unit per category.
model.add(Dense(46, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit a stray "None" line).
model.summary()

hist = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(x_val, y_val),
    verbose=2,
)

# Plot the curves for this run and print the test-set evaluation.
plot_func()