# 用RNN做IMDB情緒分析

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

## 載入IMDB資料集

In [None]:
from keras.datasets import imdb

In [None]:
# Load the IMDB reviews, already encoded as sequences of word indices.
# num_words=2000 keeps only the 2000 most frequent words, which matches the
# vocabulary size given to the Embedding layers later in the notebook.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=2000)

### 觀察輸入資料

In [None]:
# Shape of the training inputs before any padding is applied.
x_train.shape

In [None]:
# Container type of a single review (presumably a plain Python list of
# word indices -- confirm from the cell output).
type(x_train[0])

In [None]:
# Full content of the first training review.
x_train[0]

In [None]:
# First five entries of the first training review.
x_train[0][:5]

In [None]:
# Print the length of each of the first ten training reviews; the lengths
# differ, which is why the inputs are padded/truncated further down.
for review in x_train[:10]:
    print(len(review))

### 觀察輸出資料

In [None]:
# Labels of the first five training reviews (presumably 0 = negative,
# 1 = positive -- confirm against the dataset documentation).
y_train[:5]

### 將輸入資料設為相同長度

將每則評論統一為 100 個字：太長的截斷（`pad_sequences` 預設從開頭截掉，保留最後 100 個字），太短的在尾端補 0（`padding='post'`）。

In [None]:
from keras.preprocessing import sequence

In [None]:
# Bring every review in both splits to exactly 100 entries: longer reviews
# are truncated, shorter ones get zeros appended at the end (padding='post').
# NOTE(review): the Embedding layers in this notebook do not set
# mask_zero=True, so recurrent layers also step through the trailing zeros.
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=100, padding='post')
    for split in (x_train, x_test)
)

In [None]:
# After padding, the inputs should be a 2-D array with 100 columns (maxlen=100).
x_train.shape

In [None]:
# The first review after being padded/truncated to 100 entries.
x_train[0]

In [None]:
# A second sample after padding.
# NOTE(review): presumably picked because it is shorter than 100 words, so
# the trailing zero padding is visible -- confirm from the cell output.
x_train[5]

### 建構神經網路模型

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Embedding, Flatten, Dropout
from keras.layers import SimpleRNN, LSTM, GRU

In [None]:
# Create an empty Sequential model; layers are appended to it with model.add().
model = Sequential()

In [None]:
#1
# Baseline without recurrence: embed the words, flatten, classify with a
# dense network.
# Start from a fresh Sequential so this cell is self-contained. Appending to
# a `model` that already holds layers (e.g. after running another
# architecture cell, or this one twice) would stack the networks on top of
# each other instead of replacing them.
model = Sequential()
model.add(Embedding(2000, 32, input_length=100))  # 2000-word vocab -> 32-d vectors, 100 steps
model.add(Dropout(0.2))
model.add(Flatten())                              # (100, 32) -> 3200 features
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))         # single probability for the binary label

In [None]:
#2
# SimpleRNN variant: embed the words, run a 32-unit SimpleRNN over the
# sequence, then classify with a dense head.
# Start from a fresh Sequential so this cell is self-contained. Appending to
# a `model` that already holds layers (e.g. after running another
# architecture cell, or this one twice) would stack the networks on top of
# each other instead of replacing them.
model = Sequential()
model.add(Embedding(2000, 32))                    # 2000-word vocab -> 32-d vectors
model.add(Dropout(0.35))
model.add(SimpleRNN(32))                          # returns only the last hidden state
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.35))
model.add(Dense(1, activation='sigmoid'))         # single probability for the binary label

In [None]:
#3
# LSTM variant: embed the words, run a 32-unit LSTM over the sequence, then
# classify with a dense head.
# Start from a fresh Sequential so this cell is self-contained. Appending to
# a `model` that already holds layers (e.g. after running another
# architecture cell, or this one twice) would stack the networks on top of
# each other instead of replacing them.
model = Sequential()
model.add(Embedding(2000, 32))                    # 2000-word vocab -> 32-d vectors
model.add(LSTM(32))                               # returns only the last hidden state
model.add(Dropout(0.2))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))         # single probability for the binary label

In [None]:
#4
# GRU variant: embed the words, run a 32-unit GRU over the sequence, then
# classify with a dense head (no dropout in this variant).
# Start from a fresh Sequential so this cell is self-contained. Appending to
# a `model` that already holds layers (e.g. after running another
# architecture cell, or this one twice) would stack the networks on top of
# each other instead of replacing them.
model = Sequential()
model.add(Embedding(2000, 32))                    # 2000-word vocab -> 32-d vectors
model.add(GRU(32))                                # returns only the last hidden state
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))         # single probability for the binary label

In [None]:
# Print the layer-by-layer summary of the current model, including the
# parameter counts that the two hand calculations below reproduce.
model.summary()

In [None]:
# LSTM
# Hand-check of the LSTM(32) parameter count on 32-d inputs. Each unit of a
# transform has (32 input weights + 32 recurrent weights + 1 bias); the
# first term covers the three gates, the second the candidate cell state.
(32+32+1)*3*32 + (32+32+1)*32

In [None]:
# GRU
# Hand-check of the GRU(32) parameter count on 32-d inputs. Each unit of a
# transform has (32 input weights + 32 recurrent weights + 1 bias); the
# first term covers the two gates, the second the candidate hidden state.
# NOTE(review): this formula (6240) corresponds to reset_after=False. With
# reset_after=True the layer keeps a separate recurrent bias, giving
# 3*(32*32 + 32*32 + 2*32) = 6336 -- confirm which value model.summary()
# reports for the installed Keras version.
(32+32+1)*2*32 + (32+32+1)*32

### 組裝模型

In [None]:
# Configure training: Adam optimizer minimizing binary cross-entropy, with
# accuracy tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

## 訓練模型

In [None]:
# Train for 10 epochs in mini-batches of 100, holding out 20% of the training
# data for validation. The returned History object is kept for plotting.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=100,
    validation_split=0.2,
)

In [None]:
# Plot training and validation accuracy per epoch from the fit history.
for metric_key, curve_label in (('accuracy', 'acc'), ('val_accuracy', 'val_acc')):
    plt.plot(history.history[metric_key], label=curve_label)
plt.xlabel('epoch')
plt.ylabel('Acc')
plt.legend(loc="upper left")

## 評估模型

In [None]:
# Evaluate on the held-out test split. Because the model was compiled with
# metrics=['accuracy'], the result holds the loss followed by the accuracy.
score = model.evaluate(x_test, y_test)

In [None]:
# Report the test accuracy (score[1]; score[0] is the loss).
print('測試資料正確率:', score[1])