In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### 1.讀入深度學習套件

In [2]:
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.datasets import imdb

### 2.讀入數據

In [3]:
# Load the IMDB review dataset, keeping only the 10,000 most common words
# as the vocabulary (num_words=10000).
(x_train, y_train), (x_test, y_test)= imdb.load_data(num_words=10000) 

In [4]:
# Number of reviews in the training split.
len(x_train)

25000

In [5]:
# Number of reviews in the test split.
len(x_test)

25000

In [6]:
len(x_train[0])  # data length of the 1st review

218

In [7]:
len(x_train[1])  # data length of the 2nd review

189

In [8]:
len(x_train[99])  # data length of the 100th review

171

In [9]:
y_train[0]  # label of the 1st review: positive

1

In [10]:
y_train[1]  # label of the 2nd review: negative

0

### 3.資料處理

In [11]:
# Force every review to exactly 100 tokens: shorter reviews are padded with 0,
# longer ones are cut down. The padding/truncating sides are spelled out here;
# 'pre' is the Keras default, so both happen at the FRONT of each sequence.
x_train = sequence.pad_sequences(
    x_train,
    maxlen=100,
    padding='pre',
    truncating='pre',
)
x_test = sequence.pad_sequences(
    x_test,
    maxlen=100,
    padding='pre',
    truncating='pre',
)

### 4-1. 打造函數學習機

In [12]:
# The model takes a movie review as input and outputs whether it is
# a positive or a negative review.
model = Sequential()

In [13]:
# 1st layer — Embedding: maps each of the 10,000 vocabulary indices to a
# 128-dimensional dense vector (conceptually, squeezing a 10,000-dim input
# down to 128 dims).
model.add(
    Embedding(input_dim=10000, output_dim=128)
)

In [14]:
# LSTM layer: turns the sequence of 128-dim embeddings into one 87-dim vector.
# Dropout works like calling on random students to answer: nobody knows who
# will be picked next, which discourages "memorizing the answers" (overfitting).
model.add(
    LSTM(
        units=87,
        dropout=0.2,
        recurrent_dropout=0.2,
    )
)

In [15]:
# Output layer: a single sigmoid unit whose value is read as 0 (negative)
# or 1 (positive).
model.add(
    Dense(units=1, activation='sigmoid')
)

In [16]:
# Compile: binary cross-entropy loss for the 0/1 target, Adam optimizer,
# and accuracy reported as the monitoring metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [17]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 128)         1280000   
_________________________________________________________________
lstm (LSTM)                  (None, 87)                75168     
_________________________________________________________________
dense (Dense)                (None, 1)                 88        
Total params: 1,355,256
Trainable params: 1,355,256
Non-trainable params: 0
_________________________________________________________________


### 4-2.訓練

In [18]:
# validation_data: reports the error on the test data after every epoch.
# Overfitting shows up as loss getting smaller while val_loss gets larger.
# Hyper-parameters changed from the earlier run: batch_size 32 -> 40, epochs 10 -> 8.
# NOTE(review): the test split doubles as validation data here, so val_* metrics
# are not an unbiased held-out estimate if they are used for tuning.
model.fit(
    x_train,
    y_train,
    batch_size=40,
    epochs=8,
    validation_data=(x_test, y_test),
)

Train on 25000 samples, validate on 25000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x174795d2eb8>

In [19]:
# Persist the trained model in two parts: the architecture as JSON and the
# learned weights as an HDF5 file.
model_json = model.to_json()
# Write through a context manager so the file handle is flushed and closed
# deterministically; the bare open(...).write(...) form never closed it
# explicitly and left cleanup to the garbage collector.
# NOTE(review): the filename spelling 'architechture' is kept unchanged in case
# other code loads the file by this exact name.
with open('imdb_model_architechture.json', 'w') as json_file:
    json_file.write(model_json)
model.save_weights('imdb_model_weights.h5')