In [None]:
import tensorflow as tf
from tensorflow import keras

import numpy as np

# Print the installed TensorFlow version.
print(tf.__version__)

In [None]:
# Vocabulary size, passed to load_data as num_words and later used as the
# Embedding layer's input dimension.
vocab_size = 10000

imdb = keras.datasets.imdb

# Load the IMDB train and test splits, each as a (data, labels) pair.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)

In [None]:
# Report how many training examples and training labels were loaded.
print("Training entries: {}, labels: {}".format(len(train_data), len(train_labels)))

In [None]:
# Lengths of two training sequences (indices 0 and 5000), inspected before any padding is applied.
len(train_data[0]), len(train_data[5000])


In [None]:
# Fetch the dataset's word -> integer index mapping (the next cell fetches it again before shifting it).
word_index = imdb.get_word_index()


In [None]:
# Dictionary mapping each word to its integer index, with every index shifted
# up by 3 so that indices 0-3 stay reserved for the special tokens below.
word_index = {word: idx + 3 for word, idx in imdb.get_word_index().items()}
word_index.update({
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,  # unknown
    "<UNUSED>": 3,
})

# Inverse lookup: integer index -> word.
reverse_word_index = {idx: word for word, idx in word_index.items()}

def decode_review(text):
    """Convert a sequence of integer word indices into a space-separated string.

    Each index is looked up in ``reverse_word_index``; indices that are not
    present are rendered as ``'<UNK>'``.
    """
    words = (reverse_word_index.get(token_id, '<UNK>') for token_id in text)
    return ' '.join(words)


In [None]:
# Decode the first training example back into text.
decode_review(train_data[0])

In [None]:
# Common sequence length, passed as maxlen to pad_sequences below.
maxlen = 256

# Pad the training sequences to maxlen, filling with the <PAD> index and
# adding the padding at the end of each sequence (padding='post').
train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                        value=word_index["<PAD>"],
                                                        padding='post',
                                                        maxlen=maxlen)

# Apply the same padding settings to the test sequences.
test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                       value=word_index["<PAD>"],
                                                       padding='post',
                                                       maxlen=maxlen)


In [None]:
# After padding, check the lengths of the first two training sequences.
len(train_data[0]), len(train_data[1])

In [None]:
# Shape of the padded training array.
train_data.shape

In [None]:
# Decode the first training example again, this time from the padded data.
decode_review(train_data[0])

In [None]:
enbedding_dim = 16
batch_size = 128

# Bidirectional SimpleRNN classifier, declared as a single layer list:
# embedding -> bidirectional RNN (return_sequences=False, so no per-timestep
# outputs) -> 16-unit ReLU layer -> single sigmoid output unit.
model = keras.Sequential([
    keras.layers.Embedding(vocab_size, enbedding_dim),
    keras.layers.Bidirectional(
        keras.layers.SimpleRNN(units=32, return_sequences=False)
    ),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()


In [None]:
# This is a binary classification problem and the model outputs a probability
# (a single-unit layer with a sigmoid activation), so binary_crossentropy is
# used as the loss function.
model.compile(optimizer='adam',  # the optimizer
              loss='binary_crossentropy',
              metrics=['accuracy'])  # track accuracy; it is reported under the name 'accuracy'

In [None]:
# Hold out the first 10,000 training examples (and their labels) for
# validation; the remainder forms the partial training set.
x_val, partial_x_train = train_data[:10000], train_data[10000:]
y_val, partial_y_train = train_labels[:10000], train_labels[10000:]

In [None]:
# Train the first model for 40 epochs on the partial training set, validating
# against the held-out (x_val, y_val) data.
# NOTE(review): batch_size is hard-coded to 512 here, while the model cell
# defines batch_size = 128 and never uses it — confirm which value is intended.
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=40,
                    batch_size=512,
                    validation_data=(x_val, y_val),
                    verbose=1)

In [None]:
# Evaluate the first model on the test set; the result is unpacked into the
# loss and the accuracy metric configured in compile().
loss,acc = model.evaluate(test_data,  test_labels, verbose=2)

print('loss',loss)
print('acc',acc)


In [None]:
# history.history holds the values recorded during fit(); list which series are available.
history_dict = history.history
history_dict.keys()

In [None]:
import matplotlib.pyplot as plt

# Pull the recorded training / validation curves out of the history dict.
acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']
loss = history_dict['loss']
val_loss = history_dict['val_loss']

# 1-based epoch numbers for the x axis.
epochs = range(1, len(acc) + 1)

# 'bo' means "blue dots"
plt.plot(epochs, loss, 'bo', label='Training loss')
# 'b' means "solid blue line"
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
plt.clf()   # clear the current figure

# Training accuracy as blue dots, validation accuracy as a solid blue line.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

In [None]:
# LSTM (long short-term memory) network, used to mitigate the plain RNN's
# inability to retain information over long sequences.
# The embedding's input dimension is the vocabulary size used for the movie
# reviews (10,000 words).
vocab_size = 10000
enbedding_dim = 16
batch_size = 128

# The model is bound to the lowercase name `lstm_model`, which is the name the
# compile / fit / evaluate cells that follow refer to.
lstm_model = keras.Sequential([
        keras.layers.Embedding(vocab_size, enbedding_dim,input_length = maxlen),
        # return_sequences=False: only the final output of each direction is
        # passed on, so the Dense head yields one prediction per review rather
        # than one per timestep, matching the one-label-per-review targets.
        keras.layers.Bidirectional(keras.layers.LSTM(units=32,return_sequences=False)),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
])

lstm_model.summary()

In [None]:
# This is a binary classification problem and the model outputs a probability
# (a single-unit layer with a sigmoid activation), so binary_crossentropy is
# used as the loss function.
lstm_model.compile(optimizer='adam',  # the optimizer
              loss='binary_crossentropy',
              metrics=['accuracy'])  # track accuracy; it is reported under the name 'accuracy'

In [None]:
# Train the LSTM on the same split as the first model: fit on the partial
# training set and validate on the explicit hold-out (x_val, y_val) instead of
# carving a further 20% out of partial_x_train. This keeps the two models'
# validation curves comparable and lets the LSTM train on the whole partial set.
history = lstm_model.fit(partial_x_train,
                    partial_y_train,
                    epochs=30,
                    batch_size=batch_size,
                    validation_data=(x_val, y_val),
                    verbose=1)

In [None]:
# Evaluate the LSTM model on the test set; the result is unpacked into the
# loss and the accuracy metric configured in compile().
loss,acc = lstm_model.evaluate(test_data,  test_labels, verbose=2)

print('loss',loss)
print('acc',acc)

In [None]:
# Rebind history_dict to the values recorded by the most recent fit() call and list the available series.
history_dict = history.history
history_dict.keys()

In [None]:
import matplotlib.pyplot as plt

# Pull the recorded training / validation curves out of the history dict.
acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']
loss = history_dict['loss']
val_loss = history_dict['val_loss']

# 1-based epoch numbers for the x axis.
epochs = range(1, len(acc) + 1)

# 'bo' means "blue dots"
plt.plot(epochs, loss, 'bo', label='Training loss')
# 'b' means "solid blue line"
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
plt.clf()   # clear the current figure

# Training accuracy as blue dots, validation accuracy as a solid blue line.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()