In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

In [2]:
# Fix the random seeds for NumPy and TensorFlow
np.random.seed(22)
tf.random.set_seed(22)

In [3]:
# Only the 10000 most frequent words are kept in the vocabulary
totals_words = 10000
# Each word is represented by a 100-dimensional vector
embedding_len = 100
# Length that every review is fixed to
max_review_len = 80
batch_size = 128

# Words outside the `totals_words` most frequent ones are marked as unknown
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(
    num_words=totals_words
)

# Fix every review to a length of `max_review_len` tokens
x_train = keras.preprocessing.sequence.pad_sequences(
    x_train, maxlen=max_review_len
)
x_val = keras.preprocessing.sequence.pad_sequences(
    x_val, maxlen=max_review_len
)

# drop_remainder=True drops a final batch that is shorter than `batch_size`
db_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .batch(batch_size, drop_remainder=True)
)
db_val = (
    tf.data.Dataset.from_tensor_slices((x_val, y_val))
    .shuffle(1000)
    .batch(batch_size, drop_remainder=True)
)

# Pull one training batch to inspect its shapes
db_iter = iter(db_train)
sample = next(db_iter)
print(
    x_train.shape,
    y_train.shape,
    tf.reduce_max(y_train),
    tf.reduce_min(y_train),
)
print(x_val.shape, y_val.shape)
print(sample[0].shape)
print(sample[1].shape)

(25000, 80) (25000,) tf.Tensor(1, shape=(), dtype=int64) tf.Tensor(0, shape=(), dtype=int64)
(25000, 80) (25000,)
(128, 80)
(128,)


In [7]:
class MyRnn(keras.Model):
    """Two stacked LSTM cells over embedded token ids, ending in a sigmoid.

    Data flow: token ids [b, seq] -> embedding [b, seq, embedding_len] ->
    two ``LSTMCell`` layers unrolled step by step -> ``Dense(1)`` -> sigmoid,
    giving one probability per example with shape [b, 1].

    The vocabulary size, embedding width and sequence length are read from
    the module-level ``totals_words``, ``embedding_len`` and
    ``max_review_len``.

    Args:
        units: Size of the hidden state and cell state of each LSTM cell.
    """

    def __init__(self, units):
        super(MyRnn, self).__init__()
        # Remembered so call() can build zero initial states for whatever
        # batch size it receives, instead of baking in a global batch size.
        self.units = units
        # [b, seq] => [b, seq, embedding_len]
        self.embedding = layers.Embedding(
            input_dim=totals_words,
            output_dim=embedding_len,
            input_length=max_review_len,
        )
        # Each cell maps one time step [b, features] => [b, units]
        self.rnn_cell0 = layers.LSTMCell(units, dropout=0.5)
        self.rnn_cell1 = layers.LSTMCell(units, dropout=0.5)
        # [b, units] => [b, 1]
        self.fc = layers.Dense(1)

    def _zero_state(self, batch, dtype):
        """Return a fresh ``[h, c]`` pair of zeros, each of shape [batch, units]."""
        return [
            tf.zeros([batch, self.units], dtype=dtype),
            tf.zeros([batch, self.units], dtype=dtype),
        ]

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Integer token ids of shape [b, seq]. The sequence axis
                must have a statically known length because it is unstacked.
            training: Forwarded to both LSTM cells, which use it to decide
                whether their dropout is active.

        Returns:
            Tensor of shape [b, 1]: sigmoid of the dense layer output, i.e.
            the predicted probability that the label is 1.
        """
        # [b, seq] => [b, seq, embedding_len]
        x = self.embedding(inputs)

        # Initial states are created per call from the runtime batch size, so
        # partial batches and single-sample inference work as well.
        batch = tf.shape(x)[0]
        state0 = self._zero_state(batch, x.dtype)
        state1 = self._zero_state(batch, x.dtype)

        # Walk the sequence one time step at a time; each `word` is
        # [b, embedding_len]. After the last step `out1` is [b, units] and
        # summarises the whole sequence.
        for word in tf.unstack(x, axis=1):
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            out1, state1 = self.rnn_cell1(out0, state1, training=training)

        # [b, units] => [b, 1]
        out = self.fc(out1)
        # p(y is positive | x)
        pred = tf.sigmoid(out)

        return pred

In [8]:
def main():
    """Build a ``MyRnn`` with 64 units and train it for 10 epochs.

    The model is compiled with Adam (learning rate 0.0001), binary
    cross-entropy loss and an accuracy metric, then fitted on ``db_train``
    with ``db_val`` as validation data. Nothing is returned.
    """
    hidden_units = 64
    classifier = MyRnn(hidden_units)

    adam = keras.optimizers.Adam(0.0001)
    bce_loss = tf.losses.BinaryCrossentropy()
    classifier.compile(optimizer=adam, loss=bce_loss, metrics=['accuracy'])

    classifier.fit(db_train, epochs=10, validation_data=db_val)

In [9]:
# Run the training entry point defined above
main()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
