In [6]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.data import Dataset

In [3]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [None]:
batch_size = 128
train_dataset = Dataset.from_tensor_slices((X_train, y_train)).shuffle(buffer_size=1024).batch(batch_size=batch_size)
test_dataset = Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size=batch_size)

In [None]:
# 定义一个 RNN 模型
seq_len = 28
input_size = 28
hidden_size = 128
output_size = 10

inputs = tf.keras.Input(shape=(seq_len, input_size)) # 在 tf.keras.Input(...) 中的 shape 参数不应包括 batch_size，Keras 会自动推断批次大小。
hidden = tf.keras.layers.SimpleRNN(
	units=hidden_size,                        # 隐藏状态维度
	activation='sigmoid',            # 激活函数（默认是 tanh）
	return_sequences=False,       # 是否返回每个时间步的输出，还是只返回最后一个
	return_state=False,           # 是否返回最后一个隐藏状态
	go_backwards=False,           # 是否反向处理序列
	stateful=False,               # 是否保留状态（跨 batch）
	dropout=0.0,                  # 输入的 dropout
	recurrent_dropout=0.0         # 隐藏状态的 dropout
)(inputs)

outputs = tf.keras.layers.Dense(output_size)(hidden)

model = tf.keras.Model(inputs, outputs)

model.compile(optimizer='Adam', 
			  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
			  metrics=['accuracy']
			  )
model.summary()

model.fit(X_train, y_train, 
		  validation_split=0.2, 
		  verbose=1, 
		  epochs=10, 
		  batch_size=128
		  )

Epoch 1/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.2451 - loss: 2.0298 - val_accuracy: 0.3708 - val_loss: 1.6530
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.3896 - loss: 1.6512 - val_accuracy: 0.4653 - val_loss: 1.3938
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.4657 - loss: 1.3912 - val_accuracy: 0.4788 - val_loss: 1.3192
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.4873 - loss: 1.3332 - val_accuracy: 0.4818 - val_loss: 1.2860
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.4984 - loss: 1.3009 - val_accuracy: 0.5002 - val_loss: 1.2649
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.5084 - loss: 1.2753 - val_accuracy: 0.5215 - val_loss: 1.2153
Epoch 7/10
[1m375/375

<keras.src.callbacks.history.History at 0x11923d720>

In [None]:
# 定义一个 RNN 模型
seq_len = 28
input_size = 28
hidden_size = 128
output_size = 10

model = tf.keras.Sequential([
	tf.keras.Input(shape=(seq_len, input_size)),
	tf.keras.layers.SimpleRNN(
		units=hidden_size,
		activation='sigmoid',
		return_state=False,
		return_sequences=False
	),
	tf.keras.layers.Dense(output_size)
])

model.compile(optimizer='adam', 
			  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
			  metrics=['accuracy']
			  )
model.summary()


model.fit(X_train, y_train, 
		  validation_data=(X_test, y_test), 
		  verbose=1, 
		  epochs=10, 
		  batch_size=128
		  )

Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.2510 - loss: 2.0123 - val_accuracy: 0.3969 - val_loss: 1.6759
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.4196 - loss: 1.5927 - val_accuracy: 0.4685 - val_loss: 1.3833
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.4797 - loss: 1.3571 - val_accuracy: 0.4962 - val_loss: 1.3363
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.4916 - loss: 1.3218 - val_accuracy: 0.5056 - val_loss: 1.2824
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.5110 - loss: 1.2664 - val_accuracy: 0.5199 - val_loss: 1.2548
Epoch 6/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.5201 - loss: 1.2411 - val_accuracy: 0.5251 - val_loss: 1.2330
Epoch 7/10
[1m469/469[0m 

<keras.src.callbacks.history.History at 0x3104b8520>

In [None]:
class SimpleRNN(tf.keras.Model):
	def __init__(self, hidden_size, output_size):
		super().__init__() # 容易漏掉继承父类的初始化！
		# self.input = tf.keras.Input(shape=(seq_len, input_size)) # 不能在继承类中定义 input 层
		self.rnn = tf.keras.layers.SimpleRNN(
			units=hidden_size,
			activation='sigmoid',
			return_state=False,
			return_sequences=False,
			dropout=0,
			recurrent_dropout=0
		)
		self.dense = tf.keras.layers.Dense(output_size)

	def call(self, x):
			outputs = self.rnn(x)
			outputs = self.dense(outputs)
			return outputs

model = SimpleRNN(hidden_size=hidden_size, output_size=output_size)
model.compile(optimizer='adam',
			  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
			  metrics=['accuracy']
			  )
model.summary()

model.fit(X_train, y_train, 
		  validation_data=(X_test, y_test), 
		  verbose=1, 
		  epochs=10, 
		  batch_size=128
		  )

Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - accuracy: 0.2557 - loss: 1.9863 - val_accuracy: 0.3838 - val_loss: 1.6714
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.4235 - loss: 1.5708 - val_accuracy: 0.4703 - val_loss: 1.3875
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.4736 - loss: 1.3509 - val_accuracy: 0.4929 - val_loss: 1.3201
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.4955 - loss: 1.3033 - val_accuracy: 0.5087 - val_loss: 1.2875
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.5121 - loss: 1.2518 - val_accuracy: 0.5202 - val_loss: 1.2334
Epoch 6/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.5230 - loss: 1.2209 - val_accuracy: 0.5258 - val_loss: 1.2226
Epoch 7/10
[1m469/469

<keras.src.callbacks.history.History at 0x3080ed7e0>

In [None]:
class MyLayer(tf.keras.Layer):
	def __init__(self, output_dim):
		super().__init__()
		self.output_dim = output_dim
	
	def build(self, input_dim):
			self.weight = self.add_weight(
			shape=(input_dim[-1], self.output_dim),
			initializer='glorot_normal',
			trainable=True
			)
			self.bias = self.add_weight(
				shape=(self.output_dim, ),
				initializer='zeros',
				trainable=True
			)
		
	def call(self, x):
		return tf.matmul(x, self.weight) + self.bias
	
# 示例
layer = MyLayer(output_dim=1)
x = tf.random.normal((4, 10), 0, 1)
layer.build((4, 10))
layer(x)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 1.1668855 ],
       [ 0.16951352],
       [-0.5488201 ],
       [-0.40721592]], dtype=float32)>

In [194]:
class MyRNN(tf.keras.Layer):
	def __init__(self, hidden_size, output_size):
		super().__init__()
		self.hidden_size = hidden_size
		self.output_size = output_size
	
	def build(self, input_shape):
		self.Wxh = self.add_weight(
			shape=(input_shape[-1], self.hidden_size),
			initializer='glorot_normal',
			trainable=True
		)

		self.Whh = self.add_weight(
			shape=(self.hidden_size, self.hidden_size),
			initializer='glorot_normal',
			trainable=True
		)

		self.Why = self.add_weight(
			shape=(self.hidden_size, self.output_size),
			initializer='glorot_normal',
			trainable=True
		)

		self.bxh = self.add_weight(
			shape=(self.hidden_size, ),
			initializer='zeros',
			trainable=True
		)

		self.bhy = self.add_weight(
			shape=(self.output_size, ),
			initializer='zeros',
			trainable=True
		)
		
	# @tf.function # 关键一步
	# def call(self, X):
	# 	batch_size = tf.shape(X)[0]
	# 	seq_len = tf.shape(X)[1]
	# 	h = tf.zeros((batch_size, self.hidden_size)) # 注意隐藏层维度
	# 	for t in tf.range(seq_len):
	# 		xt = X[:, t, :]
	# 		h = tf.sigmoid(tf.matmul(xt, self.Wxh) + tf.matmul(h, self.Whh) + self.bxh)
		
	# 	outputs = tf.matmul(h,self.Why) + self.bhy
		
	# 	return outputs

	@tf.function # 关键一步
	def call(self, X):
		batch_size = tf.shape(X)[0]
		seq_len = tf.shape(X)[1]
		h = tf.zeros((batch_size, self.hidden_size)) # 注意隐藏层维度
		for t in tf.range(seq_len):
			xt = X[:, t, :]
			h = tf.sigmoid(tf.matmul(xt, self.Wxh) + tf.matmul(h, self.Whh) + self.bxh)
		
		outputs = tf.matmul(h,self.Why) + self.bhy
		
		return outputs
	# def call(self, X):
	# 	batch_size = tf.shape(X)[0]
	# 	seq_len = tf.shape(X)[1]
	# 	h = tf.zeros((batch_size, self.hidden_size))
		
	# 	# 定义循环体函数
	# 	def body(t, h):
	# 		xt = X[:, t, :]
	# 		h_new = tf.sigmoid(tf.matmul(xt, self.Wxh) + tf.matmul(h, self.Whh) + self.bxh)
	# 		return t + 1, h_new
		
	# 	# 定义条件函数
	# 	def cond(t, h):
	# 		return t < seq_len
		
	# 	# 执行 while_loop
	# 	# 返回值为 (final_t, final_h)，我们只需要 final_h
	# 	_, final_h = tf.while_loop(
	# 		cond=cond,
	# 		body=body,
	# 		loop_vars=[tf.constant(0), h],
	# 		parallel_iterations=1  # 按顺序处理时间步
	# 	)
		
	# 	outputs = tf.matmul(final_h, self.Why) + self.bhy
	# 	return outputs
# 示例
x = tf.random.normal((2, 28, 28), 0, 1)
myrnn = MyRNN(hidden_size=128, output_size=10)
myrnn(x)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[-0.33602533,  0.8466051 , -0.34166494, -0.41397402, -0.389801  ,
        -0.12300692,  0.66592544, -0.48977906,  1.0315523 ,  1.455004  ],
       [-0.5241675 ,  0.5626367 , -0.6679411 , -0.28649136, -0.3787795 ,
         0.81874186,  1.1080695 , -0.49634784,  0.5360158 ,  2.151848  ]],
      dtype=float32)>

In [195]:
inputs = tf.keras.Input(shape=(seq_len, input_size))
outputs = MyRNN(hidden_size=128, output_size=10)(inputs) 

model = tf.keras.Model(inputs, outputs)
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_test, y_test), verbose=1)

Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.2477 - loss: 2.0297 - val_accuracy: 0.3646 - val_loss: 1.6723
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.4228 - loss: 1.5753 - val_accuracy: 0.4649 - val_loss: 1.3820
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.4871 - loss: 1.3397 - val_accuracy: 0.4990 - val_loss: 1.3030
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.5121 - loss: 1.2743 - val_accuracy: 0.5109 - val_loss: 1.2511
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.5240 - loss: 1.2277 - val_accuracy: 0.5199 - val_loss: 1.2371
Epoch 6/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.5327 - loss: 1.2087 - val_accuracy: 0.5333 - val_loss: 1.2106
Epoch 7/10
[1m469/469[0m 

<keras.src.callbacks.history.History at 0x11c3a9db0>