# 15.3预测时间序列

In [1]:
import keras.losses
import numpy as np

# 设置随机种子
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
from tensorflow.python.keras.losses import mean_squared_error


def generate_time_series(batch_size, n_steps):
    """
    生成时间序列数据的函数
    参数:
    batch_size (int): 每次生成的序列数量
    n_steps (int): 每个序列的时间步长
    返回:
    numpy.ndarray: 形状为(batch_size, n_steps, 1)的时间序列数据
    """
    # 生成4个随机数数组，分别用于控制两个正弦波的频率和相位偏移
    freq1, freq2, offsets1, offsets2 = np.random.rand(4, batch_size, 1)
    # 创建时间序列，从0到1均匀分布n_steps个点
    time = np.linspace(0, 1, n_steps)
    # 生成第一个正弦波，振幅为0.5
    # 通过随机频率(freq1*10+10)和相位偏移(offsets1)控制波形
    series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10))  # wave 1
    # 添加第二个正弦波，振幅为0.2
    # 通过不同的随机频率(freq2*20+20)和相位偏移(offsets2)控制波形
    series += 0.2 * np.sin((time - offsets2) * (freq2 * 20 + 20))  # wave 2
    # 添加随机噪声，振幅为0.1
    # 使用均匀分布生成随机数并减去0.5使其均值为0
    series += 0.1 * (np.random.rand(batch_size, n_steps) - 0.5)  # noise
    # 调整数组形状并转换为float32类型
    return series[..., np.newaxis].astype(np.float32)


2025-10-31 15:41:50.308505: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-31 15:41:50.315061: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-10-31 15:41:50.322988: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-10-31 15:41:50.325348: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-10-31 15:41:50.331555: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# 创建训练集 验证集 测试集
n_steps = 50
series = generate_time_series(10000, n_steps + 1)
X_train,y_train = series[:7000, :n_steps], series[:7000, -1]
X_valid, y_valid = series[7000:9000, :n_steps], series[7000:9000, -1]
X_test, y_test = series[9000:, :n_steps], series[9000:, -1]

## 15.3.1 基准指标

In [3]:
from tensorflow import keras

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[50, 1]),
    keras.layers.Dense(1)
])
model.compile(loss='mse', optimizer='adam')
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))
model.evaluate(X_test, y_test)

  super().__init__(**kwargs)
I0000 00:00:1761896511.545891   54330 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1761896511.571627   54330 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1761896511.572598   54330 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1761896511.574736   54330 cuda_executor.cc:1015] successful

Epoch 1/20
[1m 75/219[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 683us/step - loss: 0.4023  

I0000 00:00:1761896512.274792   54450 service.cc:146] XLA service 0x7f6cdc016bb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1761896512.274822   54450 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 4080 Laptop GPU, Compute Capability 8.9
2025-10-31 15:41:52.282333: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-10-31 15:41:52.298347: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 91301
I0000 00:00:1761896512.399349   54450 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1554 - val_loss: 0.0562
Epoch 2/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 917us/step - loss: 0.0409 - val_loss: 0.0306
Epoch 3/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 944us/step - loss: 0.0249 - val_loss: 0.0210
Epoch 4/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 971us/step - loss: 0.0179 - val_loss: 0.0163
Epoch 5/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0142 - val_loss: 0.0133
Epoch 6/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 999us/step - loss: 0.0119 - val_loss: 0.0115
Epoch 7/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 901us/step - loss: 0.0103 - val_loss: 0.0101
Epoch 8/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 904us/step - loss: 0.0091 - val_loss: 0.0089
Epoch 9/20
[1m219/219[0m [32m━━━━━━━

0.004255720414221287

## 15.3.2 使用RNN

In [4]:
from tensorflow import keras

# 使用RNN
# 目标：预测序列的下一个时间步（单值），即 Sequence-to-One 任务

model = keras.models.Sequential([
    # 1. SimpleRNN：
    #    - 移除 return_sequences=True：让 RNN 只输出最后一个时间步的隐藏状态。
    #    - 此时输出形状变为 (None, 50)
    keras.layers.SimpleRNN(50, input_shape=[n_steps, 1]),

    # 2. Dense 层：
    #    - 添加一个 Dense(1) 层，将 50 个神经元压缩为最终的 1 个预测值。
    #    - 最终模型输出形状变为 (None, 1)，与 y_train 的形状 (None, 1) 兼容。
    keras.layers.Dense(1)
])

model.compile(loss='mse', optimizer='adam')
# 现在可以正确运行了
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))
model.evaluate(X_test, y_test)

Epoch 1/20


  super().__init__(**kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.0163 - val_loss: 0.0058
Epoch 2/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0046 - val_loss: 0.0039
Epoch 3/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0035 - val_loss: 0.0037
Epoch 4/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0032 - val_loss: 0.0030
Epoch 5/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0029 - val_loss: 0.0028
Epoch 6/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0029 - val_loss: 0.0027
Epoch 7/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0028 - val_loss: 0.0027
Epoch 8/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0028 - val_loss: 0.0029
Epoch 9/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━

0.0030385295394808054

## 15.3.3 深度RNN

In [5]:
model = keras.models.Sequential([
    keras.layers.SimpleRNN(50, return_sequences=True, input_shape=[None, 1]),
    keras.layers.SimpleRNN(50, return_sequences=True),
    keras.layers.SimpleRNN(20),
    keras.layers.Dense(1)
])

## 15.3.4 预测未来几个时间步长

In [9]:
import numpy as np
from tensorflow import keras
from sklearn.metrics import mean_squared_error

# 假设 n_steps 和 generate_time_series 函数已在环境中定义

# --- 辅助函数 (需自行确保定义，这里只是一个占位符示例) ---
def generate_time_series(batch_size, n_steps):
    # n_steps 是总序列长度 (n_steps + 10)

    # 1. 修正 offsets/freqs 的形状，确保它们在 time 维度上是 1
    # 原始代码中的 offsets/freqs 已经确保是 (batch_size, 1, 1) 的形状：
    # freq1, freq2, offsets1, offsets2 = np.random.rand(4, batch_size, 1)
    # (4, 1, 1) 的形状，但用于计算时会沿着 axis=1 (n_steps) 广播。

    # 2. 修正 time 的形状：从 (n_steps,) 变为 (1, n_steps, 1)
    # 这样 time 就能与 (batch_size, 1, 1) 的 offsets 正确广播为 (batch_size, n_steps, 1)
    freq1, freq2, offsets1, offsets2 = np.random.rand(4, batch_size, 1)

    # 关键修正：使用 [np.newaxis, :, np.newaxis] 将形状变为 (1, n_steps, 1)
    time = np.linspace(0, 1, n_steps)
    time = time[np.newaxis, :, np.newaxis]

    # 计算信号 1
    series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10))

    # 计算信号 2
    series += 0.2 * np.sin((time - offsets2) * (freq2 * 20 + 20))

    # 添加噪声 (噪声形状 (batch_size, n_steps, 1))
    series += 0.1 * (np.random.rand(batch_size, n_steps, 1) - 0.5)

    return series.astype(np.float32)

# ... (保持主代码不变，因为修正后的 generate_time_series 会输出正确的形状)

n_steps = 20 # 假设时间步长为 20

# --------------------------------------------------------------------------
# 模型定义
# --------------------------------------------------------------------------
model = keras.models.Sequential([
    keras.layers.SimpleRNN(50, return_sequences=True, input_shape=[None, 1]),
    keras.layers.SimpleRNN(50, return_sequences=True),
    keras.layers.SimpleRNN(20),
    keras.layers.Dense(1)
])

# 假设模型已经编译和训练，这里省略编译和训练步骤

# --------------------------------------------------------------------------
# 15.3.4 预测未来几个时间步长 (修正部分)
# --------------------------------------------------------------------------

# 1. 准备数据：序列总长 n_steps (输入) + 10 (预测目标)
steps_to_predict = 10
series = generate_time_series(1, n_steps + steps_to_predict)

# 2. 定义输入 X_new 和 真实目标值 y_new
X_new = series[:, :n_steps]     # 输入序列 (n_steps 步)
# 修正: y_new 必须是原始序列中 n_steps 之后的 10 个真实值，形状为 (1, 10, 1)
y_new = series[:, n_steps:]     # 真实目标序列 (10 步)

X = X_new
print(f"初始 X_new 形状: {X.shape}")

# 3. 多步预测循环 (自回归预测)
for step_ahead in range(steps_to_predict):
    # 使用当前序列 X 的 'step_ahead' 之后的部分进行预测
    y_pred_one = model.predict(X[:, step_ahead:])[:, -1]

    # 将预测值 (形状 (1, 1)) 重塑为 (1, 1, 1) 并拼接到 X 的时间轴 (axis=1) 上
    X = np.concatenate([X, y_pred_one.reshape(-1, 1, 1)], axis=1)

# 4. 提取最终的预测序列
# 提取 X 中 n_steps 之后的部分，即 10 步预测结果
Y_pred = X[:, n_steps:]
print(f"最终 Y_pred 形状: {Y_pred.shape}")
print(f"最终 y_new 形状: {y_new.shape}")

# 5. 修正：将 3 维数组展平为 1 维 (或 2 维)，以满足 sklearn 的要求
Y_pred_flat = Y_pred.ravel()
y_new_flat = y_new.ravel()

print(f"展平后 Y_pred 形状: {Y_pred_flat.shape}")
print(f"展平后 y_new 形状: {y_new_flat.shape}")

# 6. 计算均方误差
print("\n--- 评估结果 ---")
print(f"预测序列的 MSE: {mean_squared_error(y_new_flat, Y_pred_flat)}")

初始 X_new 形状: (1, 20, 1)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
最终 Y_pred 形状: (1, 10, 1)
最终 y_new 形状: (1, 10, 1)
展平后 Y_pred 形状: (10,)
展平后 y_new 形状: (10,)

--- 评估结果 ---
预测序列的 MSE: 0.09033016115427017


## 15.3.5 序列到序列的模型

In [10]:
model = keras.models.Sequential([
    keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),
    keras.layers.SimpleRNN(20, return_sequences=True),
    keras.layers.TimeDistributed(keras.layers.Dense(10))
])

  super().__init__(**kwargs)


In [11]:
# 利用最后一个层进行评估
def last_time_step_mse(Y_true, Y_pred):
    return keras.metrics.mean_squared_error(Y_true[:, -1], Y_pred[:, -1])