# In [None]:  (Jupyter cell marker left over from the notebook export)
import os
import numpy as np

# Fix the seed of NumPy's global random generator so that NumPy-based
# sampling (e.g. np.random.randint) is repeatable from run to run.
# NOTE(review): only NumPy is seeded here; nothing visible in this file seeds
# the Keras backend, so weight initialization may still vary — confirm.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop

# Location of the Jena climate CSV file.
data_dir = "/home/dingziming/下载/jena_climate"
fname = os.path.join(data_dir, "jena_climate_2009_2016.csv")

# Read the whole file; the context manager closes the handle even if the
# read fails part-way.
with open(fname, encoding="utf-8") as f:
    data = f.read()

# The first line holds the column names, everything after it the records.
# Blank lines (for instance the empty string produced by a trailing newline)
# are dropped so that every remaining entry can be parsed as a record.
header_line, _, body = data.partition("\n")
header = header_line.split(",")
lines = [line for line in body.split("\n") if line.strip()]

print(header)
print(len(lines))

# Parse the records into a float matrix with one row per record. The first
# column is skipped (presumably the timestamp, which is not numeric).
float_data = np.zeros((len(lines), len(header) - 1))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(",")[1:]]
    float_data[i, :] = values

# Plot the temperature time series (column 1 of the parsed data): first the
# complete record, then only its first 1440 samples.
temp = float_data[:, 1]

for n_points in (len(temp), 1440):
    plt.plot(range(n_points), temp[:n_points])
    plt.show()
# Standardize every feature column to zero mean and unit variance.
# The statistics come from the first 200000 rows only, which is the range the
# training generator draws from (min_index=0, max_index=200000); validation
# and test rows therefore do not influence the scaling.
train_rows = 200000
mean = float_data[:train_rows].mean(axis=0)
float_data -= mean
std = float_data[:train_rows].std(axis=0)
# A column that is constant over the training rows has zero spread; dividing
# by 1 instead keeps it finite (all zeros) rather than producing NaN/inf.
std[std == 0] = 1.0
float_data /= std


# Generator yielding time-series samples together with their targets.
def generator(data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=6):
    """Yield ``(samples, targets)`` batches of sliding windows over ``data``, forever.

    For every chosen row, the sample is the window of ``lookback`` timesteps
    ending just before that row, subsampled every ``step`` timesteps. The
    target is column 1 of the row ``delay`` timesteps after the chosen row.

    Args:
        data: 2-D NumPy array indexed as (timestep, feature).
        lookback: Number of past timesteps each window reaches back.
        delay: Offset, in timesteps, from the chosen row to its target row.
        min_index: Lowest row of ``data`` that a window may include.
        max_index: Exclusive upper bound for the chosen rows. ``None`` means
            ``len(data) - delay - 1``, so that the target row always exists.
        shuffle: If true, rows are drawn at random (with replacement) from
            ``[min_index + lookback, max_index)``. Otherwise rows are visited
            in order, restarting from ``min_index + lookback`` whenever a
            full batch would reach ``max_index``.
        batch_size: Number of samples in every batch.
        step: Sampling period inside a window, in timesteps.

    Yields:
        A tuple ``(samples, targets)`` of float64 arrays. ``samples`` has
        shape ``(batch_size, ceil(lookback / step), data.shape[-1])`` and
        ``targets`` has shape ``(batch_size,)``.
    """
    if max_index is None:
        # Leave room for the target row, which lies `delay` rows further on.
        max_index = len(data) - delay - 1

    first_row = min_index + lookback
    # Positions of the window timesteps relative to the chosen row. The
    # window length follows from `lookback` and `step`, so any sampling
    # period works, not only step == 6.
    window_offsets = np.arange(-lookback, 0, step)

    i = first_row
    while True:
        if shuffle:
            rows = np.random.randint(first_row, max_index, size=batch_size)
        else:
            # Wrap around instead of emitting a short final batch.
            if i + batch_size >= max_index:
                i = first_row
            rows = np.arange(i, i + batch_size)
            i += batch_size

        # Broadcasting rows (batch, 1) against the offsets (timesteps,) gives
        # a (batch, timesteps) index grid, so all windows are gathered in a
        # single indexing operation.
        window_index = rows[:, np.newaxis] + window_offsets
        samples = np.asarray(data[window_index], dtype=np.float64)
        targets = np.asarray(data[rows + delay, 1], dtype=np.float64)
        yield samples, targets


## Build the three generators (training, validation, test)
lookback = 1440   # timesteps of history in every sample
delay = 144       # the target lies this many timesteps after the window
step = 6          # keep one timestep out of every six inside a window
batch_size = 128

# Windowing settings shared by all three generators.
_window_kwargs = dict(lookback=lookback, delay=delay, step=step, batch_size=batch_size)

# Training generator: rows up to 200000, drawn at random.
train_gen = generator(float_data, min_index=0, max_index=200000, shuffle=True, **_window_kwargs)

# Validation generator: rows 200001 up to 300000, visited in order.
val_gen = generator(float_data, min_index=200001, max_index=300000, **_window_kwargs)

# Test generator: everything from row 300001 onwards, visited in order.
test_gen = generator(float_data, min_index=300001, max_index=None, **_window_kwargs)

# Number of batches to draw from val_gen in order to see the whole
# validation set.
val_steps = (300000 - 200001 - lookback) // batch_size

# Same count for the test set.
test_steps = (len(float_data) - 300001 - lookback) // batch_size

# 1-D convolutional regressor: three Conv1D layers (32 filters, kernel size 5)
# with max pooling after the first two, a global max pool over time, and a
# single linear output unit for the predicted value.
model = Sequential()
model.add(layers.Conv1D(32, 5, activation="relu", input_shape=(None, float_data.shape[-1])))
model.add(layers.MaxPooling1D(3))
model.add(layers.Conv1D(32, 5, activation="relu"))
# `MaxPooling1D` is the layer's real name; the misspelling `MaxPolling1D`
# raised AttributeError.
model.add(layers.MaxPooling1D(3))
model.add(layers.Conv1D(32, 5, activation="relu"))
model.add(layers.GlobalMaxPooling1D())
model.add(layers.Dense(1))
# `metrics` is passed as a list, the form Keras documents for this argument.
model.compile(optimizer=RMSprop(), loss="mae", metrics=["mae"])

# Train on batches from the training generator and evaluate on the
# validation generator after every epoch.
history = model.fit(
    train_gen,
    steps_per_epoch=500,
    epochs=20,
    validation_data=val_gen,
    validation_steps=val_steps,
)

# Per-epoch curves recorded by Keras. The model is a regressor compiled with
# the "mae" loss and metric, so the history holds no "acc"/"val_acc" entries;
# the second figure shows the mean absolute error instead.
loss = history.history["loss"]
val_loss = history.history["val_loss"]
mae = history.history["mae"]
val_mae = history.history["val_mae"]
epochs = range(1, len(loss) + 1)

plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()  # each figure needs its own legend

plt.figure()
plt.plot(epochs, mae, "bo", label="Training MAE")
plt.plot(epochs, val_mae, "b", label="Validation MAE")
plt.title("Training and validation MAE")
plt.legend()
plt.show()