In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Load the ETT dataset and drop the raw date column.
# NOTE: the variable is named ETm1_* but the file that is read is ETTh1.csv.
ETm1_Data = pd.read_csv("./ETDataset/ETT-small/ETTh1.csv").drop(["date"], axis=1)


# Min-max normalization with scikit-learn.
# The scaler statistics are estimated on the leading 60% of the rows only --
# the same fraction that is used as the training split further down -- so that
# the min/max of the validation and test rows do not leak into the scaling.
# Rows outside the training range may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(ETm1_Data.iloc[: int(0.6 * len(ETm1_Data))])
ETm1_Data_normalized = pd.DataFrame(
    scaler.transform(ETm1_Data),
    columns=ETm1_Data.columns,
    index=ETm1_Data.index,
)

# # 下面是带时间戳的代码，如使用，请将transformer-input_size变为8
# import pandas as pd
# import numpy as np
# from sklearn.preprocessing import MinMaxScaler

# # 加载ET语料库
# ETm1_Data = pd.read_csv("./ETDataset/ETT-small/ETTh1.csv")


# # 转换日期为时间戳
# def transform_date_to_timestamp(df):
#     df["date"] = pd.to_datetime(df["date"])
#     df["timestamp"] = df["date"].apply(lambda x: x.timestamp())
#     df = df.drop(["date"], axis=1)  # 删除原始日期列
#     return df


# ETm1_Data = transform_date_to_timestamp(ETm1_Data)

# # 使用 scikit-learn 的 MinMaxScaler 进行归一化(时间戳没有归一化)
# scaler = MinMaxScaler()
# ETm1_Data_normalized = pd.DataFrame(
#     scaler.fit_transform(ETm1_Data.drop(["timestamp"], axis=1)),
#     columns=ETm1_Data.columns[:-1],
# )
# ETm1_Data_normalized["timestamp"] = ETm1_Data["timestamp"]

# # # 使用 scikit-learn 的 MinMaxScaler 进行归一化(时间戳归一化)
# # scaler = MinMaxScaler()
# # ETm1_Data_normalized = pd.DataFrame(
# #     scaler.fit_transform(ETm1_Data),
# #     columns=ETm1_Data.columns[:],
# # )

In [None]:
def get_features_labels(Data, Predict_HourLength = 96, Count_PerHour = 1):
    """Cut ``Data`` into sliding windows and split each window in two halves.

    Every window spans ``2 * Predict_HourLength * Count_PerHour`` consecutive
    rows and consecutive windows start one row apart. The first half of a
    window is the feature slice, the second half is the label slice.

    Args:
        Data: object supporting ``len()`` and ``.iloc`` row slicing.
        Predict_HourLength: factor of the half-window length.
        Count_PerHour: second factor of the half-window length.

    Returns:
        ``(features, labels)``: two lists of equal length holding the
        ``.iloc`` slices. Both are empty when ``Data`` has fewer rows than
        one full window.
    """
    half_window = Predict_HourLength * Count_PerHour
    window = half_window * 2

    features, labels = [], []
    for start in range(len(Data) - window + 1):
        chunk = Data.iloc[start : start + window]
        features.append(chunk.iloc[:half_window])
        labels.append(chunk.iloc[half_window:])
    return features, labels

In [None]:
# Split the rows into contiguous train / validation / test ranges (in file
# order): 60% / 20% / whatever remains.
total_samples = len(ETm1_Data)
train_size = int(0.6 * total_samples)
val_size = int(0.2 * total_samples)

# Raw (un-normalized) partitions.
train_data, val_data, test_data = (
    ETm1_Data[:train_size],
    ETm1_Data[train_size : train_size + val_size],
    ETm1_Data[train_size + val_size :],
)

# Normalized partitions, cut at exactly the same row positions.
train_data_normalized, val_data_normalized, test_data_normalized = (
    ETm1_Data_normalized[:train_size],
    ETm1_Data_normalized[train_size : train_size + val_size],
    ETm1_Data_normalized[train_size + val_size :],
)

# Build the sliding feature/label windows for every partition, raw first and
# normalized second.
train_features, train_labels = get_features_labels(train_data)
train_features_normalized, train_labels_normalized = get_features_labels(train_data_normalized)

val_features, val_labels = get_features_labels(val_data)
val_features_normalized, val_labels_normalized = get_features_labels(val_data_normalized)

test_features, test_labels = get_features_labels(test_data)
test_features_normalized, test_labels_normalized = get_features_labels(test_data_normalized)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader


class MyDataset(Dataset):
    """Dataset serving raw and normalized feature/label windows side by side.

    The four constructor arguments are index-aligned sequences whose items
    expose a ``.values`` array (e.g. pandas DataFrames). The dataset length is
    the length of ``features``.
    """

    def __init__(self, features, labels, features_normalized, labels_normalized):
        self.features, self.labels = features, labels
        self.features_normalized = features_normalized
        self.labels_normalized = labels_normalized

    def __len__(self):
        return len(self.features)

    def __getitem__(self, index):
        """Return ``(feature, label, feature_normalized, label_normalized)``.

        Each element is a freshly built ``float32`` tensor made from the
        ``.values`` of the window stored at ``index``.
        """
        sources = (
            self.features,
            self.labels,
            self.features_normalized,
            self.labels_normalized,
        )
        return tuple(
            torch.tensor(windows[index].values, dtype=torch.float32)
            for windows in sources
        )


# Build the Dataset instances for the training, validation and test splits.
train_dataset = MyDataset(train_features, train_labels, train_features_normalized, train_labels_normalized)
val_dataset = MyDataset(val_features, val_labels, val_features_normalized, val_labels_normalized)
test_dataset = MyDataset(test_features, test_labels, test_features_normalized, test_labels_normalized)

# Build the DataLoader instances.
# Only the training loader is shuffled. Validation and test batches are kept in
# a fixed order so that evaluation runs see the same batches every time.
batch_size = 64  # mini-batch size
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
import math
import torch.nn as nn


class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch-first sequence.

    A table ``pe`` of shape ``(1, max_len, d_model)`` is precomputed with
    ``pe[pos, 2i] = sin(pos / 10000^(2i / d_model))`` and
    ``pe[pos, 2i + 1] = cos(pos / 10000^(2i / d_model))``.

    Args:
        d_model: size of the last (embedding) dimension; may be even or odd.
        dropout: dropout probability applied after the encoding is added.
        max_len: number of positions that are precomputed.
    """

    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        # Encoding table of shape (max_len, d_model).
        pe = torch.zeros(max_len, d_model)
        # Column vector of positions [[0], [1], [2], ...].
        position = torch.arange(0, max_len).unsqueeze(1)
        # 1 / 10000^(2i / d_model), computed through exp/log.
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model)
        )
        angles = position * div_term
        # Even columns: PE(pos, 2i).
        pe[:, 0::2] = torch.sin(angles)
        # Odd columns: PE(pos, 2i+1). For an odd d_model there is one odd
        # column fewer than even columns, so the angle table is trimmed to
        # match; for an even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        # Leading batch axis so the table broadcasts over the batch.
        # register_buffer: not a trainable parameter, but still saved in the
        # state dict and moved together with the module.
        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x):
        """Return ``dropout(x + pe[:, :seq_len])``.

        ``x`` is the embedded input with the sequence on axis 1 and the
        embedding on the last axis, e.g. ``(1, 7, 128)`` for batch size 1,
        7 positions and embedding size 128.
        """
        # Buffers never require grad, so the sliced table is added directly.
        x = x + self.pe[:, : x.size(1)]
        return self.dropout(x)

In [None]:
from torch.nn import Transformer


class TransformerModel(nn.Module):
    """Encoder-decoder Transformer operating on batch-first sequences.

    Source and target sequences share one linear embedding
    (``input_size -> embed_size``) and one positional encoding. ``forward``
    returns the raw decoder output of width ``embed_size``; the ``predictor``
    layer (``embed_size -> input_size``) is NOT applied inside ``forward`` and
    has to be called by the user of the model.

    Args:
        input_size: number of features per time step.
        embed_size: model width (``d_model``).
        nhead: number of attention heads.
        num_layers: number of encoder layers and of decoder layers.
    """

    def __init__(self, input_size, embed_size, nhead, num_layers):
        super().__init__()
        self.embedding_layer = nn.Linear(input_size, embed_size)
        self.positional_encoding = PositionalEncoding(d_model=embed_size, dropout=0)
        self.transformer = Transformer(
            d_model=embed_size,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            batch_first=True,
        )
        self.predictor = nn.Linear(embed_size, input_size)

    def _embed(self, series):
        # Linear projection to the model width, then positional encoding.
        return self.positional_encoding(self.embedding_layer(series))

    def forward(self, src, tgt):
        """Run the Transformer on ``src`` / ``tgt`` with a causal target mask.

        The mask is a square subsequent mask sized by ``tgt``'s axis 1 and is
        moved to the device of the embedded source.
        """
        encoder_in = self._embed(src)
        decoder_in = self._embed(tgt)

        causal_mask = Transformer.generate_square_subsequent_mask(
            decoder_in.size(1)
        ).to(encoder_in.device)

        return self.transformer(encoder_in, decoder_in, tgt_mask=causal_mask)

In [None]:
input_size = 7  # number of input features
embed_size = 128
nhead = 4
num_layers = 3
learning_rate = 0.001
num_epochs = 200

# Prefer GPU index 4. Fall back to GPU 0 when CUDA is available but the host
# exposes fewer than five devices (selecting a missing index would make
# `model.to(device)` fail), and to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    gpu_index = 4 if torch.cuda.device_count() > 4 else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")

model = TransformerModel(input_size, embed_size, nhead, num_layers)
model = model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm


import os

# Halve the learning rate every 50 epochs.
scheduler = StepLR(optimizer, step_size=50, gamma=0.5)

# A checkpoint is written after every epoch; create the target directory up
# front so the first save cannot fail after a full epoch of training.
os.makedirs("./model", exist_ok=True)

for epoch in range(num_epochs):
    # Sample-weighted loss sums: each batch contributes (mean loss * batch size)
    # so that dividing by the dataset size below gives a true per-sample average.
    total_loss = 0
    total_val_loss = 0
    # Validation runs on epochs 0, 10, 20, ... (0-based).
    run_validation = epoch % 10 == 0

    with tqdm(total=len(train_loader), desc=f"Epoch {epoch+1}/{num_epochs}", unit="it") as pbar:
        # Training (teacher forcing).
        model.train()
        # A batch is (features, labels, features_normalized, labels_normalized):
        # the encoder input is the NORMALIZED features, the target is the RAW labels.
        # NOTE(review): inputs and targets live on different scales -- confirm this is intended.
        for _, targets, inputs, _ in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            # Decoder input: the target sequence shifted right by one step,
            # with an all-zero first step.
            targets_input = torch.cat([torch.zeros_like(targets[:, :1]), targets[:, :-1]], dim=1)
            optimizer.zero_grad()
            outputs = model(inputs, targets_input)
            outputs = model.predictor(outputs)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item() * targets.size(0)

            pbar.set_postfix({"loss (batch)": loss.item()})
            pbar.update(1)  # one training batch done

        scheduler.step()

    # Alternative training loop kept for reference (disabled):
    # with tqdm(total=len(train_loader), desc=f"Epoch {epoch+1}/{num_epochs}", unit="it") as pbar:
    #     # training
    #     model.train()
    #     for _, targets, inputs, _ in train_loader:
    #         inputs, targets = inputs.to(device), targets.to(device)
    #         predict_result = torch.zeros((targets.size(0), 1, targets.size(2))).to(device)
    #         optimizer.zero_grad()
    #         for i in range(targets.size(1)):
    #             outputs = model(inputs, predict_result)
    #             outputs = model.predictor(outputs)
    #             predict_result = torch.cat([predict_result, targets[:, i].unsqueeze(1)], dim=1)

    #         loss = criterion(outputs, targets)
    #         loss.backward()
    #         optimizer.step()
    #         total_loss += loss.item()

    #         pbar.set_postfix({"loss (batch)": loss.item()})
    #         pbar.update(1)  # one training batch done

    #     scheduler.step()

    if run_validation:
        with tqdm(total=len(val_loader), desc=f"Epoch {epoch+1}/{num_epochs}", unit="it") as pbar:
            # Validation (autoregressive decoding, no teacher forcing).
            model.eval()
            with torch.no_grad():
                for _, val_targets, val_inputs, _ in val_loader:
                    val_inputs, val_targets = val_inputs.to(device), val_targets.to(device)
                    # Start from a single all-zero step and append the newest
                    # predicted step after every decoder pass.
                    predict_result = torch.zeros((val_targets.size(0), 1, val_targets.size(2))).to(device)
                    for i in range(val_targets.size(1)):
                        val_outputs = model(val_inputs, predict_result)
                        val_outputs = model.predictor(val_outputs)
                        predict_result = torch.cat([predict_result, val_outputs[:, -1].unsqueeze(1)], dim=1)

                    # Drop the all-zero start step before comparing.
                    val_loss = criterion(predict_result[:, 1:], val_targets)
                    total_val_loss += val_loss.item() * val_targets.size(0)

                    pbar.set_postfix({"val_loss (batch)": val_loss.item()})
                    pbar.update(1)  # one validation batch done

    # Per-sample averages (the sums above are weighted by batch size).
    average_train_loss = total_loss / len(train_loader.dataset)
    if run_validation:
        average_val_loss = total_val_loss / len(val_loader.dataset)
        print(f"Epoch {epoch + 1}, average_train_loss: {average_train_loss}, average_val_loss: {average_val_loss}")
    else:
        print(f"Epoch {epoch + 1}, average_train_loss: {average_train_loss}")

    # Save the model parameters after every epoch.
    model_state_dict_path = "./model/transformer_{}.pth".format(epoch + 1)
    torch.save(model.state_dict(), model_state_dict_path)

    print("Saved model state dict:", model_state_dict_path)

#### 测试

In [None]:
import torch.nn.functional as F

# Load the model parameters.
# map_location remaps the stored tensors onto the current `device`, so a
# checkpoint written from a GPU can also be loaded on a different GPU or the CPU.
model_path = "./model/transformer_91.pth"
model.load_state_dict(torch.load(model_path, map_location=device))
# model = model.to("cpu")

# Sample-weighted sums: each batch contributes (batch mean * batch size), so
# dividing by the dataset size below gives a true per-sample average.
total_MAE = 0.0
total_MSE = 0.0

model.eval()
with torch.no_grad():
    # A batch is (features, labels, features_normalized, labels_normalized):
    # the model input is the normalized features, the reference is the raw labels.
    for _, test_targets, test_inputs, _ in test_loader:
        test_inputs, test_targets = test_inputs.to(device), test_targets.to(device)
        # Autoregressive decoding: start from one all-zero step and append the
        # newest predicted step after every decoder pass.
        predict_result = torch.zeros((test_targets.size(0), 1, test_targets.size(2))).to(device)
        for i in range(test_targets.size(1)):
            test_outputs = model(test_inputs, predict_result)
            test_outputs = model.predictor(test_outputs)
            predict_result = torch.cat([predict_result, test_outputs[:, -1].unsqueeze(1)], dim=1)
        # Drop the all-zero start step.
        predict_result = predict_result[:, 1:]

        # Alternative decoder inputs kept for reference (disabled):
        # predict_result = torch.zeros_like(test_targets)
        # predict_result = model(test_inputs, predict_result)
        # predict_result = model.predictor(predict_result)

        # predict_result = model(test_inputs, test_inputs)
        # predict_result = model.predictor(predict_result)

        # predict_result = model(test_inputs, test_targets)
        # predict_result = model.predictor(predict_result)

        batch_samples = test_targets.size(0)

        # MAE
        mae_loss = F.l1_loss(predict_result, test_targets, reduction='mean')
        total_MAE += mae_loss.item() * batch_samples

        # MSE
        mse_loss = F.mse_loss(predict_result, test_targets, reduction='mean')
        total_MSE += mse_loss.item() * batch_samples

    # Per-sample average MAE and MSE over the whole test set.
    average_MAE = total_MAE / len(test_loader.dataset)
    average_MSE = total_MSE / len(test_loader.dataset)
    print(f"Average MAE: {average_MAE}")
    print(f"Average MSE: {average_MSE}")

In [None]:
import matplotlib.pyplot as plt

# Load the model parameters.
# map_location remaps the stored tensors onto the current `device`, so a
# checkpoint written from a GPU can also be loaded on a different GPU or the CPU.
model_path = "./model/transformer_91.pth"
model.load_state_dict(torch.load(model_path, map_location=device))
# model = model.to("cpu")

model.eval()
with torch.no_grad():
    # NOTE(review): the variables are named test_* but the batch is drawn from
    # `train_loader` -- confirm whether plotting training windows is intended.
    for _, test_targets, test_inputs, _ in train_loader:
        test_inputs, test_targets = test_inputs.to(device), test_targets.to(device)
        # Autoregressive decoding: start from one all-zero step and append the
        # newest predicted step after every decoder pass.
        predict_result = torch.zeros((test_targets.size(0), 1, test_targets.size(2))).to(device)
        for i in range(test_targets.size(1)):
            test_outputs = model(test_inputs, predict_result)
            test_outputs = model.predictor(test_outputs)
            predict_result = torch.cat([predict_result, test_outputs[:, -1].unsqueeze(1)], dim=1)
        # Drop the all-zero start step.
        predict_result = predict_result[:, 1:]

        # Alternative decoder inputs kept for reference (disabled):
        # predict_result = torch.zeros_like(test_targets)
        # predict_result = model(test_inputs, predict_result)
        # predict_result = model.predictor(predict_result)

        # predict_result = model(test_inputs, test_inputs)
        # predict_result = model.predictor(predict_result)

        # predict_result = model(test_inputs, test_targets)
        # predict_result = model.predictor(predict_result)

        # Move to the CPU for plotting.
        test_targets = test_targets.cpu()
        predict_result = predict_result.cpu()

        # One figure per sample of the batch, showing the last feature column.
        for i in range(test_targets.size(0)):
            # Ground truth
            plt.plot(
                range(test_targets.size(1)),
                test_targets[i, :, -1],
                label="True Data",
                color="blue",
            )

            # Prediction
            plt.plot(
                range(test_targets.size(1)),
                predict_result[i, :, -1],
                label="Predicted Data",
                color="red",
            )

            # Axis labels and title
            plt.xlabel("Time Steps")
            plt.ylabel("Values")
            plt.title("Comparison of Predicted and True Data")

            # Legend
            plt.legend()

            # Show the figure
            plt.show()
        # Only the first batch is plotted.
        break