In [78]:
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

from torch_geometric.data import Data
import warnings
warnings.filterwarnings("ignore")
import torch


In [79]:
import torch
print("CUDA Available:", torch.cuda.is_available())
x = torch.tensor([1.0, 2.0, 3.0], device='cuda')
print(x)

CUDA Available: True
tensor([1., 2., 3.], device='cuda:0')


In [4]:
data=pd.read_csv("data_consolidation_standardization.csv")
data=data.drop(labels="Unnamed: 0", axis=1)
# data_test=data.loc[data['CellID']<=100]

In [6]:
data.sort_values(['CellID','datetime'], inplace=True)
data_test=data

# 重构数据

In [7]:
# 初始化一个数组来存储重构后的数据
num_nodes = 100
num_timesteps = 168
num_features = 3

X为需要用到的数据

In [8]:
# 创建一个空的 numpy 数组，用于存储每个节点的特征，这里是提取和时序有关的数据，为训练集和测试集构建做准备
X = np.zeros((num_nodes, num_timesteps, num_features))

# 遍历每个基站，填充数据
for i in range(num_nodes):
    # 提取当前基站的数据
    node_data = data_test[data_test['CellID'] == i+1]  # 假设 CellID 从 1 开始
    # 确保数据的长度正确
    if len(node_data) == num_timesteps:
        X[i, :, 0] = node_data['internet'].values
        X[i, :, 1] = node_data['calls'].values
        X[i, :, 2] = node_data['sms'].values
    else:
        print(f"Warning: Node {i+1} has incorrect number of timesteps: {len(node_data)}")


In [94]:
import numpy as np

# 参数设置
num_nodes = 100  # 节点数量
num_timesteps = 168  # 每个节点的时间点数量
window_size = 6  # 滑动窗口大小
predict_steps = 6  # 预测未来的时间步数

# 计算可以利用的时间序列长度（减去窗口大小和预测的步数）
usable_timesteps = num_timesteps - window_size - predict_steps + 1

# 初始化训练数据和标签数组
X_train = np.zeros((usable_timesteps, num_nodes, 3, window_size))  # 特征窗口数据
Y_train = np.zeros((usable_timesteps, num_nodes, predict_steps))  # 预测目标数据，现在每个节点有3个预测点

# 数据提取
for t in range(usable_timesteps):
    # 提取每个节点的特征窗口
    X_train[t] = np.transpose(X[:, t:t+window_size, :], (0, 2, 1))
    # 提取每个节点对应窗口后未来3个时间点的internet特征作为标签
    Y_train[t] = X[:, t+window_size:t+window_size+predict_steps, 0].reshape(num_nodes, predict_steps)

# 检查最终的数据形状
print(f"X_train shape: {X_train.shape}")  # 应为 (usable_timesteps, 100, 3, 6)
print(f"Y_train shape: {Y_train.shape}")  # 应为 (usable_timesteps, 100, 3)


X_train shape: (157, 100, 3, 6)
Y_train shape: (157, 100, 6)


In [95]:
import torch
from torch_geometric.data import Data


In [96]:
edge_index = []

# 因为边是无向的，所以每个连接要添加两次，一次为 (source, target)，另一次为 (target, source)
for timestep in range(num_timesteps - 6):  # 如果时间步涉及到边的创建，可能需要调整范围
    for cell_id in range(1, num_nodes):  # 假设从1开始，确保不超出边界
        if cell_id < num_nodes:  # 确保不会创建超出范围的边
            src = cell_id - 1  # CellID 从1开始的调整
            dst = cell_id     # 相邻的CellID，确保不越界

            # 添加边 (src, dst) 和 (dst, src)
            edge_index.append((src, dst))
            edge_index.append((dst, src))

# 转换为PyTorch张量
edge_index_tensor = torch.tensor(edge_index, dtype=torch.long).t().contiguous()


In [97]:
import torch
from torch_geometric_temporal.signal import StaticGraphTemporalSignal

# 假设 X_tensor 和 Y_tensor 已经准备好
# edge_index_tensor 也已经定义

# # 特征和目标应该是列表形式，其中每个元素是一个时间步的数据

# features = [X_train[i] for i in range(X_tensor.shape[0])]
# targets = [Y_train[i] for i in range(Y_tensor.shape[0])]

features = [X_train[i] for i in range(X_train.shape[0])]
targets = [Y_train[i] for i in range(Y_train.shape[0])]


# 由于这里假设所有时间步使用相同的边，我们将 edge_index 和 edge_weight 设置为常量
# 如果没有特定的边权重，可以使用全1的张量
edge_weights = torch.ones(edge_index_tensor.size(1), dtype=torch.float)

# 创建 StaticGraphTemporalSignal 对象
graph_data = StaticGraphTemporalSignal(edge_index=edge_index_tensor, edge_weight=edge_weights, features=features, targets=targets)

# 数据检查

In [98]:
graph_data[0]

Data(x=[100, 3, 6], edge_index=[2, 32076], edge_attr=[32076], y=[100, 6])

# 模型构建

In [117]:
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import A3TGCN

class TemporalGNN(torch.nn.Module):
    def __init__(self, node_features, periods):
        super(TemporalGNN, self).__init__()
        # Attention Temporal Graph Convolutional Cell
        self.tgnn = A3TGCN(in_channels=node_features, 
                           out_channels=32, 
                           periods=periods)#periods决定模型侧重短期特征还是长期特征
        # Equals single-shot prediction
        self.linear = torch.nn.Linear(32, periods)#预测未来的12个值

#     def forward(self, x, edge_index,edge_attr):
    def forward(self, x, edge_index):
        """
        x = Node features for T time steps
        edge_index = Graph edge indices
        """
#         h = self.tgnn(x, edge_index,edge_attr)
        h = self.tgnn(x, edge_index)
        h = F.relu(h)
        h = self.linear(h)
        return h

# TemporalGNN(node_features=3, periods=1)

# 训练和测试

In [118]:
# GPU support

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
subset = 2000#限制数据集为2000

# Create model and optimizers
model = TemporalGNN(node_features=3, periods=6).to(device)#node_features是节点特征长度，而periods是预测的时间步,todevice一方面
# model = TemporalGNN(node_features=3, periods=1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
model.train()

print("Running training...")
for epoch in range(10): 
    loss = 0
    step = 0
    for snapshot in graph_data:
        snapshot = snapshot.to(device)
        # Get model predictions
        y_hat = model(snapshot.x, snapshot.edge_index)
#         y_hat = model(snapshot.x, snapshot.edge_index,snapshot.edge_attr)
        # Mean squared error
        loss = loss + torch.mean((y_hat-snapshot.y)**2) 
        step += 1
        if step > subset:
            break

    loss = loss / (step + 1)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    print("Epoch {} train MSE: {:.4f}".format(epoch, loss.item()))

Running training...
Epoch 0 train MSE: 1.0252
Epoch 1 train MSE: 0.9673
Epoch 2 train MSE: 0.9108
Epoch 3 train MSE: 0.8498
Epoch 4 train MSE: 0.7887
Epoch 5 train MSE: 0.7286
Epoch 6 train MSE: 0.6740
Epoch 7 train MSE: 0.6300
Epoch 8 train MSE: 0.5998
Epoch 9 train MSE: 0.5827


# 层级注意力机制重构