In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

from torch_geometric.data import Data
import warnings
warnings.filterwarnings("ignore")
import torch


In [2]:
import torch
print("CUDA Available:", torch.cuda.is_available())
x = torch.tensor([1.0, 2.0, 3.0], device='cuda')
print(x)

CUDA Available: True
tensor([1., 2., 3.], device='cuda:0')


In [3]:
data=pd.read_csv("data_consolidation_standardization.csv")
data=data.drop(labels="Unnamed: 0", axis=1)
# data_test=data.loc[data['CellID']<=100]

In [4]:
data.sort_values(['CellID','datetime'], inplace=True)
data_test=data

In [5]:
data_test

Unnamed: 0,datetime,CellID,internet,calls,sms,longitude,latitude
0,2013-11-01 00:00:00,1,-1.045031,-0.739046,-0.846748,9.160012,45.358657
100,2013-11-01 01:00:00,1,-1.074765,-0.750493,-0.865873,9.160012,45.358657
200,2013-11-01 02:00:00,1,-1.087136,-0.754285,-0.873567,9.160012,45.358657
300,2013-11-01 03:00:00,1,-1.096080,-0.757753,-0.875403,9.160012,45.358657
400,2013-11-01 04:00:00,1,-1.109877,-0.756706,-0.878479,9.160012,45.358657
...,...,...,...,...,...,...,...
16399,2013-11-07 19:00:00,100,-0.050505,-0.004918,-0.064944,9.160606,45.568069
16499,2013-11-07 20:00:00,100,-0.141303,-0.239126,-0.156799,9.160606,45.568069
16599,2013-11-07 21:00:00,100,-0.231419,-0.426115,-0.262352,9.160606,45.568069
16699,2013-11-07 22:00:00,100,-0.319378,-0.600432,-0.395659,9.160606,45.568069


# 重构数据

In [6]:
# 初始化一个数组来存储重构后的数据
num_nodes = 100
num_timesteps = 168
num_features = 3

X为需要用到的数据

In [7]:
# 创建一个空的 numpy 数组，用于存储每个节点的特征，这里是提取和时序有关的数据，为训练集和测试集构建做准备
X = np.zeros((num_nodes, num_timesteps, num_features))

# 遍历每个基站，填充数据
for i in range(num_nodes):
    # 提取当前基站的数据
    node_data = data_test[data_test['CellID'] == i+1]  # 假设 CellID 从 1 开始
    # 确保数据的长度正确
    if len(node_data) == num_timesteps:
        X[i, :, 0] = node_data['internet'].values
        X[i, :, 1] = node_data['calls'].values
        X[i, :, 2] = node_data['sms'].values
    else:
        print(f"Warning: Node {i+1} has incorrect number of timesteps: {len(node_data)}")


In [8]:
X.shape

(100, 168, 3)

In [9]:
X#X为100个节点3个特征168个时序的数据

array([[[-1.0450308 , -0.73904563, -0.84674845],
        [-1.07476506, -0.75049338, -0.86587277],
        [-1.08713609, -0.7542851 , -0.87356665],
        ...,
        [-0.96361034, -0.67310022, -0.74354086],
        [-0.97581032, -0.71657865, -0.76440385],
        [-1.00971515, -0.73868004, -0.80597322]],

       [[-1.02265791, -0.73796843, -0.84425615],
        [-1.05836134, -0.74950306, -0.86310406],
        [-1.06950522, -0.75192882, -0.87142658],
        ...,
        [-0.92947228, -0.66630709, -0.72315621],
        [-0.93924703, -0.7134834 , -0.74826225],
        [-0.97507457, -0.73674196, -0.79580883]],

       [[-1.0293318 , -0.73824903, -0.84680945],
        [-1.06575388, -0.74942007, -0.8626201 ],
        [-1.07831554, -0.75036499, -0.87113144],
        ...,
        [-0.93603355, -0.66563623, -0.72124845],
        [-0.95273954, -0.71413596, -0.75372177],
        [-0.98775576, -0.73780824, -0.79689143]],

       ...,

       [[-0.59533329, -0.68073512, -0.64476015],
        [-0

In [10]:
import numpy as np

def create_windowed_data(X, input_features, output_features, window_size, predict_steps):
    """
    创建滑动窗口数据集，允许输入和输出具有不同的特征集。
    
    Args:
    X (np.array): 原始数据，形状为 (num_nodes, num_timesteps, num_features)。
    input_features (list): 输入特征的索引列表。
    output_features (list): 输出特征的索引列表。
    window_size (int): 输入窗口大小。
    predict_steps (int): 要预测的步数。
    
    Returns:
    np.array: X_train, 形状为 (num_samples, num_nodes, window_size, len(input_features))
    np.array: y_train, 形状为 (num_samples, num_nodes, predict_steps, len(output_features))
    """
    num_nodes, num_timesteps, _ = X.shape
    num_samples = num_timesteps - window_size - predict_steps + 1
    X_train = np.empty((num_samples, num_nodes, window_size, len(input_features)))
    y_train = np.empty((num_samples, num_nodes, predict_steps, len(output_features)))

    for t in range(num_samples):
        X_train[t] = X[:, t:t+window_size, :][:, :, input_features]
        y_train[t] = X[:, t+window_size:t+window_size+predict_steps, output_features]

    return X_train, y_train

# 设置窗口大小和预测步数
window_size = 6
predict_steps = 3

# 选择特征
input_features = [0,1,2]  # e.g., [0, 1] could represent 'internet' and 'calls'
output_features = [0]    # e.g., [0] could represent 'internet'

# 假设 X 是从您的数据加载过程中获得的
X_train, y_train = create_windowed_data(X, input_features, output_features, window_size, predict_steps)

print("X_train shape:", X_train.shape)  # Expect: (num_samples, num_nodes, window_size, len(input_features))
print("y_train shape:", y_train.shape)  # Expect: (num_samples, num_nodes, predict_steps, len(output_features))


X_train shape: (160, 100, 3, 6)
y_train shape: (160, 100, 3)


In [50]:
edge_index = []

# 因为边是无向的，所以每个连接要添加两次，一次为 (source, target)，另一次为 (target, source)
for timestep in range(num_timesteps - 6):  # 如果时间步涉及到边的创建，可能需要调整范围
    for cell_id in range(1, num_nodes):  # 假设从1开始，确保不超出边界
        if cell_id < num_nodes:  # 确保不会创建超出范围的边
            src = cell_id - 1  # CellID 从1开始的调整
            dst = cell_id     # 相邻的CellID，确保不越界

            # 添加边 (src, dst) 和 (dst, src)
            edge_index.append((src, dst))
            edge_index.append((dst, src))

# 转换为PyTorch张量
edge_index_tensor = torch.tensor(edge_index, dtype=torch.long).t().contiguous()

In [34]:
import torch
import numpy as np

def prepare_graph_data_for_temporal_signal(X_train, y_train, edge_index, edge_attr, target_node, target_feature_index):
    """
    准备用于StaticGraphTemporalSignal的图数据，输入为全图，输出为特定节点的特定特征。

    Args:
    X_train (array): 输入特征数据，形状为 (num_samples, num_nodes, num_features, window_size)。
    y_train (array): 目标特征数据，形状为 (num_samples, num_nodes, predict_steps)。
    edge_index (array): 边索引。
    edge_attr (array): 边属性。
    target_node (int): 目标节点索引。
    target_feature_index (int): 目标特征索引。

    Returns:
    features (list of ndarray): 特征列表，每个元素是一个ndarray。
    targets (list of ndarray): 目标列表，每个元素是一个ndarray。
    """
    features = []
    targets = []
    for i in range(X_train.shape[0]):
        x_tensor = torch.tensor(X_train[i], dtype=torch.float32).numpy()
        y_tensor = torch.tensor(y_train[i, target_node, target_feature_index], dtype=torch.float32).numpy()
        features.append(x_tensor)
        targets.append(y_tensor)
    
    return features, targets

# 示例使用，确保 edge_index 和 edge_attr 已定义
features, targets = prepare_graph_data_for_temporal_signal(X_train, y_train, edge_index_tensor, edge_weights, target_node, target_feature_index)


In [51]:
from torch_geometric_temporal.signal import StaticGraphTemporalSignal

# # 转换边索引和边权重为NumPy数组
# edge_index_array = np.array(edge_index, dtype=np.int64)

# 由于这里假设所有时间步使用相同的边，我们将 edge_index 和 edge_weight 设置为常量
# 如果没有特定的边权重，可以使用全1的张量
edge_weights = torch.ones(edge_index_tensor.size(1), dtype=torch.float)

# 创建StaticGraphTemporalSignal对象
graph_data = StaticGraphTemporalSignal(edge_index=edge_index_tensor, edge_weight=edge_weights, features=features, targets=targets)

# 数据检查

In [52]:
graph_data

<torch_geometric_temporal.signal.static_graph_temporal_signal.StaticGraphTemporalSignal at 0x1c986abdd00>

In [53]:
graph_data[0]

Data(x=[100, 3, 6], edge_index=[2, 32076], edge_attr=[32076], y=-1.057259202003479)

# 模型构建

In [54]:
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import A3TGCN

class TemporalGNN(torch.nn.Module):
    def __init__(self, node_features, periods):
        super(TemporalGNN, self).__init__()
        # Attention Temporal Graph Convolutional Cell
        self.tgnn = A3TGCN(in_channels=node_features, 
                           out_channels=32, 
                           periods=periods)#periods决定模型侧重短期特征还是长期特征
        # Equals single-shot prediction
        self.linear = torch.nn.Linear(32, periods)#预测未来的12个值

    def forward(self, x, edge_index,edge_attr):
#     def forward(self, x, edge_index):
        """
        x = Node features for T time steps
        edge_index = Graph edge indices
        """
        h = self.tgnn(x, edge_index,edge_attr)
#         h = self.tgnn(x, edge_index)
        h = F.relu(h)
        h = self.linear(h)
        return h

# TemporalGNN(node_features=3, periods=1)

# 训练和测试

初始化模型

In [55]:
import torch.optim as optim

# 初始化模型
model = TemporalGNN(node_features=3, periods=6)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 设置损失函数和优化器
criterion = torch.nn.MSELoss()  # 均方误差作为损失函数
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam优化器


训练模型

In [56]:
def train_model(model, data, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for time_step in data:
            optimizer.zero_grad()
            out = model(time_step.x.to(device), time_step.edge_index.to(device), time_step.edge_attr.to(device))
            loss = criterion(out, time_step.y.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

# 调用训练函数
train_model(model, graph_data, epochs=50)


Epoch 1/50, Loss: 16.2590
Epoch 2/50, Loss: 4.2530
Epoch 3/50, Loss: 3.5913
Epoch 4/50, Loss: 2.8598
Epoch 5/50, Loss: 2.2881
Epoch 6/50, Loss: 1.9285
Epoch 7/50, Loss: 1.6612
Epoch 8/50, Loss: 1.3678
Epoch 9/50, Loss: 1.0851
Epoch 10/50, Loss: 0.9892
Epoch 11/50, Loss: 0.9513
Epoch 12/50, Loss: 0.8838
Epoch 13/50, Loss: 0.8502
Epoch 14/50, Loss: 0.8258
Epoch 15/50, Loss: 0.8109
Epoch 16/50, Loss: 0.8351
Epoch 17/50, Loss: 1.9499
Epoch 18/50, Loss: 0.8976
Epoch 19/50, Loss: 0.6588
Epoch 20/50, Loss: 0.6299
Epoch 21/50, Loss: 0.6173
Epoch 22/50, Loss: 0.6006
Epoch 23/50, Loss: 0.6037
Epoch 24/50, Loss: 0.6116
Epoch 25/50, Loss: 0.6341
Epoch 26/50, Loss: 0.6364
Epoch 27/50, Loss: 0.6191
Epoch 28/50, Loss: 0.6250
Epoch 29/50, Loss: 0.5996
Epoch 30/50, Loss: 0.5929
Epoch 31/50, Loss: 0.5981
Epoch 32/50, Loss: 0.5744
Epoch 33/50, Loss: 0.5557
Epoch 34/50, Loss: 0.5632
Epoch 35/50, Loss: 0.5271
Epoch 36/50, Loss: 0.5319
Epoch 37/50, Loss: 0.5108
Epoch 38/50, Loss: 0.6279
Epoch 39/50, Loss: 0

In [17]:
def test_model(model, test_loader, criterion):
    model.eval()  # 切换到评估模式
    total_loss = 0
    with torch.no_grad():  # 在测试阶段，我们不需要计算梯度
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    
    average_loss = total_loss / len(test_loader)
    print(f'Test MSE: {average_loss:.4f}')

# 调用测试函数
criterion = nn.MSELoss()  # 确保使用和训练相同的损失函数
test_model(model, test_loader, criterion)


NameError: name 'test_loader' is not defined

# 层级注意力机制重构