In [4]:
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

from torch_geometric.data import Data
import warnings
warnings.filterwarnings("ignore")
import torch


In [5]:
data=pd.read_csv("data_consolidation_standardization.csv")
data=data.drop(labels="Unnamed: 0", axis=1)
# data_test=data.loc[data['CellID']<=100]

In [6]:
data.sort_values(['CellID','datetime'], inplace=True)
data_test=data

In [7]:
data_test

Unnamed: 0,datetime,CellID,internet,calls,sms,longitude,latitude
0,2013-11-01 00:00:00,1,-1.045031,-0.739046,-0.846748,9.160012,45.358657
100,2013-11-01 01:00:00,1,-1.074765,-0.750493,-0.865873,9.160012,45.358657
200,2013-11-01 02:00:00,1,-1.087136,-0.754285,-0.873567,9.160012,45.358657
300,2013-11-01 03:00:00,1,-1.096080,-0.757753,-0.875403,9.160012,45.358657
400,2013-11-01 04:00:00,1,-1.109877,-0.756706,-0.878479,9.160012,45.358657
...,...,...,...,...,...,...,...
16399,2013-11-07 19:00:00,100,-0.050505,-0.004918,-0.064944,9.160606,45.568069
16499,2013-11-07 20:00:00,100,-0.141303,-0.239126,-0.156799,9.160606,45.568069
16599,2013-11-07 21:00:00,100,-0.231419,-0.426115,-0.262352,9.160606,45.568069
16699,2013-11-07 22:00:00,100,-0.319378,-0.600432,-0.395659,9.160606,45.568069


# 根据需要选择不同的基站或者流量/语音/短信数据

In [8]:
import pandas as pd

# 加载数据
# data = pd.read_csv('path_to_your_data.csv')  # 根据您的文件路径和文件名进行调整

data = data_test  # 根据您的文件路径和文件名进行调整

# 将时间字符串转换为pandas的datetime对象，这假设'datetime'是存储时间的列
data['datetime'] = pd.to_datetime(data['datetime'])

# 对每个基站分别提取数据
def extract_time_series(data, cell_id, feature):
    """
    提取特定基站的时间序列数据。
    Args:
    data (DataFrame): 包含所有数据的DataFrame。
    cell_id (int): 基站ID。
    feature (str): 要提取的特征名，如'internet', 'calls', 'sms'。
    
    Returns:
    DataFrame: 包含特定基站和特征的时间序列数据。
    """
    # 过滤特定基站的数据
    specific_data = data[data['CellID'] == cell_id][['datetime', feature]]
    specific_data.set_index('datetime', inplace=True)
    return specific_data

# 示例：提取基站ID为1的基站的流量数据
cell_id = 1
feature = 'internet'
ts_data = extract_time_series(data, cell_id, feature)
print(ts_data.head())


                     internet
datetime                     
2013-11-01 00:00:00 -1.045031
2013-11-01 01:00:00 -1.074765
2013-11-01 02:00:00 -1.087136
2013-11-01 03:00:00 -1.096080
2013-11-01 04:00:00 -1.109877


In [9]:
ts_data

Unnamed: 0_level_0,internet
datetime,Unnamed: 1_level_1
2013-11-01 00:00:00,-1.045031
2013-11-01 01:00:00,-1.074765
2013-11-01 02:00:00,-1.087136
2013-11-01 03:00:00,-1.096080
2013-11-01 04:00:00,-1.109877
...,...
2013-11-07 19:00:00,-0.930432
2013-11-07 20:00:00,-0.942378
2013-11-07 21:00:00,-0.963610
2013-11-07 22:00:00,-0.975810


# 滑动窗口构建数据集和标签

In [10]:
def create_sequences(data, window_size, predict_steps):
    """
    创建滑动窗口数据。
    Args:
    data (Series): 输入的时间序列数据。
    window_size (int): 滑动窗口的大小。
    predict_steps (int): 需要预测的未来步数。
    
    Returns:
    tuple: 包含特征和标签的元组。
    """
    X, y = [], []
    for i in range(len(data) - window_size - predict_steps + 1):
        X.append(data.iloc[i:(i + window_size)].values)
        y.append(data.iloc[i + window_size:(i + window_size + predict_steps)].values)
    return np.array(X), np.array(y)

# 使用示例
window_size = 6
predict_steps = 3
X, y = create_sequences(ts_data[feature], window_size, predict_steps)


# 构建训练集和测试集

In [11]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# 转换数据为Tensor
X_tensor = torch.tensor(X, dtype=torch.float32).unsqueeze(-1)  # 增加一个维度以适应LSTM输入要求
y_tensor = torch.tensor(y, dtype=torch.float32)

# 划分数据集
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

# 创建DataLoaders
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


# 定义LSTM模型并初始化

In [12]:
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        out = self.fc(hn[-1])
        return out

# 初始化模型
input_dim = 1  # 因为增加了一个维度，每个时间步只有一个特征
hidden_dim = 50
num_layers = 1
output_dim = predict_steps  # 预测步数
model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)


# 训练和测试

In [15]:
import torch.optim as optim

# 损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = model.to(device)

def train_model(model, train_loader, criterion, optimizer, num_epochs=50):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader)}')

train_model(model, train_loader, criterion, optimizer)

def evaluate_model(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    print(f'Test MSE: {total_loss/len(test_loader)}')

evaluate_model(model, test_loader, criterion)


Epoch 1/50, Loss: 0.029972142714541405
Epoch 2/50, Loss: 0.029799905605614185
Epoch 3/50, Loss: 0.00946376723004505
Epoch 4/50, Loss: 0.007438401458784938
Epoch 5/50, Loss: 0.007602635072544217
Epoch 6/50, Loss: 0.0048556438414379954
Epoch 7/50, Loss: 0.0055335917277261615
Epoch 8/50, Loss: 0.003371546568814665
Epoch 9/50, Loss: 0.00429895258275792
Epoch 10/50, Loss: 0.003271350229624659
Epoch 11/50, Loss: 0.0035784626961685717
Epoch 12/50, Loss: 0.003761613683309406
Epoch 13/50, Loss: 0.0033117233542725444
Epoch 14/50, Loss: 0.0033640293404459953
Epoch 15/50, Loss: 0.003135216946247965
Epoch 16/50, Loss: 0.003201183571945876
Epoch 17/50, Loss: 0.0031187618151307106
Epoch 18/50, Loss: 0.00314393820008263
Epoch 19/50, Loss: 0.0031374480458907783
Epoch 20/50, Loss: 0.0031213033362291753
Epoch 21/50, Loss: 0.0030768002616241574
Epoch 22/50, Loss: 0.0031566359102725983
Epoch 23/50, Loss: 0.0031896306318230927
Epoch 24/50, Loss: 0.003218326542992145
Epoch 25/50, Loss: 0.0030937873525545
Epo