In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter
from sklearn.preprocessing import StandardScaler

# 配置参数
SEQ_LENGTH = 30          # 使用30天历史数据
BATCH_SIZE = 64          
EPOCHS = 200             
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

class WeatherDataset(Dataset):
    def __init__(self, data, seq_length, pred_days):
        self.scaler = StandardScaler()
        scaled_data = self.scaler.fit_transform(data)
        
        # 创建滑动窗口数据集
        self.x, self.y1, self.y5 = [], [], []
        for i in range(len(scaled_data)-seq_length-pred_days):
            seq = scaled_data[i:i+seq_length]
            target1 = scaled_data[i+seq_length][0]        # 第1天最高温
            target5 = scaled_data[i+seq_length:i+seq_length+5, 0]  # 连续5天最高温
            self.x.append(seq)
            self.y1.append(target1)
            self.y5.append(target5)
            
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, idx):
        return torch.FloatTensor(self.x[idx]), \
               torch.FloatTensor([self.y1[idx]]), \
               torch.FloatTensor(self.y5[idx])

class WeatherRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            dropout=0.3
        )
        self.fc1 = nn.Linear(hidden_size, 1)    # 预测第1天
        self.fc5 = nn.Linear(hidden_size, 5)    # 预测未来5天
        
    def forward(self, x):
        _, hn = self.rnn(x)  # hn形状: (num_layers, batch, hidden_size)
        last_hidden = hn[-1] # 取最后一层隐藏状态
        
        out1 = self.fc1(last_hidden).squeeze()  # 第1天预测
        out5 = self.fc5(last_hidden)           # 5天预测
        return out1, out5

# 数据准备
def load_data():
    df = pd.read_csv("./data/Summary of Weather.csv")
    df = df[['Date', 'MaxTemp']].dropna()
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date').resample('D').mean().ffill()  # 按日重采样
    
    # 创建完整数据序列
    full_dates = pd.date_range(start=df.index.min(), end=df.index.max(), freq='D')
    data = df.reindex(full_dates).ffill().values
    
    # 数据集划分
    train_size = int(0.7 * len(data))
    val_size = int(0.15 * len(data))
    test_size = len(data) - train_size - val_size
    
    train_data = data[:train_size]
    val_data = data[train_size:train_size+val_size]
    test_data = data[train_size+val_size:]
    
    return (WeatherDataset(train_data, SEQ_LENGTH, 5),
            WeatherDataset(val_data, SEQ_LENGTH, 5),
            WeatherDataset(test_data, SEQ_LENGTH, 5))

def train():
    train_ds, val_ds, test_ds = load_data()
    
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
    
    model = WeatherRNN(input_size=1, hidden_size=128).to(DEVICE)
    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
    
    writer = SummaryWriter()
    
    best_val_loss = float('inf')
    for epoch in range(EPOCHS):
        # 训练
        model.train()
        train_loss1, train_loss5 = 0, 0
        for x, y1, y5 in train_loader:
            x, y1, y5 = x.to(DEVICE), y1.to(DEVICE), y5.to(DEVICE)
            
            optimizer.zero_grad()
            pred1, pred5 = model(x)
            loss1 = criterion(pred1, y1)
            loss5 = criterion(pred5, y5)
            loss = 0.3*loss1 + 0.7*loss5  # 组合损失
            loss.backward()
            optimizer.step()
            
            train_loss1 += loss1.item()
            train_loss5 += loss5.item()
        
        # 验证
        model.eval()
        val_loss1, val_loss5 = 0, 0
        with torch.no_grad():
            for x, y1, y5 in val_loader:
                x, y1, y5 = x.to(DEVICE), y1.to(DEVICE), y5.to(DEVICE)
                pred1, pred5 = model(x)
                val_loss1 += criterion(pred1, y1).item()
                val_loss5 += criterion(pred5, y5).item()
        
        # 记录指标
        writer.add_scalars('Loss/Train', {'1-day': train_loss1/len(train_loader),
                                        '5-day': train_loss5/len(train_loader)}, epoch)
        writer.add_scalars('Loss/Val', {'1-day': val_loss1/len(val_loader),
                                       '5-day': val_loss5/len(val_loader)}, epoch)
        writer.add_scalar('Learning Rate', optimizer.param_groups[0]['lr'], epoch)
        
        # 保存最佳模型
        current_val_loss = val_loss1 + val_loss5
        if current_val_loss < best_val_loss:
            best_val_loss = current_val_loss
            torch.save(model.state_dict(), 'best_model.pth')
        
        # 调整学习率
        scheduler.step(current_val_loss)
        
        # 打印信息
        if (epoch+1) % 10 == 0:
            print(f"Epoch {epoch+1}/{EPOCHS} | "
                  f"Train Loss: 1-day {train_loss1/len(train_loader):.4f} "
                  f"5-day {train_loss5/len(train_loader):.4f} | "
                  f"Val Loss: 1-day {val_loss1/len(val_loader):.4f} "
                  f"5-day {val_loss5/len(val_loader):.4f}")
    
    writer.close()

if __name__ == "__main__":
    train()

  df = pd.read_csv("./data/Summary of Weather.csv")
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 10/200 | Train Loss: 1-day 1.0223 5-day 0.1435 | Val Loss: 1-day 1.2246 5-day 0.0307
Epoch 20/200 | Train Loss: 1-day 1.0294 5-day 0.1422 | Val Loss: 1-day 1.2123 5-day 0.0319
Epoch 30/200 | Train Loss: 1-day 1.0313 5-day 0.1413 | Val Loss: 1-day 1.2125 5-day 0.0320
Epoch 40/200 | Train Loss: 1-day 1.0177 5-day 0.1411 | Val Loss: 1-day 1.2126 5-day 0.0320
Epoch 50/200 | Train Loss: 1-day 1.0193 5-day 0.1416 | Val Loss: 1-day 1.2126 5-day 0.0320
Epoch 60/200 | Train Loss: 1-day 1.0169 5-day 0.1423 | Val Loss: 1-day 1.2126 5-day 0.0320
Epoch 70/200 | Train Loss: 1-day 1.0134 5-day 0.1418 | Val Loss: 1-day 1.2127 5-day 0.0320
Epoch 80/200 | Train Loss: 1-day 1.0194 5-day 0.1413 | Val Loss: 1-day 1.2127 5-day 0.0320
Epoch 90/200 | Train Loss: 1-day 1.0416 5-day 0.1422 | Val Loss: 1-day 1.2127 5-day 0.0320
Epoch 100/200 | Train Loss: 1-day 1.0210 5-day 0.1399 | Val Loss: 1-day 1.2127 5-day 0.0320
Epoch 110/200 | Train Loss: 1-day 1.0088 5-day 0.1396 | Val Loss: 1-day 1.2127 5-day 0.03