#### 使用RNN实现一个天气预测模型，能预测1天的最高气温

In [3]:
import pandas as pd
import torch
import math
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error
from torch.utils.tensorboard import SummaryWriter

In [4]:
def read_data(path='/kaggle/input/weatherww2/Summary of Weather.csv', min_days=365):
    """Load per-station maximum-temperature series into one tensor.

    The CSV is grouped by its ``STA`` column. For every station with at
    least ``min_days`` rows, the ``MaxTemp`` column becomes a float32
    series. All kept series are truncated to the length of the shortest
    one so they can be stacked.

    Args:
        path: Location of the weather CSV file.
        min_days: Minimum number of rows a station needs to be kept.

    Returns:
        A float32 tensor of shape ``(num_stations, min_len, 1)``.

    Raises:
        ValueError: If no station has at least ``min_days`` rows.
    """
    # low_memory=False makes pandas infer each column's dtype from the whole
    # file instead of chunk by chunk.
    # NOTE(review): the recorded run shows a pandas warning at this call,
    # presumably the mixed-dtype DtypeWarning -- confirm.
    df = pd.read_csv(path, low_memory=False)

    # NOTE(review): missing MaxTemp values (NaN) are passed through unchanged.
    sta_temp_list = [
        torch.tensor(group['MaxTemp'].values, dtype=torch.float32)
        for _, group in df.groupby('STA')
        if len(group) >= min_days
    ]
    if not sta_temp_list:
        raise ValueError(
            f"no station in {path!r} has at least {min_days} rows of data"
        )

    # Truncate every series to the shortest one so they share a length.
    min_len = min(len(series) for series in sta_temp_list)
    stacked = torch.stack([series[:min_len] for series in sta_temp_list])
    # Add a trailing feature axis: (stations, steps) -> (stations, steps, 1).
    return stacked.unsqueeze(-1)

def _log_series(log_dir, data, count, offset=0, feature_index=None):
    """Write up to ``count`` series from ``data`` as TensorBoard scalars."""
    # The context manager closes the writer even if logging raises.
    with SummaryWriter(log_dir=log_dir) as writer:
        # Clamp so that fewer than ``count`` series does not raise IndexError.
        for n in range(min(count, len(data))):
            item = data[n]
            for i in range(item.shape[0]):
                value = item[i] if feature_index is None else item[i][feature_index]
                writer.add_scalar(f'series/num{n}', value, global_step=i + offset)


def plot_series(x, y=None, y_pred=None, scalar_num=10):
    """Log input series, targets and predictions to TensorBoard.

    Each of the first ``scalar_num`` series is written under the tag
    ``series/num{n}``. Inputs go to ``./runs/x``, targets to ``./runs/y``
    and predictions to ``./runs/y_pred``, so the three runs share tags.

    Args:
        x: Input series, indexed as ``x[n][i][0]``.
        y: Optional targets, indexed as ``y[n][i]``.
        y_pred: Optional predictions, indexed as ``y_pred[n][i]``.
        scalar_num: Maximum number of series to log. If fewer series are
            available, only those are logged.
    """
    _log_series('./runs/x', x, scalar_num, feature_index=0)

    # Targets and predictions are placed after the input steps on the
    # global-step axis.
    horizon_start = x.shape[1]
    if y is not None:
        _log_series('./runs/y', y, scalar_num, offset=horizon_start)
    if y_pred is not None:
        _log_series('./runs/y_pred', y_pred, scalar_num, offset=horizon_start)

class TimeSeriesDataset(Dataset):
    """Dataset over indexable inputs ``X`` and optional targets ``y``.

    With ``train=True`` an item is the pair ``(X[idx], y[idx])``;
    with ``train=False`` an item is ``X[idx]`` alone.
    """

    def __init__(self, X, y=None, train=True):
        self.X, self.y, self.train = X, y, train

    def __len__(self):
        # The dataset has as many items as there are input samples.
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        if not self.train:
            return sample
        return sample, self.y[idx]


In [21]:
class RNN(torch.nn.Module):
    """Stacked RNN followed by a linear layer that emits one value.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each recurrent layer.
        num_layers: Number of stacked recurrent layers.

    The defaults (1, 200, 3) are the values that were previously hard-coded.
    """

    def __init__(self, input_size=1, hidden_size=200, num_layers=3):
        super().__init__()
        self.rnn = torch.nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = torch.nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one prediction per sequence.

        ``x`` is batch-first, i.e. ``(batch, steps, input_size)``.
        The result has shape ``(batch, 1)``.
        """
        out, _ = self.rnn(x)
        # Only the last time step's output feeds the prediction.
        return self.fc(out[:, -1])

In [10]:
# Use the GPU when one is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


def fit(model, dataloader, epochs=200):
    """Train ``model`` with MSE loss and Adam (lr=1e-3).

    Args:
        model: Module to train. It is moved to ``device`` in place.
        dataloader: Mapping whose ``'train'`` entry yields ``(X, y)`` batches.
        epochs: Number of passes over the training batches.

    Returns:
        A list with the mean training loss of each epoch, in order.
        If an epoch sees no batches, its entry is ``nan``.
    """
    model.to(device)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    history = []
    for epoch in range(epochs):
        model.train()
        train_loss = []
        for X, y in dataloader['train']:
            X, y = X.to(device), y.to(device)
            y_hat = model(X)
            loss = criterion(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
        # An empty loader would make np.mean warn; report nan directly.
        mean_loss = np.mean(train_loss) if train_loss else float('nan')
        print(f"epoch={epoch}, loss={mean_loss}")
        history.append(float(mean_loss))
    return history

def predict(model, dataloader):
    """Run ``model`` over every batch and return the stacked predictions.

    Args:
        model: Module to evaluate. It is switched to eval mode.
        dataloader: Iterable that yields input batches ``X`` (no targets).

    Returns:
        All batch outputs concatenated along dimension 0. If the iterable
        yields no batches, an empty tensor is returned.
    """
    model.eval()
    # Run on the device that holds the model's weights, so inputs and model
    # always match. The global ``device`` is only the fallback for a model
    # without parameters.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device(device)

    batch_preds = []
    with torch.no_grad():
        for X in dataloader:
            batch_preds.append(model(X.to(target)))

    if not batch_preds:
        return torch.tensor([]).to(target)
    # Concatenate once at the end, so earlier batches are not copied again
    # for every new batch.
    return torch.cat(batch_preds)

In [23]:
# End-to-end run: load the data, split it by station, train the RNN,
# then predict and evaluate on the held-out stations.
weather_series = read_data()
# Dimension 0 of weather_series indexes stations, so despite its name
# batch_size is the number of stations, not the DataLoader batch size.
batch_size = weather_series.shape[0]
s_steps = weather_series.shape[1]
# The first 80% of stations (rounded up) are used for training.
train_size = math.ceil(batch_size*0.8)
# Split into training and test sets.
# Inputs are all steps except the last; the target is the last step.
X_train, Y_train = weather_series[:train_size, :s_steps-1], weather_series[:train_size, -1]
X_test, Y_test = weather_series[train_size:, :s_steps-1], weather_series[train_size:, -1]
dataset = {
    'train': TimeSeriesDataset(X_train, Y_train),
    # train=False makes the dataset yield inputs only, which is what
    # predict() iterates over.
    'test': TimeSeriesDataset(X_test, Y_test, train=False)
}
dataloader = {
    'train': DataLoader(dataset['train'], shuffle=True, batch_size=64),
    # No shuffling, so predictions stay aligned with Y_test.
    'test': DataLoader(dataset['test'], shuffle=False, batch_size=64)
}
# Train the model.
rnn = RNN()
fit(rnn, dataloader, 2000)
# Predict the maximum temperature one day ahead.
y_pred = predict(rnn, dataloader['test'])
plot_series(X_test, Y_test, y_pred.cpu())
# The printed label below is Chinese for "mean squared error".
print('均方误差：', mean_squared_error(Y_test, y_pred.cpu()))

  df = pd.read_csv('/kaggle/input/weatherww2/Summary of Weather.csv')


epoch=0, loss=735.8498840332031
epoch=1, loss=612.2080078125
epoch=2, loss=528.7060241699219
epoch=3, loss=459.9213409423828
epoch=4, loss=447.1535186767578
epoch=5, loss=412.81951904296875
epoch=6, loss=409.82484436035156
epoch=7, loss=371.35858154296875
epoch=8, loss=362.0721130371094
epoch=9, loss=358.82635498046875
epoch=10, loss=343.8515319824219
epoch=11, loss=330.58506774902344
epoch=12, loss=309.73760986328125
epoch=13, loss=291.83850860595703
epoch=14, loss=292.14772033691406
epoch=15, loss=284.66954040527344
epoch=16, loss=260.41968536376953
epoch=17, loss=266.70001220703125
epoch=18, loss=256.03773498535156
epoch=19, loss=233.8425064086914
epoch=20, loss=228.94428253173828
epoch=21, loss=224.28601837158203
epoch=22, loss=213.52754974365234
epoch=23, loss=195.61495208740234
epoch=24, loss=198.41895294189453
epoch=25, loss=192.83492279052734
epoch=26, loss=185.32949829101562
epoch=27, loss=178.9085922241211
epoch=28, loss=171.98133850097656
epoch=29, loss=160.75127410888672
ep