In [None]:
# -*- coding: utf-8 -*-
# 1.导入包以及设置随机种子
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from math import sqrt
import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline

import random
# One shared seed for the NumPy, PyTorch and stdlib random generators.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)

In [None]:
# 2. Hyperparameters, kept as attributes on a bare namespace object
class argparse():
    """Empty attribute holder; the settings are attached to an instance below."""


args = argparse()

# Fixed settings
args.patience = 20          # passed to EarlyStopping as its patience

# Tunable settings
args.epochs = 200
args.learning_rate = 0.00375
args.BTACH_SIZE = 512       # (sic) other cells read the attribute under this spelling
args.hidden_size = 35
args.layers = 2             # number of stacked LSTM layers
args.input = 4              # LSTM input size (features per time step)
args.sequence_length = 126
args.delay = 72
args.device = [torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),]

# Training data, indexed by its 'time' column
args.data = pd.read_csv('./rainfall_traindata.csv').set_index('time')

In [None]:
# Dataset wrapper and data-pipeline helper
class Mydataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each feature entry with the label at the same index."""

    def __init__(self, features, labels):
        # Both containers are stored as given; nothing is copied or converted.
        self.features, self.labels = features, labels

    def __len__(self):
        # The sample count is taken from the feature container alone.
        return len(self.features)

    def __getitem__(self, index):
        return self.features[index], self.labels[index]
    
def build_databbase(sequence_length, delay, data, batch_size=None):
    """Cut a DataFrame into sliding windows and wrap them in train/test DataLoaders.

    Each sample uses ``sequence_length`` consecutive rows (all columns) as input
    and, as target, column 0 of the row ``delay`` steps after the last input row.
    Windows start at every row (stride 1), are shuffled with NumPy's global RNG,
    and are split 80 % / 20 % into train and test.  Inputs are standardised with
    the mean and standard deviation of the training windows, computed along
    axis 0, i.e. separately for every (time step, feature) position.

    NOTE(review): because stride-1 windows overlap and are shuffled *before* the
    split, test windows share rows with training windows.  The reported test
    loss is therefore optimistic; a chronological split would avoid this.

    Args:
        sequence_length: number of input rows per sample (>= 1).
        delay: distance in rows from the last input row to the target row (>= 1).
        data: pandas DataFrame whose columns can all be cast to float32.
        batch_size: DataLoader batch size; defaults to ``args.BTACH_SIZE``.

    Returns:
        ``(train_dl, test_dl, test_y)`` where ``test_y`` is the float32 array of
        test targets in the order ``test_dl`` yields them.

    Raises:
        ValueError: if ``sequence_length`` or ``delay`` is smaller than 1, or if
            ``data`` has fewer than ``sequence_length + delay`` rows.
    """
    if sequence_length < 1 or delay < 1:
        raise ValueError(
            "sequence_length and delay must both be >= 1, got "
            "sequence_length={} and delay={}".format(sequence_length, delay)
        )

    window = sequence_length + delay
    # "+ 1" keeps the final window, whose target is the very last row of `data`.
    n_windows = len(data) - window + 1
    if n_windows < 1:
        raise ValueError(
            "data has {} rows but at least sequence_length + delay = {} are "
            "required".format(len(data), window)
        )

    if batch_size is None:
        batch_size = args.BTACH_SIZE

    # Convert once, then gather every window with a single fancy-index instead
    # of materialising one DataFrame slice per window.
    values = data.values.astype(np.float32)
    starts = np.arange(n_windows)
    x = values[starts[:, None] + np.arange(sequence_length)]  # (n_windows, sequence_length, n_columns)
    y = values[starts + window - 1, 0]                        # (n_windows,)

    # Shuffle inputs and targets with the same permutation.
    order = np.arange(n_windows)
    np.random.shuffle(order)
    x, y = x[order], y[order]

    # 80 % / 20 % train / test split.
    split_boundary = int(n_windows * 0.8)

    train_x = x[: split_boundary]
    test_x = x[split_boundary:]

    train_y = y[: split_boundary]
    test_y = y[split_boundary:]

    mean = train_x.mean(axis=0)
    std = train_x.std(axis=0)
    # A position that is constant across the training windows has std == 0;
    # dividing by it would fill the features with NaN/inf.
    std[std == 0] = 1.0
    train_x = (train_x - mean) / std
    test_x = (test_x - mean) / std

    train_ds = Mydataset(train_x, train_y)
    test_ds = Mydataset(test_x, test_y)

    train_dl = torch.utils.data.DataLoader(
        train_ds,
        batch_size=batch_size,
        shuffle=True
    )
    test_dl = torch.utils.data.DataLoader(
        test_ds,
        batch_size=batch_size
    )
    return train_dl, test_dl, test_y

    

In [None]:
# Model definition
class Net(nn.Module):
    """LSTM encoder with a two-layer head producing one non-negative value per sequence.

    Args:
        hidden_size: LSTM hidden size (also the input width of the first linear layer).
        input_size: features per time step; defaults to ``args.input``.
        num_layers: number of stacked LSTM layers; defaults to ``args.layers``.
    """

    def __init__(self, hidden_size, input_size=None, num_layers=None):
        super(Net, self).__init__()
        if input_size is None:
            input_size = args.input
        if num_layers is None:
            num_layers = args.layers
        self.rnn = nn.LSTM(input_size,
                           hidden_size,
                           num_layers,
                           batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, inputs):
        """Map ``inputs`` of shape (batch, seq_len, input_size) to a tensor of shape (batch,)."""
        s_o, _ = self.rnn(inputs)
        # Only the output at the last time step feeds the head.
        s_o = s_o[:, -1, :]
        # Dropout follows the module's mode, so model.eval() really disables it.
        x = F.dropout(F.relu(self.fc1(s_o)), p=0.2, training=self.training)
        # The final ReLU clamps predictions to be >= 0.
        x = F.relu(self.fc2(x))
        # Drop only the trailing size-1 feature axis; a batch of one stays shape (1,).
        return x.squeeze(-1)

In [None]:
# 3. Early stopping
class EarlyStopping():
    """Track a validation loss, checkpoint on improvement and flag when to stop.

    Call the instance once per epoch with the validation loss.  An improvement
    saves the model's ``state_dict`` and resets the counter; any other epoch
    increments the counter, and ``early_stop`` becomes True once the counter
    reaches ``patience``.

    Args:
        patience: consecutive non-improving epochs tolerated before stopping.
        verbose: if True, print a message every time a checkpoint is written.
        delta: a score below ``best_score + delta`` counts as no improvement
            (score is the negated loss).
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # `np.Inf` no longer exists in NumPy 2.0; the builtin float works everywhere.
        self.val_loss_min = float("inf")
        self.delta = delta

    def __call__(self, val_loss, model, path):
        """Record ``val_loss``; checkpoint ``model`` under ``path`` when it improved."""
        print("val_loss={}".format(val_loss))
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Write ``model.state_dict()`` to ``<path>/model_earlystopping.pth``.

        The checkpoint is written whether or not ``verbose`` is set; ``verbose``
        only controls the progress message.
        """
        import os

        if self.verbose:
            print(
                f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        if path:
            # Create the target directory on first use instead of failing in torch.save.
            os.makedirs(path, exist_ok=True)
        torch.save(model.state_dict(), path + '/' + 'model_earlystopping.pth')
        self.val_loss_min = val_loss

In [None]:
# 4. Training / evaluation function
def fit(epoch, model, trainloader, testloader):
    """Train for one epoch, evaluate once, and return both mean per-sample losses.

    Uses the notebook-level ``loss_fn`` and ``optimizer``.  ``loss_fn`` is
    expected to return the batch mean (as ``nn.MSELoss()`` does by default):
    each batch loss is multiplied by its batch size before summing so that the
    result is the true average over samples, independent of the batch size and
    of a shorter final batch.

    Args:
        epoch: epoch index, used only in the progress line.
        model: module to train; left in eval mode on return.
        trainloader: iterable of ``(x, y)`` training batches.
        testloader: iterable of ``(x, y)`` evaluation batches.

    Returns:
        ``(epoch_loss, epoch_test_loss)`` as floats; a value is NaN when the
        corresponding loader yields no samples.
    """
    # Send batches wherever the model lives; fall back to the CUDA-if-available
    # rule only for a model without parameters.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    total = 0
    running_loss = 0.0

    model.train()
    for x, y in trainloader:
        x, y = x.to(device), y.to(device)
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_size = y.size(0)
        total += batch_size
        running_loss += loss.item() * batch_size
    epoch_loss = running_loss / total if total else float('nan')

    test_total = 0
    test_running_loss = 0.0

    model.eval()
    with torch.no_grad():
        for x, y in testloader:
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            batch_size = y.size(0)
            test_total += batch_size
            test_running_loss += loss.item() * batch_size
    epoch_test_loss = test_running_loss / test_total if test_total else float('nan')

    print('epoch: ', epoch,
          'loss： ', round(epoch_loss, 3),
          'test_loss： ', round(epoch_test_loss, 3),
          )

    return epoch_loss, epoch_test_loss

In [None]:
# Build the train / test loaders
train_dl, test_dl, test_y = build_databbase(args.sequence_length, args.delay, args.data)

# 5. Instantiate the model, loss function and optimizer
model = Net(args.hidden_size)
if torch.cuda.is_available():
    model.to('cuda')
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

early_stopping = EarlyStopping(patience=args.patience)

# 6. Train the model, recording the loss of every epoch
train_loss, test_loss = [], []
start_time = time.time()

for epoch in range(args.epochs):
    epoch_loss, epoch_test_loss = fit(epoch, model, train_dl, test_dl)
    train_loss.append(epoch_loss)
    test_loss.append(epoch_test_loss)

    # Early stopping watches the loss of the held-out loader.
    early_stopping(epoch_test_loss, model=model, path='./models')
    if early_stopping.early_stop:
        print("Early stopping")
        break

end_time = time.time()
print("耗时: {:.2f}秒".format(end_time - start_time))



In [None]:
# Save the trained model (the whole module object, not only its state_dict)
torch.save(obj=model, f="./models/Lstm_model.pth")