In [1]:
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
my_font = FontProperties(fname=r"c:\windows\fonts\SimHei.ttf",size=12)

In [2]:
class WindDataSet(Dataset):
    def __init__(self,path,num_steps=50):
        self.data = []
        file = pd.read_csv(path,skiprows=1)
        self.feature = np.array(file[[ "Month", "Day", "Hour", "Minute", "surface air pressure (Pa)",
                                       "relative humidity at 2m (%)", "surface precipitation rate (mm/h)",
                                         "air temperature at 10m (C)", "wind direction at 10m (deg)",
                                         "wind speed at 10m (m/s)"]])
        self.target = np.array(file["wind speed at 10m (m/s)"])
        wind_len = len(self.feature)
        for i in range(wind_len-num_steps-1):
            self.data.append((self.feature[i:i+num_steps],self.target[i+num_steps]))
        self.data = self.data[:int(len(self.data)/250)*250]
    def __len__(self):
        return len(self.data)   
    def __getitem__(self,index):
        seq,pre = self.data[index]
        return seq,pre


In [3]:
def try_gpu(i=0):
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')
try_gpu()

device(type='cpu')

In [4]:
INPUT_SIZE=10
HIDDEN_SIZE=50
BATCH_SIZE=250
OUTPUT_SIZE=1
NUM_LAYERS=2

class lstm(nn.Module):
    def __init__(self):
        super(lstm,self).__init__()
        self.rnn = nn.LSTM(input_size=INPUT_SIZE,hidden_size=HIDDEN_SIZE,num_layers=NUM_LAYERS)
        self.fc = nn.Linear(HIDDEN_SIZE,OUTPUT_SIZE)
    def forward(self,x,state):
        x = torch.transpose(x,dim0=0,dim1=1).reshape((x.shape[1],-1,INPUT_SIZE))
        out,state = self.rnn(x,state)
        out = self.fc(out)
        return out[-1],state
    def begin_state(self,batch_size,device):
        return (torch.zeros((2,batch_size,HIDDEN_SIZE),device=device),torch.zeros((2,batch_size, HIDDEN_SIZE), device=device))

In [5]:
net = lstm()
lr = 0.001
device = try_gpu()
print(device)
net = net.to(device)
optimizer = torch.optim.Adam(net.parameters(),lr=lr)
loss = nn.MSELoss()
epochs = 10
num_steps=50

cpu


In [6]:
def grad_clipping(net,theta):
    if isinstance(net,nn.Module):
        params = [p for p in net.parameters() if p.requires_grad]
    else:
        params = net.params
    norm = torch.sqrt(sum(torch.sum((p.grad ** 2)) for p in params))
    if norm > theta:
        for param in params:
            param.grad[:] *= theta/norm

In [7]:

def train_epoch(epoch,net,train_loader,device,train_loss):
    net = net.to(device)
    net.train()
    runing_loss=0
    for batch_idx,(X,y) in enumerate(train_loader):
        state=net.begin_state(batch_size=BATCH_SIZE, device=device)
        for s in state:
            s.detach_()
        optimizer.zero_grad()
        X,y = X.to(torch.float32).to(device),y.to(torch.float32).T.to(device)
        #print(X.shape,y.shape)
        y_hat,state = net(X,state)
        y_hat = y_hat.reshape(-1)
        l = loss(y_hat,y).mean()
        l.backward()
        grad_clipping(net, 1)
        optimizer.step()
        runing_loss += l.item()
        if batch_idx%400 == 399:
            print(f'epoch:{epoch+1},batch_idx:{batch_idx+1},running_loss:{runing_loss/400}')
            train_loss.append(l.item())
            runing_loss=0


In [8]:
dataset = WindDataSet('../data/wind_dataset144-2014/wind_dataset144/1.csv',num_steps)
print(dataset.__len__())

105000


In [9]:
def save_net(path = 'wind_10input.pt', net=None):
    torch.save(net.state_dict(),path)
# save_net(net=net)

In [10]:
def train(epochs):
    train_loss = []
    for epoch in range(epochs):
        for i in range(120):
            dataset = WindDataSet(f'./datasets/{i}.csv',num_steps)
            train_loader = DataLoader(dataset,batch_size=BATCH_SIZE,shuffle=False,num_workers=0)
            train_epoch(epoch,net,train_loader,device,train_loss)
        print(f'###epoch:{epoch+1},train_loss:{train_loss[-1]}')
        save_net(path=f"multidata-epoch{epoch}.pt",net=net.to('cpu'))
        #net = net.to(device)
    

In [34]:
train(epochs)

epoch:1,batch_idx:400,running_loss:6.056187234446407
epoch:1,batch_idx:400,running_loss:7.098025044947863
epoch:1,batch_idx:400,running_loss:5.9750065587461
epoch:1,batch_idx:400,running_loss:7.234809423238039
epoch:1,batch_idx:400,running_loss:9.239081171005964
epoch:1,batch_idx:400,running_loss:10.610373565405608
epoch:1,batch_idx:400,running_loss:11.19194963991642
epoch:1,batch_idx:400,running_loss:11.511015555858613
epoch:1,batch_idx:400,running_loss:11.777962229624391
epoch:1,batch_idx:400,running_loss:12.346329095736145
epoch:1,batch_idx:400,running_loss:12.914522670656442
epoch:1,batch_idx:400,running_loss:13.47696782708168
epoch:1,batch_idx:400,running_loss:9.607671436071396
epoch:1,batch_idx:400,running_loss:10.072358027547597
epoch:1,batch_idx:400,running_loss:10.322921684682369
epoch:1,batch_idx:400,running_loss:10.655389100164175
epoch:1,batch_idx:400,running_loss:11.00442012757063
epoch:1,batch_idx:400,running_loss:11.304035099893808
epoch:1,batch_idx:400,running_loss:11.5

In [11]:
def load_net(path='multidata-epoch9.pt',net=None):
    net.load_state_dict(torch.load(path))
load_net(net=net)

In [12]:
def predict(prefix,num_preds,net,device):
    net.eval()
    state = net.begin_state(batch_size=1,device=device)
    outputs = [prefix[0]]
    get_input = lambda:torch.tensor([outputs[-1]],device=device).reshape(1,1)
    for y in prefix[1:]:
        _,state = net(get_input(),state)
        outputs.append(y)
    for _ in range(num_preds):
        y,state = net(get_input(),state)
        outputs.append(y.reshape(1).detach()[0])
    tmp = []
    for t in outputs:
        tmp.append(t.item())
    outputs =tmp
    return outputs#torch.cat(outputs,dim=1)


In [13]:
test_dataset = WindDataSet('../data/wind_dataset144-2014/wind_dataset144/142.csv',num_steps=50)
test_loader = DataLoader(test_dataset,batch_size=1,shuffle=False)
preds,truth=None,None
for batch_idx,(X, y) in enumerate(test_loader):
    if batch_idx == 0:
        X = (X.reshape(-1).to(torch.float32))
        preds = predict(X,50,net,device)
    if batch_idx == 1:
        truth = np.array(X)
        break
print((preds[50:]))
print((truth.reshape(50)))



RuntimeError: shape '[1, -1, 10]' is invalid for input of size 1