In [1]:
import pandas as pd
import numpy as np
import torch
import torch.utils.data as Data
import torch.nn as nn
from torch import optim


In [2]:
def create_features(df):
    """Add calendar-derived features to a frame of timestamped records.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain integer-like ``year``, ``month``, ``day`` and ``hour`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the input columns, the helper columns ``date``,
        ``week_day``, ``week_num``, the cyclic encodings ``sin_week``/``cos_week``/
        ``sin_hour``/``cos_hour`` and one-hot columns prefixed ``year_``,
        ``month_`` and ``weekday_`` (first level dropped in each group).
        The input frame is not modified.
    """
    df = df.copy()  # work on a copy so the caller's frame is left untouched

    # Pin the dummy dtype: pandas >= 2.0 defaults to bool, and a frame mixing bool
    # and float columns yields an object array from ``.values``.
    year = pd.get_dummies(df['year'], drop_first=True, prefix='year', dtype=np.uint8)
    month = pd.get_dummies(df['month'], drop_first=True, prefix='month', dtype=np.uint8)

    df['date'] = (df['year'].astype(str) + '-'
                  + df['month'].astype(str).str.zfill(2) + '-'
                  + df['day'].astype(str).str.zfill(2))
    timestamps = pd.to_datetime(df['date'])  # parse once and reuse

    df['week_day'] = timestamps.dt.dayofweek
    week_day = pd.get_dummies(df['week_day'], drop_first=True, prefix='weekday', dtype=np.uint8)

    # ``Series.dt.weekofyear`` was removed in pandas 2.0; ``isocalendar().week`` is
    # its replacement. Fall back to the old accessor on very old pandas versions.
    if hasattr(timestamps.dt, 'isocalendar'):
        df['week_num'] = timestamps.dt.isocalendar().week.astype(int)
    else:
        df['week_num'] = timestamps.dt.weekofyear

    # Cyclic encodings so that the end of a period sits next to its start.
    df['sin_week'] = np.sin(2 * np.pi * df['week_num'] / 52)
    df['cos_week'] = np.cos(2 * np.pi * df['week_num'] / 52)
    df['sin_hour'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['cos_hour'] = np.cos(2 * np.pi * df['hour'] / 24)
    return pd.concat([df, year, month, week_day], axis=1)

# Read the raw records and append the calendar features in one step.
data = create_features(pd.read_csv('PRSA_data_2010.1.1-2014.12.31.xls'))

# The pm2.5 column must stay first: later cells pick the target with ``iloc[:, :1]``.
FEATURE_COLS = ['pm2.5', 'year', 'TEMP']

# Cyclic encodings followed by the one-hot month (2..12) and weekday (1..6) columns.
DATE_COLS = (
    ['sin_week', 'cos_week', 'sin_hour', 'cos_hour']
    + ['month_{}'.format(month_no) for month_no in range(2, 13)]
    + ['weekday_{}'.format(day_no) for day_no in range(1, 7)]
)

In [3]:
### Basic preprocessing:
###   1. the last 31*24 rows are held out as the test set;
###   2. the FEATURE_COLS columns are standardised with training-set statistics;
###   3. the pm2.5 column is log1p-transformed.

df = data[FEATURE_COLS + DATE_COLS].copy()
df = df.fillna(0)
df['pm2.5'] = np.log1p(df['pm2.5'])

TEST_ROWS = 31 * 24  # number of trailing rows held out for testing

# Features at row t are paired with pm2.5 at row t+1: the X frames drop their last
# row, the Y frames are shifted up by one and lose the resulting trailing NaN.
# ``.copy()`` makes the X frames independent of ``df`` so the column assignments
# below do not write into a slice (SettingWithCopyWarning / chained assignment).
Xtrain = df.iloc[:-TEST_ROWS, :].iloc[:-1, :].copy()
Ytrain = df.iloc[:-TEST_ROWS, :1].shift(-1).dropna().rename(columns={'pm2.5': 'target'})
Xtest = df.iloc[-TEST_ROWS:, :].iloc[:-1, :].copy()
Ytest = df.iloc[-TEST_ROWS:, :1].shift(-1).dropna().rename(columns={'pm2.5': 'target'})

# Standardise with the *training* mean/std only, so no test statistics leak in.
for normal_features in FEATURE_COLS:
    mean_ = Xtrain[normal_features].mean()
    std_ = Xtrain[normal_features].std()
    Xtrain[normal_features] = (Xtrain[normal_features] - mean_) / std_
    Xtest[normal_features] = (Xtest[normal_features] - mean_) / std_

In [4]:
class encoder(nn.Module):
    """Bidirectional multi-layer GRU encoder.

    Parameters
    ----------
    FEATURE_SIZE : int
        Number of input features per time step.
    ENCODER_HIDDEN_SIZE : int
        Hidden size of each GRU direction.
    DECODER_HIDDEN_SIZE : int
        Size of the initial decoder hidden state produced by ``forward``.
    NUM_LAYERS : int, optional
        Number of stacked GRU layers (default 3, the previously hard-coded value).
    """

    def __init__(self, FEATURE_SIZE, ENCODER_HIDDEN_SIZE, DECODER_HIDDEN_SIZE, NUM_LAYERS=3):
        super(encoder, self).__init__()
        self.feature_size = FEATURE_SIZE
        self.encoder_hidden_size = ENCODER_HIDDEN_SIZE
        self.decoder_hidden_size = DECODER_HIDDEN_SIZE
        self.num_layers = NUM_LAYERS
        self.gru = nn.GRU(FEATURE_SIZE, ENCODER_HIDDEN_SIZE, num_layers=NUM_LAYERS, bidirectional=True)
        # Projects the concatenated final states of every layer/direction.
        self.linear = nn.Linear(ENCODER_HIDDEN_SIZE * 2 * NUM_LAYERS, DECODER_HIDDEN_SIZE)

    def forward(self, ENCODER_INPUTS):
        """Encode a sequence.

        ``ENCODER_INPUTS`` is passed to a non-batch-first GRU, i.e. it is laid out
        as (time_step, batch, features). Returns ``(encoder_outputs, hidden)`` with
        ``encoder_outputs`` of shape (time_step, batch, 2 * encoder_hidden) and
        ``hidden`` of shape (1, batch, decoder_hidden).
        """
        encoder_outputs, encoder_hidden = self.gru(ENCODER_INPUTS.float())
        batch_size = encoder_hidden.size(1)
        # encoder_hidden is (num_layers * 2, batch, hidden); even rows are the
        # forward direction of each layer, odd rows the backward direction.
        per_layer = torch.cat([encoder_hidden[::2, :, :], encoder_hidden[1::2, :, :]], dim=2)
        # Bring the batch axis to the front BEFORE flattening. A plain
        # ``view(1, batch, -1)`` on (layers, batch, 2H) would reinterpret memory
        # and mix the states of different samples into the same row.
        flat = per_layer.permute(1, 0, 2).reshape(1, batch_size, -1)
        hidden = torch.tanh(self.linear(flat))
        return encoder_outputs, hidden

### Compute the attention weights (alpha) over the encoder hidden states

class attention(nn.Module):
    """Additive-style attention scoring encoder outputs against a decoder state.

    Parameters
    ----------
    BATCH_SIZE : int
        Accepted for interface compatibility; not used.
    ENCODER_HIDDEN_SIZE : int
        Hidden size of one encoder direction (encoder outputs carry twice this).
    DECODER_HIDDEN_SIZE : int
        Size of the decoder hidden state.
    """

    def __init__(self, BATCH_SIZE, ENCODER_HIDDEN_SIZE, DECODER_HIDDEN_SIZE):
        super(attention, self).__init__()
        self.attn = nn.Linear(ENCODER_HIDDEN_SIZE * 2 + DECODER_HIDDEN_SIZE, DECODER_HIDDEN_SIZE)
        self.v = nn.Parameter(torch.rand(DECODER_HIDDEN_SIZE), requires_grad=True)

    def forward(self, encoder_outputs, decoder_each_hidden):
        """Return attention weights of shape (time_step, batch).

        ``encoder_outputs`` is (time_step, batch, 2 * encoder_hidden) and
        ``decoder_each_hidden`` is (1, batch, decoder_hidden). For every batch
        element the returned weights sum to 1 over the time axis.
        """
        encoder_time_step, batch_size, _ = encoder_outputs.size()

        # Repeat the decoder state once per encoder step. The length is taken from
        # the input itself rather than from a module-level constant.
        decoder_each_hidden_repeat = decoder_each_hidden.squeeze(0).repeat(encoder_time_step, 1, 1)

        energy = torch.cat([encoder_outputs, decoder_each_hidden_repeat], dim=2)  # (time_step, batch, 2*enc_hidden + dec_hidden)
        energy_attn = self.attn(energy)  # (time_step, batch, dec_hidden)

        # Dot every projected energy vector with v -> one score per (step, sample).
        scores = torch.matmul(energy_attn, self.v)  # (time_step, batch)

        # Normalise over the TIME axis so each sample gets its own distribution
        # across encoder steps (normalising over dim=1 would mix the batch).
        alpha_ = torch.softmax(scores, dim=0)
        return alpha_
        
class decoder(nn.Module):
    """Single-step GRU decoder that consumes an attention context vector.

    The GRU input is the attention-weighted sum of the encoder outputs
    concatenated with the current step's features; a linear head maps the new
    hidden state to one scalar per sample.
    """

    def __init__(self, FEATURE_SIZE, DECODER_HIDDEN_SIZE,ENCODER_HIDDEN_SIZE, attention):
        super(decoder, self).__init__()
        gru_input_size = FEATURE_SIZE + ENCODER_HIDDEN_SIZE * 2
        self.gru = nn.GRU(gru_input_size, DECODER_HIDDEN_SIZE)
        self.linear = nn.Linear(DECODER_HIDDEN_SIZE, 1)
        self.attention = attention

    def forward(self, encoder_outputs, decoder_each_hidden, decoder_inputs):
        """Run one decoding step.

        Returns ``(hidden, output, alpha_)``: the new GRU hidden state, the
        linear projection of that state, and the attention weights used.
        """
        weights = self.attention(encoder_outputs, decoder_each_hidden)  # (time_step, batch)

        # (batch, 2*enc_hidden, time_step) x (batch, time_step, 1) -> (batch, 2*enc_hidden, 1)
        outputs_batch_first = encoder_outputs.permute(1, 2, 0)
        weights_column = weights.permute(1, 0).unsqueeze(2)
        context = torch.bmm(outputs_batch_first, weights_column)

        # -> (1, batch, 2*enc_hidden), matching decoder_inputs = (1, batch, features)
        context = context.permute(2, 0, 1).float()
        gru_input = torch.cat([context, decoder_inputs.float()], dim=2)

        _, hidden = self.gru(gru_input, decoder_each_hidden)
        prediction = self.linear(hidden)
        return hidden, prediction, weights
        
class seq2seq(nn.Module):
    """Encoder/decoder wrapper that unrolls the decoder one step at a time."""

    def __init__(self, encoder, decoder):
        super(seq2seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, ENCODER_INPUTS, decoder_inputs, teacher_forcing_ratio):
        """Encode ``ENCODER_INPUTS`` and decode ``decoder_inputs`` step by step.

        Parameters
        ----------
        ENCODER_INPUTS : tensor, (encoder_time_step, batch, features)
        decoder_inputs : tensor, (decoder_time_step, batch, features)
            Not modified by this call.
        teacher_forcing_ratio : float
            Probability, per step, that feature 0 of the NEXT decoder input is
            replaced by the model's own prediction for the current step. Note
            that, despite the name, a higher value means MORE self-feeding; this
            meaning is kept unchanged for existing callers.

        Returns
        -------
        outputs : tensor, (decoder_time_step, batch, 1)
        alpha_s : tensor, (decoder_time_step, encoder_time_step, batch)
        """
        decoder_time_step, batch_size, _ = decoder_inputs.size()
        encoder_time_size = ENCODER_INPUTS.size(0)
        device = ENCODER_INPUTS.device  # allocate buffers where the data lives

        encoder_outputs, decoder_each_hidden = self.encoder(ENCODER_INPUTS)

        outputs = torch.zeros(decoder_time_step, batch_size, 1, device=device)
        alpha_s = torch.zeros(decoder_time_step, encoder_time_size, batch_size, device=device)

        step_input = decoder_inputs[0:1, :, :]
        for t in range(decoder_time_step):
            # Carry the hidden state forward: each step starts from the state the
            # previous step produced, not from the initial encoder state.
            decoder_each_hidden, output, alpha_ = self.decoder(encoder_outputs, decoder_each_hidden, step_input)
            alpha_s[t:t+1, :, :] = alpha_
            outputs[t:t+1, :, :] = output

            if t == decoder_time_step - 1:
                break

            # Build the next input without writing into the caller's tensor.
            step_input = decoder_inputs[t+1:t+2, :, :]
            if np.random.random() < teacher_forcing_ratio:
                step_input = torch.cat([output.float(), step_input[:, :, 1:].float()], dim=2)
        return outputs, alpha_s
        
        
def evaluate(seq2seq, loader_test, INPUT_SEQ_LEN, loss_fn=None):
    """Run the model over every batch of ``loader_test`` without gradients.

    Parameters
    ----------
    seq2seq : nn.Module
        Called as ``model(encoder_inputs, decoder_inputs, 1)`` and expected to
        return ``(outputs, attention_weights)``.
    loader_test : iterable of (b_x, b_y)
        Batches laid out as (batch, time_step, features).
    INPUT_SEQ_LEN : int
        Number of leading time steps fed to the encoder; the rest are decoded.
    loss_fn : callable, optional
        Loss applied to ``(outputs, targets)``. Defaults to
        ``nn.MSELoss(reduction='sum')``.

    Returns
    -------
    outputs : tensor or None
        Predictions of all batches concatenated along the batch axis,
        (decoder_time_step, n_samples, 1); ``None`` if the loader is empty.
    loss_sum : tensor
        Loss summed over all batches, on whatever scale the targets are
        supplied in (no inverse transform is applied here).
    """
    model = seq2seq
    criterion = nn.MSELoss(reduction='sum') if loss_fn is None else loss_fn
    model.eval()

    collected = []
    loss_sum = 0.0
    with torch.no_grad():
        for b_x, b_y in loader_test:
            time_major = b_x.permute(1, 0, 2)  # (time_step, batch, features)
            outputs, _ = model(time_major[:INPUT_SEQ_LEN, :, :], time_major[INPUT_SEQ_LEN:, :, :], 1)
            # Same target layout as the training step: (decoder_time_step, batch, 1).
            targets = b_y[:, INPUT_SEQ_LEN:, :].permute(1, 0, 2).float()
            loss_sum = loss_sum + criterion(outputs, targets)
            collected.append(outputs)

    outputs = torch.cat(collected, dim=1) if collected else None
    return outputs, torch.as_tensor(loss_sum)

In [5]:
### Hyper-parameters and sliding-window (time-step) construction
ENCODER_HIDDEN_SIZE = 64
DECODER_HIDDEN_SIZE = 128
# NOTE(review): LAYER_NUM, the two dropout rates, EPOCH_NUM and CLIP are not
# referenced anywhere in the visible notebook — confirm whether they are still needed.
LAYER_NUM = 2
ENCODER_DROPOUT = 0.5
DECODER_DROPOUT = 0.5


INPUT_SEQ_LEN = 30
OUTPUT_SEQ_LEN = 14
BATCH_SIZE = 256
EPOCH_NUM = 100
CLIP = 1

FEATURE_SIZE = len(FEATURE_COLS) + len(DATE_COLS)

### Build the training windows

sample_nums = (Xtrain.values.shape[0] - INPUT_SEQ_LEN - OUTPUT_SEQ_LEN -1)
# Row i holds the indices i .. i + INPUT_SEQ_LEN + OUTPUT_SEQ_LEN - 1. Built by
# broadcasting instead of tens of thousands of Python lists.
train_set_index = (np.arange(max(sample_nums, 0))[:, None]
                   + np.arange(INPUT_SEQ_LEN + OUTPUT_SEQ_LEN)[None, :])
# Cast explicitly: a frame mixing float and bool columns yields an object array,
# which torch.tensor cannot convert.
encoder_data = torch.tensor(np.take(Xtrain.values.astype(np.float64), train_set_index, axis =0))
decoder_data = torch.tensor(np.take(Ytrain.values.astype(np.float64), train_set_index, axis =0))

train = Data.TensorDataset(encoder_data, decoder_data)
loader_train = Data.DataLoader(dataset = train,  batch_size = BATCH_SIZE, shuffle = True, num_workers = 4, drop_last= True )
### x,y : [batch_size, time_step, features]

def set_seed(seed=12):
    """Seed the Python, NumPy and PyTorch random generators.

    Parameters
    ----------
    seed : int, optional
        Seed value; defaults to 12, the value that was previously hard-coded.
    """
    import random  # local import: only needed here

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Ask cuDNN for deterministic kernels and disable its autotuner, which may
    # otherwise pick different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [6]:
### Next steps: 1. iterate over all batches; 2. the encoder consumes all time steps
### at once; 3. the decoder is fed one time step at a time.

set_seed()  # make weight initialisation reproducible

# Instances get their own names so the class names stay bound to the classes and
# this cell can be re-run.
encoder_net = encoder(FEATURE_SIZE, ENCODER_HIDDEN_SIZE, DECODER_HIDDEN_SIZE)
attention_net = attention(BATCH_SIZE, ENCODER_HIDDEN_SIZE, DECODER_HIDDEN_SIZE)
decoder_net = decoder(FEATURE_SIZE, DECODER_HIDDEN_SIZE,ENCODER_HIDDEN_SIZE, attention_net)

# Later cells refer to the model as ``seq2seq``, so that name must end up bound to
# the instance. Recover the class first in case a previous run already rebound it.
_seq2seq_cls = seq2seq if isinstance(seq2seq, type) else type(seq2seq)
seq2seq = _seq2seq_cls(encoder_net, decoder_net)

optimizer = optim.Adam(seq2seq.parameters(), lr = 0.00001)
loss = nn.MSELoss(reduction = 'sum')

def train_epoch(seq2seq, loader_train, INPUT_SEQ_LEN, opt=None, criterion=None):
    """Train the model for one pass over ``loader_train``.

    Parameters
    ----------
    seq2seq : nn.Module
        Called as ``model(encoder_inputs, decoder_inputs, 0.5)`` and expected to
        return ``(outputs, attention_weights)``.
    loader_train : iterable of (b_x, b_y)
        Batches laid out as (batch, time_step, features).
    INPUT_SEQ_LEN : int
        Number of leading time steps fed to the encoder; the rest are decoded.
    opt : torch optimizer, optional
        Defaults to the module-level ``optimizer``.
    criterion : callable, optional
        Defaults to the module-level ``loss``.

    Returns
    -------
    (outputs, loss_sum)
        Predictions and loss of the LAST batch processed, or ``(None, None)``
        if the loader yields nothing.
    """
    model = seq2seq
    opt = optimizer if opt is None else opt
    criterion = loss if criterion is None else criterion

    model.train()
    outputs, loss_sum = None, None
    for b_x, b_y in loader_train:
        opt.zero_grad()
        time_major = b_x.permute(1, 0, 2)  # (time_step, batch, features)
        outputs, alpha_s = model(time_major[:INPUT_SEQ_LEN, :, :], time_major[INPUT_SEQ_LEN:, :, :], 0.5)
        targets = b_y[:, INPUT_SEQ_LEN:, :].permute(1, 0, 2).float()
        loss_sum = criterion(outputs, targets)
        # Without backward() no gradients exist and opt.step() changes nothing.
        loss_sum.backward()
        opt.step()
    return outputs, loss_sum


In [None]:
### Build the test windows (same layout as the training windows)
sample_nums_test = (Xtest.values.shape[0] - INPUT_SEQ_LEN - OUTPUT_SEQ_LEN -1)
train_set_index_test = (np.arange(max(sample_nums_test, 0))[:, None]
                        + np.arange(INPUT_SEQ_LEN + OUTPUT_SEQ_LEN)[None, :])
# Explicit cast so an object-dtype frame cannot reach torch.tensor.
encoder_data_test = torch.tensor(np.take(Xtest.values.astype(np.float64), train_set_index_test, axis =0))
decoder_data_test = torch.tensor(np.take(Ytest.values.astype(np.float64), train_set_index_test, axis =0))

test = Data.TensorDataset(encoder_data_test, decoder_data_test)
# Evaluation should see every window exactly once and in a stable order, so the
# loader neither shuffles nor drops the final partial batch.
loader_test = Data.DataLoader(dataset = test,  batch_size = BATCH_SIZE, shuffle = False, num_workers = 4, drop_last= False )


EPOCH = 2
for epoch in range(EPOCH):
    outputs, loss_sum = train_epoch(seq2seq, loader_train, INPUT_SEQ_LEN)
    outputs_eval, loss_sum_eval = evaluate(seq2seq, loader_test, INPUT_SEQ_LEN)
    # Report progress instead of silently discarding the per-epoch results.
    print('epoch {}/{}: last train-batch loss {:.4f} | eval loss {:.4f}'.format(
        epoch + 1, EPOCH, float(loss_sum), float(loss_sum_eval)))