In [6]:
import torch
import numpy as np
from torch.utils.data import Dataset, Sampler, DataLoader, WeightedRandomSampler, RandomSampler


class TimeSeriesDataSet(Dataset):
    def __init__(self, data_path, mode='train'):
        if mode == 'train':
            self.encoder_inputs = np.load(data_path + 'train_encoder_inputs_exog.npy')
            self.decoder_targets = np.load(data_path + 'train_decoder_targets_exog.npy')
        else:
            self.encoder_inputs = np.load(data_path + 'test_encoder_inputs_exog.npy')
            self.decoder_targets = np.load(data_path + 'test_decoder_targets_exog.npy')

    def __len__(self):
        return len(self.encoder_inputs)

    def __getitem__(self, item):
        return item, self.encoder_inputs[item, :], self.decoder_targets[item, :]

In [7]:
import torch.nn as nn
import torch
import torch.nn.functional as F
import numpy as np
class DenseConv(nn.Module):
    def __init__(self, input_channel_size, output_channel_size):
        super(DenseConv, self).__init__()
    
        self.conv1 = nn.Conv1d(in_channels = input_channel_size, out_channels = output_channel_size, kernel_size = 1, stride =1)
        self.relu = nn.ReLU()
    def forward(self, inputs):
        conv1d_outputs = self.relu(self.conv1(inputs))
        return conv1d_outputs
    
class CausalDilatedConv(nn.Module):
    def __init__(self, input_channel_size, output_channel_size, kernel_size=2, dilation=1):
        super(CausalDilatedConv, self).__init__()
        self.padding = (kernel_size-1)*dilation
        self.CausalDilatedConv = nn.Conv1d(in_channels = input_channel_size, out_channels = output_channel_size, \
                                    kernel_size = kernel_size, stride =1, padding = self.padding, dilation= dilation)
    def forward(self, inputs):
#         import pdb;pdb.set_trace()
        causal_conv1d_outputs = self.CausalDilatedConv(inputs)[:,:,:-self.padding]
        return causal_conv1d_outputs
        
class TemporalConv(nn.Module):
    def __init__(self, input_channel_size, output_channel_size, intermediate_channel_size, kernel_size, dilation):
        super(TemporalConv, self).__init__()
        self.conv_dense_pre = DenseConv(input_channel_size, output_channel_size)
        self.filter = CausalDilatedConv(output_channel_size, intermediate_channel_size, kernel_size, dilation)  
        self.gate = CausalDilatedConv(output_channel_size, intermediate_channel_size, kernel_size, dilation) 
        self.conv_dense_post = DenseConv(intermediate_channel_size, output_channel_size)
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
    def forward(self, inputs):
        outputs_pre = self.conv_dense_pre(inputs)
        outputs_filter = self.filter(outputs_pre)
        outputs_gate = self.filter(outputs_pre)
        outputs_post = self.tanh(outputs_filter) * self.tanh(outputs_gate)
        outputs_post = self.conv_dense_post(outputs_post)
        outputs = outputs_pre + outputs_post
        return outputs, outputs_post
        
class WaveNet(nn.Module):
    def __init__(self, encoder_seq_len, decoder_seq_len, input_channel_size, output_channel_size, \
                 intermediate_channel_size,kernel_size, dilation_list,post_channel_size,dropout, device):
        super(WaveNet, self).__init__()
        self.decoder_seq_len = decoder_seq_len
        self.encoder_seq_len = encoder_seq_len
        self.device = device
        ### N层temporalConv，需要将其size写清楚：
        self.tcn_list = nn.ModuleList([TemporalConv(input_channel_size, output_channel_size, \
                                               intermediate_channel_size, kernel_size, dilation=1)])
        for dilation in dilation_list[1:]:
            self.tcn_list.append(TemporalConv(output_channel_size, output_channel_size, \
                                               intermediate_channel_size, kernel_size, dilation))
        self.conv_post = nn.Sequential(DenseConv(output_channel_size, post_channel_size),
                                      nn.Dropout(0.2),
                                      DenseConv(post_channel_size,1))
            
    def forward_t(self,inputs):
        outputs_post_list = []
        for tcn in self.tcn_list:
            outputs, outputs_post = tcn(inputs)
            inputs = outputs
            outputs_post_list.append(outputs_post)
        sum_outputs_post = sum(outputs_post_list)
        outputs = self.conv_post(sum_outputs_post)
        return outputs    
        
    def forward(self, inputs, teacher_forcing_ratio):
        if teacher_forcing_ratio == 1:
            outputs = self.forward_t(inputs).permute(0,2,1)
            return outputs[:, -self.decoder_seq_len:, :]
        
        else:
            batch_size = inputs.size(0)
            decoder_inputs = inputs[:, :, :-self.decoder_seq_len+1].clone()
            decoder_outputs = torch.zeros(batch_size, self.decoder_seq_len, device = self.device)
            
            for i in range(self.decoder_seq_len):
                outputs = self.forward_t(decoder_inputs)[:,:,-1:] ### [129, 22, decoder_size]
                decoder_exog = inputs[:,1:,-self.decoder_seq_len+1+i].unsqueeze(2) ### [129, 22-1, decoder_size]                
                decoder_exog = torch.cat([outputs,decoder_exog],dim=1)
                decoder_inputs = torch.cat([decoder_inputs[:,:,1:], decoder_exog], dim =2)
                decoder_outputs[:,i] = outputs.squeeze(2).squeeze(1)
            return decoder_outputs.unsqueeze(2)

In [8]:
data_path = 'data/'
DATA_PATH = 'data/'
ENCODER_SEQ_LEN = 430
DECODER_SEQ_LEN = 60
BATCH_SIZE = 128
BATCH_SIZE_TEST = 1024
EPOCH_NUM = 100
LEARNING_RATE = 0.001
INPUT_CHANNEL_SIZE = 22
OUTPUT_CHANNEL_SIZE = 32
INTERMEDIATE_CHANNEL_SIZE = 32
POST_CHANNEL_SIZE = 128
HIDDEN_SIZE = 128
DROPOUT = 0.2
KERNEL_SIZE = 2
DILATION_LIST = [1, 2, 4, 8]


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# device = torch.device('cpu')

train_dataset = TimeSeriesDataSet(data_path,mode='train')
test_dataset = TimeSeriesDataSet(data_path,mode='test')
trian_sampler = RandomSampler(train_dataset)
test_sampler = RandomSampler(test_dataset)

train_loader = DataLoader(train_dataset, batch_size = BATCH_SIZE, sampler = trian_sampler, drop_last = True)
test_loader = DataLoader(test_dataset, batch_size = BATCH_SIZE, sampler = test_sampler, drop_last = True)

In [None]:
import torch.optim as optim
seq2seq = WaveNet(ENCODER_SEQ_LEN, DECODER_SEQ_LEN, INPUT_CHANNEL_SIZE, OUTPUT_CHANNEL_SIZE,
                      INTERMEDIATE_CHANNEL_SIZE, KERNEL_SIZE, DILATION_LIST, POST_CHANNEL_SIZE, DROPOUT, device).to(device)


loss_func = nn.L1Loss(reduction = 'elementwise_mean')
optimize = optim.Adam(seq2seq.parameters(), lr = LEARNING_RATE, )

def train_epoch(model, train_loader, loss_func, optimize):
    model.train()
    epoch_loss, total_num = 0, 0 
    for step, (index,train_features, train_label) in enumerate(train_loader):
        optimize.zero_grad()
        train_features = train_features.float().to(device).permute(0,2,1)
        outputs = model(train_features, teacher_forcing_ratio=1)
#         import pdb; pdb.set_trace()
        loss = loss_func(outputs.contiguous().view(-1), train_label.view(-1).float())
        loss.backward()
        optimize.step()
#         import pdb; pdb.set_trace()
        epoch_loss =epoch_loss + loss.item() * outputs.contiguous().view(-1).size(0)
        total_num = total_num + outputs.contiguous().view(-1).size(0)
    return epoch_loss/total_num
        
def test_epoch(model, test_loader,loss_func):
    model.eval()
    epoch_loss_test, total_num = 0 , 0
    for step, (index,test_features, test_label) in enumerate(test_loader):
#         import pdb; pdb.set_trace()
        outputs = model(test_features.permute(0,2,1).float().to(device), teacher_forcing_ratio = 0)
        loss = loss_func(outputs.contiguous().view(-1), test_label.view(-1).float().to(device))
        epoch_loss_test = epoch_loss_test + loss.item() * outputs.contiguous().view(-1).size(0)
        total_num = total_num + outputs.contiguous().view(-1).size(0)
    return epoch_loss_test/total_num
    
    
for i in range(EPOCH_NUM):
    train_loss = train_epoch(seq2seq, train_loader, loss_func, optimize)
    test_loss = test_epoch(seq2seq, test_loader,loss_func)
    print('train_loss', train_loss)
    print('test_loss', test_loss)
    

train_loss 0.5201876596097024
test_loss 0.5270009463833224
train_loss 0.41802334112505757
test_loss 0.49892126264110687
train_loss 0.3961267855859572
test_loss 0.4927492930043128
train_loss 0.3929706629245512
test_loss 0.4808902346318768
train_loss 0.3924235941902284
test_loss 0.48891660090415706
train_loss 0.39163795209700064
test_loss 0.47565070756020084
train_loss 0.3910533026341469
test_loss 0.4667402265533324
train_loss 0.3903825782960461
test_loss 0.47121471839566387
train_loss 0.39026661938236606
test_loss 0.4697215989712746


$ math::
    \ell(x, y) = \begin{cases}
        \operatorname{mean}(L), & \text{if}\; \text{size_average} = \text{True},\\
        \operatorname{sum}(L),  & \text{if}\; \text{size_average} = \text{False}.
    \end{cases}$

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import DataLoader, RandomSampler


import numpy as np
from time import time
from functools import partial



def train_epoch(model, data_loader, optimizer, loss_func, device):
    model.train()

    epoch_loss, total_num = 0, 0
    for i, (_, encoder_inputs, decoder_targets) in enumerate(data_loader):
        # encoder_inputs: (batch, 1, total_sequence)
        # decoder_targets: (batch, decoder_sequence, 1)

        optimizer.zero_grad()

        encoder_inputs = encoder_inputs.float().to(device).permute(0, 2, 1)
        decoder_targets = decoder_targets.float().to(device)

        model_outputs = model(encoder_inputs, teacher_forcing_ratio=1)
#         import pdb; pdb.set_trace()
        loss, cnt = loss_func(model_outputs.contiguous().view(-1), decoder_targets.view(-1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * cnt
        total_num += cnt

    return epoch_loss / total_num


def evaluate_epoch(model, data_loader, loss_func, device):
    model.eval()

    epoch_loss, total_num = 0, 0
    with torch.no_grad():
        for i, (_, encoder_inputs, decoder_targets) in enumerate(data_loader):
            # encoder_inputs: (batch, 1, encoder_sequence)
            # decoder_targets: (batch, decoder_sequence, 1)

            encoder_inputs = encoder_inputs.float().to(device).permute(0, 2, 1)
            decoder_targets = decoder_targets.float().to(device)

            model_outputs = model(encoder_inputs, teacher_forcing_ratio=0)
            loss, cnt = loss_func(model_outputs.contiguous().view(-1), decoder_targets.view(-1))
            epoch_loss += loss.item() * cnt
            total_num += cnt

    return epoch_loss / total_num


def mae_loss(outputs, targets, reduction='elementwise_mean'):
    mae = nn.L1Loss(reduction=reduction)
    return mae(outputs, targets), outputs.numel()


def save_embedding(target):
    embedding_weights = eval('seq2seq.embedding_' + target + '.weight.data.cpu().numpy()')
    embedding_index = eval('train_set.' + target + '_arr')
    embedding = np.concatenate([embedding_index.reshape((-1, 1)), embedding_weights], axis=1)
    np.save('model/' + target + '_embedding.npy', embedding)


def init_weights(m):
    for name, param in m.named_parameters():
        if len(param.data.size()) > 1:
            nn.init.kaiming_normal_(param.data)


def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs


def set_seed():
    SEED = 1
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True




if __name__ == '__main__':
    DATA_PATH = 'data/'
    ENCODER_SEQ_LEN = 430
    DECODER_SEQ_LEN = 60
    BATCH_SIZE = 128
    BATCH_SIZE_TEST = 1024
    EPOCH_NUM = 100
    LEARNING_RATE = 0.001
    INPUT_CHANNEL_SIZE = 22
    OUTPUT_CHANNEL_SIZE = 32
    INTERMEDIATE_CHANNEL_SIZE = 38
    POST_CHANNEL_SIZE = 142
    HIDDEN_SIZE = 128
    DROPOUT = 0.2
    KERNEL_SIZE = 2
    DILATION_LIST = [1, 2, 4, 8]

    set_seed()

    # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    device = torch.device('cpu')

    train_set = TimeSeriesDataSet(DATA_PATH, mode='train')
    test_set = TimeSeriesDataSet(DATA_PATH, mode='test')
    train_sampler = RandomSampler(train_set)
    test_sampler = RandomSampler(test_set)
    train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=0)
    test_loader = DataLoader(test_set, batch_size=BATCH_SIZE_TEST, sampler=test_sampler, num_workers=0)

    seq2seq = WaveNet(ENCODER_SEQ_LEN, DECODER_SEQ_LEN, INPUT_CHANNEL_SIZE, OUTPUT_CHANNEL_SIZE,
                      INTERMEDIATE_CHANNEL_SIZE, KERNEL_SIZE, DILATION_LIST, POST_CHANNEL_SIZE, DROPOUT, device).to(device)

    seq2seq.apply(init_weights)

    optimizer = optim.Adam(seq2seq.parameters(), lr=LEARNING_RATE)

    min_val_loss = float('inf')
    not_descending_cnt = 0
    for epoch in range(EPOCH_NUM):

        start_time = time()

        train_loss = train_epoch(seq2seq, train_loader, optimizer, mae_loss, device)

#         val_loss = evaluate_epoch(seq2seq, test_loader, mae_loss, device)

#         end_time = time()

#         epoch_mins, epoch_secs = epoch_time(start_time, end_time)

#         print('Epoch: %s | Time: %sm %ss' % (str(epoch + 1).zfill(2), epoch_mins, epoch_secs))
#         print('\tTrain Loss: %.3f | Val Loss: %.3f' % (train_loss, val_loss))

#         if val_loss >= min_val_loss:
#             not_descending_cnt += 1
#             if not_descending_cnt >= 20 and epoch >= 19 and epoch != EPOCH_NUM - 1:
#                 print('Early Stopped ...')
#                 break
#         else:
#             not_descending_cnt = 0
#             if epoch >= 2:
#                 min_val_loss = val_loss
#                 torch.save(seq2seq.state_dict(), 'model/wavenet_model.pt')
#                 print()
#                 print('model saved with validation loss', val_loss)
#                 print()

In [None]:
import das.udf.estimator as de
u_job= de.Udf(entry_point = '/mnt/xiaoxiao10/pytorch/wavenet_all/train_evaluate.py', \
              image_name = 'repo.jd.local/public/notebook:nb5.5-pytorch0.4-py3-gpu', \
              train_gpu_count = 1)
u_job.fit(base_job_name='pytorch-ts')