In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Run this cell to enable kaggle api if you have uploaded your kaggle credential key.

In [2]:
!mkdir ~/.kaggle
!cp ./drive/Shareddrives/ml/深度學習/kaggle.json ~/.kaggle/
!! chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c 111ntu-homework3
!unzip 111ntu-homework3.zip

Downloading 111ntu-homework3.zip to /content
100% 327k/327k [00:00<00:00, 951kB/s]
100% 327k/327k [00:00<00:00, 950kB/s]
Archive:  111ntu-homework3.zip
  inflating: testing_x.npy           
  inflating: training_x.npy          
  inflating: training_y.npy          


# Package

In [3]:
# Numerical Operations
import math
import numpy as np
import random

# Reading/Writing Data
import pandas as pd
import os
import csv

# for saving record
from datetime import datetime, timedelta

# Pytorch
import torch 
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split, ConcatDataset
import torch.nn.functional as F


from sklearn.model_selection import KFold

# For plotting learning curve
from torch.utils.tensorboard import SummaryWriter

import json

# Some Utilities

In [6]:
def trainer(train_loader, valid_loader, model, config, device):

    same_seed(config['seed'])
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'], betas=(0.9, 0.999), eps=1e-08, weight_decay=config['weight_decay'], amsgrad=False)
    writer = SummaryWriter() # Writer of tensoboard.

    if not os.path.isdir('./models'):
        os.mkdir('./models')

    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

    for epoch in range(n_epochs):
        model.train()
        loss_record = []

        for x, y in train_loader:
            optimizer.zero_grad()              
            x, y = x.to(device), y.to(device)  
            pred = model(x, y, config['teacher_forcing_ratio'])             
            loss = criterion(pred, y)
            loss.backward()                  
            optimizer.step()              
            step += 1
            loss_record.append(loss.detach().item())

        mean_train_loss = sum(loss_record)/len(loss_record)
        writer.add_scalar('Loss/train', mean_train_loss, step)

        model.eval()
        loss_record = []
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            with torch.no_grad():
                pred = model(x, y, 0)
                loss = criterion(pred, y)

            loss_record.append(loss.item())
            
        mean_valid_loss = sum(loss_record)/len(loss_record)
        print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
        writer.add_scalar('Loss/valid', mean_valid_loss, step)

        if mean_valid_loss < best_loss:
            best_loss = mean_valid_loss
            torch.save(model.state_dict(), config['save_path']) 
            print('Saving model with loss {:.3f}...'.format(best_loss))
            early_stop_count = 0
        else: 
            early_stop_count += 1

        if early_stop_count >= config['early_stop']:
            print('\nModel is not improving, so we halt the training session.')
            return

def same_seed(seed): 
    '''Fixes random number generator seeds for reproducibility.'''
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

def train_valid_split(data_set, valid_ratio, seed):
    '''Split provided training data into training set and validation set'''
    valid_set_size = int(valid_ratio * len(data_set)) 
    train_set_size = len(data_set) - valid_set_size
    train_set, valid_set = random_split(data_set, [train_set_size, valid_set_size], generator=torch.Generator().manual_seed(seed))
    return np.array(train_set), np.array(valid_set)

def predict(test_loader, model, device, config):
    model.eval() 
    preds = []
    for x, y in test_loader:
        x = x.to(device)
        y = y.to(device)                        
        with torch.no_grad():                
            pred = model(x, y, 0)          
            preds.append(pred.detach().cpu())
    preds = torch.cat(preds, dim=0).numpy()
    return preds

def select_feat(train_data, valid_data, test_data, select_all=True):
    '''Selects useful features to perform regression'''
    y_train, y_valid = train_data[:,-1], valid_data[:,-1]
    raw_x_train, raw_x_valid, raw_x_test = train_data[:,:-1], valid_data[:,:-1], test_data

    if select_all:
        feat_idx = list(range(raw_x_train.shape[1]))
    else:
        feat_idx =[1, 2, 5, 6, 7, 9] 
        
    return raw_x_train[:,feat_idx], raw_x_valid[:,feat_idx], raw_x_test[:,feat_idx], y_train, y_valid

# Read Dataset

In [4]:
x_training, y_training, x_testing = np.load('training_x.npy'), np.load('training_y.npy'), np.load('testing_x.npy')

# Preprocessing

In [None]:
x_training.shape, y_training.shape, x_testing.shape

((8760, 13), (8760, 1), (8760, 13))

In [5]:
timestep = 8
x_training = np.array([x_training[i:i+timestep] for i in range(x_training.shape[0] - timestep * 2)])
x_testing = np.array([x_testing[i:i+timestep] for i in range(x_testing.shape[0] - timestep * 2)])
y_training = np.array([y_training[i+timestep:i+(2 * timestep)] for i in range(y_training.shape[0] - timestep * 2)])

In [None]:
x_training.shape, y_training.shape, x_testing.shape

((8744, 8, 13), (8744, 8, 1), (8744, 8, 13))

# Build model

#### 使用 torch.nn + seq2seq + attention
mse loss: 102

In [11]:
class Encoder(nn.Module):
    def __init__(self,
                 input_size = 13,
                 hidden_size = 64,
                 num_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.rnn = nn.GRU(input_size, hidden_size, num_layers,
                           dropout=dropout, batch_first = True)

    def forward(self, x):
        output, hidden = self.rnn(x)
        return output, hidden
    
class Decoder(nn.Module):
    def __init__(self, hidden_size, num_layers=4, dropout=0.5):
        super().__init__()
        self.rnn = nn.GRU(input_size=hidden_size+1, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout)
        self.fc = nn.Linear(hidden_size, 1)

        # Attention
        self.W1 = nn.Linear(hidden_size, hidden_size)
        self.W2 = nn.Linear(hidden_size, hidden_size)
        self.v = nn.Linear(hidden_size, 1)
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax(dim=1)
        self.hidden_size = hidden_size

    def forward(self, x, decoder_hidden, encoder_output):
        """
        torch.Size([2, 256, 64]) torch.Size([256, 8, 64])
        torch.Size([256, 2, 64])
        torch.Size([256, 1, 64])
        torch.Size([256, 8, 1])
        torch.Size([256, 8, 1])
        torch.Size([256, 64])
        torch.Size([256, 65])
        torch.Size([256, 1, 65])
        torch.Size([256, 1, 64]) torch.Size([2, 1, 64])
        torch.Size([256, 64])
        torch.Size([256, 1])
        """
        # Attention
        # (2, 256, 64, 256, 8, 64)
        decoder_hidden = decoder_hidden.view(decoder_hidden.shape[1], decoder_hidden.shape[0], self.hidden_size) # (256, 2, 64)
        decoder_hidden = decoder_hidden[:, -1, :].unsqueeze(1) # (256, 1, 64)
        energy = self.v(self.tanh(self.W1(encoder_output) + self.W2(decoder_hidden))) # (256, 8, 1)
        attention_weight = self.softmax(energy) # (256, 8, 1)
        context_vector = torch.sum(attention_weight * encoder_output, dim=1) # (256, 64)

        concat_input = torch.concat([x, context_vector], dim=-1) # (256, 65)
        concat_input_fit = concat_input.unsqueeze(1) # (256, 1, 65)
        output, hidden = self.rnn(concat_input_fit) # (256, 1, 64), (2, 1, 64)

        output = output.squeeze(1) # (256, 64)

        prediction = self.fc(output) # (256, 1)

        return prediction, hidden
    
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, x, y, teacher_forcing_ratio = 0.5):
        """
        teacher_forcing_ratio: probability of using groud truth instead of training output
        set teacher_forcing_ratio to 0 for testing purpose
        """
        batch_size = x.shape[0]
        target_len = y.shape[1]
        outputs = torch.zeros(y.shape).to(self.device)
        encoder_output, hidden = self.encoder(x)
        decoder_input = torch.zeros((batch_size, 1), dtype=torch.float).to(self.device)
        for i in range(target_len):
            output, hidden = self.decoder(decoder_input, hidden, encoder_output)
            outputs[:,i] = output
            teacher_forcing = random.random() < teacher_forcing_ratio
            decoder_input = y[:,i] if teacher_forcing else output

        return outputs

# Fit dataset

In [7]:
class CustomDataset(Dataset):
    def __init__(self, x, y=None):
        if y is None:
            self.y = y
        else:
            self.y = torch.FloatTensor(y)
        self.x = torch.FloatTensor(x)

    def __getitem__(self, idx):
        if self.y is None:
            return self.x[idx]
        else:
            return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 175,      
    'select_all': True,   
    'valid_ratio': 0.2,  
    'n_epochs': 120,        
    'batch_size': 256, 
    'learning_rate': 1e-3,          
    'early_stop': 400,    
    'save_path': './models/b09705017_王紹安.ckpt',
    'momentum': 0.9,
    'weight_decay': 1e-6,
    'input_size': x_training.shape[2],
    'hidden_size': 64,
    'output_size': 1,
    'teacher_forcing_ratio': 0.5,
    'num_layers': 2,
    'encoder': {
        'dropout': 0.5,
    },
    'decoder': {
        'dropout': 0.5,
    }
}

In [18]:
same_seed(config['seed'])

x_train, x_valid = train_valid_split(x_training, config['valid_ratio'], config['seed'])
y_train, y_valid = train_valid_split(y_training, config['valid_ratio'], config['seed'])
x_test = x_testing
# Print out the data size.
print(f"""train_data size: {x_train.shape}
valid_data size: {x_valid.shape}
test_data size: {x_test.shape}""")

# Select features
# x_train, x_valid, x_test, y_train, y_valid = select_feat(train_data, valid_data, test_data, config['select_all'])

# Print out the number of features.
print(f'number of features: {x_train.shape[2]}')

train_dataset, valid_dataset, test_dataset = CustomDataset(x_train, y_train), \
                                            CustomDataset(x_valid, y_valid), \
                                            CustomDataset(x_test, y_training) # y_training is dummy tensor, only needs the shape of it.

# Pytorch data loader loads pytorch dataset into batches.
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True, drop_last=False)
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True, drop_last=False)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True, drop_last=False)

train_data size: (6996, 8, 13)
valid_data size: (1748, 8, 13)
test_data size: (8744, 8, 13)
number of features: 13


In [12]:
encoder = Encoder(input_size=config['input_size'], hidden_size=config['hidden_size'], num_layers=config['num_layers'], dropout=config['encoder']['dropout'])
decoder = Decoder(config['hidden_size'], num_layers=config['num_layers'], dropout=config['decoder']['dropout'])
model = Seq2Seq(encoder, decoder, device).to(device)
trainer(train_loader, valid_loader, model, config, device)

Epoch [1/120]: Train loss: 768.1933, Valid loss: 699.3330
Saving model with loss 699.333...
Epoch [2/120]: Train loss: 618.7797, Valid loss: 605.3973
Saving model with loss 605.397...
Epoch [3/120]: Train loss: 541.4111, Valid loss: 536.0429
Saving model with loss 536.043...
Epoch [4/120]: Train loss: 482.9065, Valid loss: 478.9467
Saving model with loss 478.947...
Epoch [5/120]: Train loss: 430.7972, Valid loss: 430.8225
Saving model with loss 430.823...
Epoch [6/120]: Train loss: 389.0450, Valid loss: 390.7300
Saving model with loss 390.730...
Epoch [7/120]: Train loss: 352.1788, Valid loss: 358.3255
Saving model with loss 358.326...
Epoch [8/120]: Train loss: 323.4513, Valid loss: 329.6020
Saving model with loss 329.602...
Epoch [9/120]: Train loss: 300.3755, Valid loss: 306.7133
Saving model with loss 306.713...
Epoch [10/120]: Train loss: 284.4927, Valid loss: 289.8104
Saving model with loss 289.810...
Epoch [11/120]: Train loss: 271.1350, Valid loss: 277.3538
Saving model with lo

# Re-load best weight, and predict test dataset

In [13]:
encoder = Encoder(input_size=config['input_size'], hidden_size=config['hidden_size'], num_layers=config['num_layers'], dropout=config['encoder']['dropout'])
decoder = Decoder(config['hidden_size'], num_layers=config['num_layers'], dropout=config['decoder']['dropout'])
model = Seq2Seq(encoder, decoder, device).to(device)
model.load_state_dict(torch.load(config['save_path']))
preds = predict(test_loader, model, device, config=config)

# save test prediction

In [14]:
def save_pred(preds, file):
    ''' Save predictions to specified file '''
    with open(file, 'w') as fp:
        writer = csv.writer(fp)
        writer.writerow(['INDEX', *[f'PM2.5-{i+1}' for i in range(8)]])
        for i, pred in enumerate(preds):
            # if p2 > p1:
            #     p1, p2 = p2, p1
            writer.writerow([i+1, *pred])
save_pred(preds.squeeze(), 'b09705017_王紹安.csv')

In [15]:
submit_time = (datetime.now() + timedelta(hours=8)).strftime('%m%d%H%M')
!kaggle competitions submit -c 111ntu-homework3 -f b09705017_王紹安.csv -m {submit_time}

100% 715k/715k [00:01<00:00, 678kB/s]
Successfully submitted to 111NTU Homework3

In [None]:
score = 104
if not os.path.isdir(f'./drive/Shareddrives/ml/深度學習/hw3/{submit_time}_{score}'):
    os.mkdir(f'./drive/Shareddrives/ml/深度學習/hw3/{submit_time}_{score}')
name = str(submit_time + '_' + str(score))
%notebook -e ./drive/Shareddrives/ml/深度學習/hw3/$name/b09705017_王紹安.ipynb
save_pred(preds, f'./drive/Shareddrives/ml/深度學習/hw3/{submit_time}_{score}/b09705017_王紹安.csv')
%cp ./models/b09705017_王紹安.ckpt ./drive/Shareddrives/ml/深度學習/hw3/$name/b09705017_王紹安.ckpt

### 手刻 seq2seq
mse: 136

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 175,      
    'select_all': True,   
    'valid_ratio': 0.2,  
    'n_epochs': 800,        
    'batch_size': 512, 
    'learning_rate': 1e-5,          
    'early_stop': 400,    
    'save_path': './models/b09705017_王紹安.ckpt',
    'momentum': 0.9,
    'weight_decay': 1e-5,
    'input_size': x_training.shape[2],
    'hidden_size': 64,
    'output_size': 1,
    'teacher_forcing_ratio': 0.5,
    'num_layers': 2,
    'encoder': {
        'dropout': 0.5,
    },
    'decoder': {
        'dropout': 0.5,
    }
}

In [None]:
class MyLinear(nn.Module):
    """ Custom Linear layer but mimics a standard linear layer """
    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out
        weights = torch.Tensor(size_out, size_in).to('cuda')
        self.weights = nn.Parameter(weights)  # nn.Parameter is a Tensor that's a module parameter.
        bias = torch.Tensor(size_out).to('cuda')
        self.bias = nn.Parameter(bias)

        # initialize weights and biases
        nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5)) # weight init
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weights)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.bias, -bound, bound)  # bias init

    def forward(self, x):
        w_times_x= torch.mm(x, self.weights.t())
        return torch.add(w_times_x, self.bias)  # w times x + b

class Encoder(nn.Module):
    def __init__(self,
                 input_size = 13,
                 hidden_size = 64,
                 num_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.linear1 = MyLinear(input_size, 512)
        self.linear2 = MyLinear(512, hidden_size)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.25)
        self.hidden_size = hidden_size


    def forward(self, x):
        outputs = torch.zeros((8, x.shape[0], self.hidden_size)).to('cuda')
        for i, data in enumerate(x.permute(1, 0, 2)):
            output = self.linear1(data)
            output = self.dropout1(output)
            output = self.relu1(output)

            output = self.linear2(output)
            output = self.dropout2(output)
            output = self.relu2(output)

            outputs[i, :, :] = output
        return outputs

class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.linear = MyLinear(hidden_size, int(hidden_size / 2))
        self.dropout1 = nn.Dropout(0.25)
        self.linear2 = MyLinear(int(hidden_size / 2), output_size)
        self.dropout2 = nn.Dropout(0.25)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()


    def forward(self, x, y, teacher_forcing):
        outputs = torch.zeros((8, x.shape[1], 1)).to('cuda')
        for i, data in enumerate(x):
            output = self.linear(data)
            output = self.dropout1(output)
            output = self.relu1(output)
            output = self.linear2(output)
            output = self.dropout2(output)
            output = self.relu2(output)
            outputs[i, :, :] = output
        return outputs

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, x, y, teacher_forcing=0):
        target_len = y.shape[1]
        encoder_output = self.encoder(x)
        decoder_output = self.decoder(encoder_output, y, teacher_forcing)
        decoder_output = decoder_output.permute(1, 0, 2)
        return decoder_output

In [None]:
same_seed(config['seed'])
encoder = Encoder(config['input_size'], config['hidden_size'])
decoder = Decoder(config['hidden_size'], config['output_size'])
model = Seq2Seq(encoder, decoder, device).to(device)
trainer(train_loader, valid_loader, model, config, device)

Epoch [1/800]: Train loss: 872.9366, Valid loss: 919.1982
Saving model with loss 919.198...
Epoch [2/800]: Train loss: 862.9677, Valid loss: 916.3078
Saving model with loss 916.308...
Epoch [3/800]: Train loss: 851.2251, Valid loss: 884.7419
Saving model with loss 884.742...
Epoch [4/800]: Train loss: 833.2215, Valid loss: 855.3817
Saving model with loss 855.382...
Epoch [5/800]: Train loss: 815.6066, Valid loss: 844.1859
Saving model with loss 844.186...
Epoch [6/800]: Train loss: 793.5525, Valid loss: 829.0648
Saving model with loss 829.065...
Epoch [7/800]: Train loss: 769.3176, Valid loss: 776.6666
Saving model with loss 776.667...
Epoch [8/800]: Train loss: 749.1922, Valid loss: 752.3290
Saving model with loss 752.329...
Epoch [9/800]: Train loss: 729.0294, Valid loss: 724.1123
Saving model with loss 724.112...
Epoch [10/800]: Train loss: 704.2593, Valid loss: 705.8696
Saving model with loss 705.870...
Epoch [11/800]: Train loss: 679.7728, Valid loss: 679.9767
Saving model with lo

In [None]:
encoder = Encoder(config['input_size'], config['hidden_size'])
decoder = Decoder(config['hidden_size'], config['output_size'])
model = Seq2Seq(encoder, decoder, device).to(device)
model.load_state_dict(torch.load(config['save_path']))
preds = predict(test_loader, model, device, config=config)
save_pred(preds.squeeze(), 'b09705017_王紹安.csv')
submit_time = (datetime.now() + timedelta(hours=8)).strftime('%m%d%H%M')
!kaggle competitions submit -c 111ntu-homework3 -f b09705017_王紹安.csv -m {submit_time}

100% 720k/720k [00:03<00:00, 187kB/s]
Successfully submitted to 111NTU Homework3

### 手刻 Seq2Seq + attention
mse: 104

In [20]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 175,      
    'select_all': True,   
    'valid_ratio': 0.2,  
    'n_epochs': 120,        
    'batch_size': 512, 
    'learning_rate': 1e-3,          
    'early_stop': 400,    
    'save_path': './models/b09705017_王紹安.ckpt',
    'momentum': 0.9,
    'weight_decay': 1e-5,
    'input_size': x_training.shape[2],
    'hidden_size': 64,
    'output_size': 1,
    'teacher_forcing_ratio': 0.5,
    'num_layers': 2,
    'encoder': {
        'dropout': 0.5,
    },
    'decoder': {
        'dropout': 0.5,
    }
}

In [23]:
class MyLinear(nn.Module):
    """ Custom Linear layer but mimics a standard linear layer """
    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out
        weights = torch.Tensor(size_out, size_in).to('cuda')
        self.weights = nn.Parameter(weights)  # nn.Parameter is a Tensor that's a module parameter.
        bias = torch.Tensor(size_out).to('cuda')
        self.bias = nn.Parameter(bias)

        # initialize weights and biases
        nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5)) # weight init
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weights)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.bias, -bound, bound)  # bias init

    def forward(self, x):
        w_times_x = torch.matmul(x, self.weights.t())
        return torch.add(w_times_x, self.bias)  # w times x + b


class Encoder(nn.Module):
    def __init__(self,
                 input_size = 13,
                 hidden_size = 64,
                 num_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = MyRNN(input_size, hidden_size, num_chunks=num_layers, device='cuda')

    def forward(self, x):
        hidden = torch.zeros((x.shape[0], self.hidden_size)).to('cuda')
        outputs = torch.zeros((x.shape[1], x.shape[0], self.hidden_size)).to('cuda')
        for i, data in enumerate(x.permute(1, 0, 2)):
            hidden = self.rnn(data, hidden)
            outputs[i, :, :] = hidden
        return outputs, hidden
    
class Decoder(nn.Module):
    def __init__(self, hidden_size, num_layers=4, dropout=0.5):
        super().__init__()
        self.rnn = MyRNN(input_size=hidden_size+1, hidden_size=hidden_size, num_chunks=num_layers, device='cuda')
        self.fc = MyLinear(hidden_size, 1)

        # Attention
        self.W1 = MyLinear(hidden_size, hidden_size)
        self.W2 = MyLinear(hidden_size, hidden_size)
        self.v = MyLinear(hidden_size, 1)
        self.softmax = nn.Softmax(dim=1)
        self.hidden_size = hidden_size
        self.tanh = nn.Tanh()


    def forward(self, x, decoder_hidden, encoder_output):
        # Attention
        # (2, 256, 64, 256, 8, 64)
        decoder_hidden = decoder_hidden.unsqueeze(0)
        decoder_hidden = decoder_hidden.view(decoder_hidden.shape[1], decoder_hidden.shape[0], self.hidden_size) 
        decoder_hidden = decoder_hidden[:, -1, :].unsqueeze(1) 
        energy = self.v(self.tanh(self.W1(encoder_output) + self.W2(decoder_hidden)))

        attention_weight = self.softmax(energy) 
        context_vector = torch.sum(attention_weight * encoder_output, dim=1) 

        concat_input = torch.concat([x, context_vector], dim=-1)
        concat_input_fit = concat_input.unsqueeze(1).permute(1, 0, 2) 

        decoder_hidden = decoder_hidden.permute(1, 0, 2)
        for i, (data, hidden) in enumerate(zip(concat_input_fit, decoder_hidden)):
            hidden = self.rnn(data, hidden) 
            output = hidden

        output = output.squeeze(1) 

        prediction = self.fc(output) 

        return prediction, hidden
    
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, x, y, teacher_forcing_ratio = 0.5):
        """
        teacher_forcing_ratio: probability of using groud truth instead of training output
        set teacher_forcing_ratio to 0 for testing purpose
        """
        batch_size = x.shape[0]
        target_len = y.shape[1]
        outputs = torch.zeros(y.shape).to(self.device)
        encoder_output, hidden = self.encoder(x)
        encoder_output = encoder_output.permute(1, 0, 2)
        decoder_input = torch.zeros((batch_size, 1), dtype=torch.float).to(self.device)
        for i in range(target_len):
            output, hidden = self.decoder(decoder_input, hidden, encoder_output)
            outputs[:,i] = output
            teacher_forcing = random.random() < teacher_forcing_ratio
            decoder_input = y[:,i] if teacher_forcing else output

        return outputs

class MyRNN(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, num_chunks: int, device: str):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.device = device
        self.weight_ih = nn.Parameter(torch.empty((self.hidden_size, input_size), device=device))
        self.weight_hh = nn.Parameter(torch.empty((self.hidden_size, hidden_size), device=device))

        self.bias_ih = nn.Parameter(torch.empty(self.hidden_size, device=device))
        self.bias_hh = nn.Parameter(torch.empty(self.hidden_size, device=device))

        self.tanh = nn.Tanh()
        bound = 1.0 / math.sqrt(hidden_size)

        for weight in self.parameters():
            nn.init.uniform_(weight, -bound, bound)

    def forward(self, x, hidden=None):
        a = torch.add(torch.mm(x, self.weight_ih.t()), self.bias_ih)
        b = torch.add(torch.mm(hidden, self.weight_hh.t()), self.bias_hh)
        h = self.tanh(torch.add(a, b))
        return h


In [24]:
encoder = Encoder(input_size=config['input_size'], hidden_size=config['hidden_size'], num_layers=config['num_layers'], dropout=config['encoder']['dropout'])
decoder = Decoder(config['hidden_size'], num_layers=config['num_layers'], dropout=config['decoder']['dropout'])
model = Seq2Seq(encoder, decoder, device).to(device)
trainer(train_loader, valid_loader, model, config, device)

Epoch [1/120]: Train loss: 833.3227, Valid loss: 768.1931
Saving model with loss 768.193...
Epoch [2/120]: Train loss: 723.1583, Valid loss: 703.9720
Saving model with loss 703.972...
Epoch [3/120]: Train loss: 657.5162, Valid loss: 650.2147
Saving model with loss 650.215...
Epoch [4/120]: Train loss: 607.2661, Valid loss: 606.6707
Saving model with loss 606.671...
Epoch [5/120]: Train loss: 568.1829, Valid loss: 582.6170
Saving model with loss 582.617...
Epoch [6/120]: Train loss: 532.5788, Valid loss: 558.3920
Saving model with loss 558.392...
Epoch [7/120]: Train loss: 499.1891, Valid loss: 505.5654
Saving model with loss 505.565...
Epoch [8/120]: Train loss: 472.5276, Valid loss: 481.7463
Saving model with loss 481.746...
Epoch [9/120]: Train loss: 447.6411, Valid loss: 454.6034
Saving model with loss 454.603...
Epoch [10/120]: Train loss: 422.4062, Valid loss: 437.2215
Saving model with loss 437.222...
Epoch [11/120]: Train loss: 400.0846, Valid loss: 418.0502
Saving model with lo

In [25]:
encoder = Encoder(input_size=config['input_size'], hidden_size=config['hidden_size'], num_layers=config['num_layers'], dropout=config['encoder']['dropout'])
decoder = Decoder(config['hidden_size'], num_layers=config['num_layers'], dropout=config['decoder']['dropout'])
model = Seq2Seq(encoder, decoder, device).to(device)
model.load_state_dict(torch.load(config['save_path']))
preds = predict(test_loader, model, device, config=config)
save_pred(preds.squeeze(), 'b09705017_王紹安.csv')
submit_time = (datetime.now() + timedelta(hours=8)).strftime('%m%d%H%M')
!kaggle competitions submit -c 111ntu-homework3 -f b09705017_王紹安.csv -m {submit_time}

100% 715k/715k [00:01<00:00, 612kB/s]
Successfully submitted to 111NTU Homework3