In [1]:
import torch.nn as nn
import torch

In [2]:

# Toy parallel corpus. Every row holds three whitespace-separated token strings.
# The encoder input ends with the padding token 'P', the decoder input begins
# with 'S' and the decoder output ends with 'E'.
sentences = [
    [
        'ich mochte ein bier P',  # encoder input
        'S i want a beer .',      # decoder input
        'i want a beer . E',      # decoder output
    ],
    [
        'ich mochte ein cola P',  # encoder input
        'S i want a coke .',      # decoder input
        'i want a coke . E',      # decoder output
    ],
]

In [3]:
# Echo the raw corpus rows as a sanity check.
sentences

[['ich mochte ein bier P', 'S i want a beer .', 'i want a beer . E'],
 ['ich mochte ein cola P', 'S i want a coke .', 'i want a coke . E']]

In [4]:

# The padding token 'P' must map to index 0 in both vocabularies.
source_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4, 'cola': 5}
source_vocab_size = len(source_vocab)

target_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'coke': 5, 'S': 6, 'E': 7, '.': 8}
# Invert the word -> id mapping explicitly. Enumerating the dict only produces
# the right ids while they happen to be 0..N-1 in insertion order, and would
# silently go wrong as soon as an entry is reordered or an id is skipped.
idx2word = {idx: word for word, idx in target_vocab.items()}
target_vocab_size = len(target_vocab)
source_len = 5  # max length of the encoder input sequence (in tokens)
target_len = 6  # max length of the decoder input / output sequence (in tokens)

In [7]:

def make_data(sentences):
    """Turn raw sentence rows into three tensors of vocabulary ids.

    Each row is indexed at positions 0, 1 and 2 for the encoder input, decoder
    input and decoder output strings. Every string is split on whitespace; the
    encoder tokens are looked up in the module-level ``source_vocab`` and both
    decoder strings in the module-level ``target_vocab``. No padding is applied
    here, so the id lists of one column must already share the same length.

    :param sentences: sequence of rows, each holding the three strings above
    :return: tuple ``(encoder_inputs, decoder_inputs, decoder_outputs)`` of
        ``torch.LongTensor``
    :raises KeyError: if a token is missing from the vocabulary it is looked up in
    """
    def encode(text, vocab):
        # Whitespace-tokenise and map each token to its id.
        return [vocab[token] for token in text.split()]

    source_ids, target_in_ids, target_out_ids = [], [], []
    for row in sentences:
        src = encode(row[0], source_vocab)
        tgt_in = encode(row[1], target_vocab)
        tgt_out = encode(row[2], target_vocab)
        source_ids.append(src)
        target_in_ids.append(tgt_in)
        target_out_ids.append(tgt_out)

    return (
        torch.LongTensor(source_ids),
        torch.LongTensor(target_in_ids),
        torch.LongTensor(target_out_ids),
    )


In [9]:
from torch.utils import data as Data
class Seq2SeqDataset(Data.Dataset):
    """Dataset serving aligned (encoder input, decoder input, decoder output) samples.

    The three containers are stored exactly as passed in. Sample ``idx`` is
    built by indexing each of them with ``idx``, and the dataset length is the
    size of the first dimension of ``encoder_input``.
    """

    def __init__(self, encoder_input, decoder_input, decoder_output):
        super().__init__()
        self.encoder_input, self.decoder_input, self.decoder_output = (
            encoder_input,
            decoder_input,
            decoder_output,
        )

    def __len__(self):
        # Only the encoder inputs are consulted for the sample count.
        n_samples = self.encoder_input.shape[0]
        return n_samples

    def __getitem__(self, idx):
        sample = (
            self.encoder_input[idx],
            self.decoder_input[idx],
            self.decoder_output[idx],
        )
        return sample


In [10]:
# Encode the corpus; make_data returns the three index tensors in this order.
encoder_inputs, decoder_inputs, decoder_outputs = make_data(sentences)

In [11]:
# Inspect the encoded source batch: one row of ids per sentence, with the pad id 0 last.
encoder_inputs

tensor([[1, 2, 3, 4, 0],
        [1, 2, 3, 5, 0]])

In [12]:
# Inspect the encoded decoder inputs: every row starts with id 6, the index of 'S' in target_vocab.
decoder_inputs

tensor([[6, 1, 2, 3, 4, 8],
        [6, 1, 2, 3, 5, 8]])

In [30]:

def create_pad_mask(t, pad):
    r"""
    Build a boolean mask that flags the padding positions in a batch of token ids.

    In the encoder, the mask hides the positions of the encoder inputs that
    carry no content and were filled with PAD, which keeps the matrix
    operations convenient.
    In the decoder, the mask may be applied in the decoder self-attention to
    hide the PAD positions of the decoder inputs, or to hide the PAD positions
    of the encoder outputs.

    :param t: [batch_size, seq_len] tensor of token ids
    :param pad: id of the padding token
    :return: [batch_size, 1, seq_len] bool tensor, True where ``t`` equals ``pad``
    """
    # Compare on the tensor itself instead of going through the legacy ``.data``
    # attribute; the comparison result is a bool tensor either way.
    # unsqueeze(1) inserts the singleton axis at dim 1.
    return t.eq(pad).unsqueeze(1)

In [31]:


def create_target_self_mask(target_len, dtype=torch.uint8):
    r"""
    Build the strictly upper-triangular mask for the decoder self-attention.

    Entry ``[0, i, j]`` is non-zero exactly when ``j > i``, i.e. for the
    positions that lie after position ``i`` in the decoder inputs.

    :param target_len: length of the decoder input sequence
    :param dtype: dtype of the returned mask. Defaults to ``torch.uint8`` for
        backward compatibility; pass ``torch.bool`` to get a mask with the same
        dtype as the one produced by ``create_pad_mask``.
    :return: [1, target_len, target_len] tensor (a single leading axis, not a
        full batch axis; it has to be broadcast or expanded by the caller)
    """
    ones = torch.ones(target_len, target_len, dtype=torch.uint8)
    # diagonal=1 keeps only the entries strictly above the main diagonal.
    self_mask = torch.triu(ones, diagonal=1).unsqueeze(0)
    # Casting to the default uint8 is a no-op, so existing callers are unaffected.
    return self_mask.to(dtype)



In [32]:
# Take the PAD id from the vocabulary instead of repeating a bare 0, so the
# mask stays in sync with source_vocab.
encoder_mask = create_pad_mask(encoder_inputs, source_vocab['P'])

In [33]:
# Inspect the pad mask: True marks the padded (last) position of each sentence.
encoder_mask

tensor([[[False, False, False, False,  True]],

        [[False, False, False, False,  True]]])

In [39]:
# Size hyper-parameter. NOTE(review): the name suggests the feed-forward width,
# but the only use visible here is taking its square root; confirm the intent.
dim_feedforward = 64

In [40]:
# Square root of dim_feedforward (8.0 for 64). NOTE(review): presumably a check of
# an attention scaling factor; confirm which dimension should be used.
dim_feedforward ** 0.5

8.0