In [96]:
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

In [77]:
sent_1_x = ['is', 'it', 'too', 'late', 'now', 'say', 'sorry']
sent_1_y = ['VB', 'PRP', 'RB', 'RB', 'RB', 'VB', 'JJ']

sent_2_x = ['ooh', 'ooh']
sent_2_y = ['NNP', 'NNP']

sent_3_x = ['sorry', 'yeah']
sent_3_y = ['JJ', 'NNP']

X = [sent_1_x, sent_2_x, sent_3_x]
Y = [sent_1_y, sent_2_y, sent_3_y]

In [78]:
X

[['is', 'it', 'too', 'late', 'now', 'say', 'sorry'],
 ['ooh', 'ooh'],
 ['sorry', 'yeah']]

In [79]:
Y

[['VB', 'PRP', 'RB', 'RB', 'RB', 'VB', 'JJ'], ['NNP', 'NNP'], ['JJ', 'NNP']]

In [80]:
# map sentences to vocab
vocab = {'': 0, 'is': 1, 'it': 2, 'too': 3, 'late': 4, 'now': 5, 'say': 6, 'sorry': 7, 'ooh': 8, 'yeah': 9} 

# fancy nested list comprehension
X =  [[vocab[word] for word in sentence] for sentence in X]

In [81]:
X

[[1, 2, 3, 4, 5, 6, 7], [8, 8], [7, 9]]

In [82]:
# get the length of each sentence
X_lengths = [len(sentence) for sentence in X]
X_lengths

[7, 2, 2]

In [104]:
# create an empty matrix with padding tokens
pad_token = vocab['']
longest_sent = max(X_lengths)
batch_size = len(X)
padded_X = np.ones((batch_size, longest_sent), dtype=int) * pad_token

In [105]:
padded_X

array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0]])

In [106]:
# copy over the actual sequences
for i, x_len in enumerate(X_lengths):
  sequence = X[i]
  padded_X[i, 0:x_len] = sequence[:x_len]

In [107]:
padded_X

array([[1, 2, 3, 4, 5, 6, 7],
       [8, 8, 0, 0, 0, 0, 0],
       [7, 9, 0, 0, 0, 0, 0]])

In [108]:
import numpy as np

tags = {'': 0, 'VB': 1, 'PRP': 2, 'RB': 3, 'JJ': 4, 'NNP': 5}

# fancy nested list comprehension
Y =  [[tags[tag] for tag in sentence] for sentence in Y]

# Y now looks like:
# [[1, 2, 3, 3, 3, 1, 4], [5, 5], [4, 5]]

# get the length of each sentence
Y_lengths = [len(sentence) for sentence in Y]

# create an empty matrix with padding tokens
pad_token = tags['']
longest_sent = max(Y_lengths)
batch_size = len(Y)
padded_Y = np.ones((batch_size, longest_sent)) * pad_token

# copy over the actual sequences
for i, y_len in enumerate(Y_lengths):
  sequence = Y[i]
  padded_Y[i, 0:y_len] = sequence[:y_len]

# # padded_Y looks like:
# array([[ 1.,  2.,  3.,  3.,  3.,  1.,  4.],
#        [ 5.,  5.,  0.,  0.,  0.,  0.,  0.],
#        [ 4.,  5.,  0.,  0.,  0.,  0.,  0.]])

KeyError: 1

In [109]:
padded_Y

array([[1., 2., 3., 3., 3., 1., 4.],
       [5., 5., 0., 0., 0., 0., 0.],
       [4., 5., 0., 0., 0., 0., 0.]])

In [110]:
word_embedding = torch.nn.Embedding(
            num_embeddings=len(vocab),
            embedding_dim=3,
            padding_idx=pad_token
        )

In [117]:
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
# device='cpu'

In [118]:
X_tensor = torch.from_numpy(padded_X)#.to(device)
X_lengths_tensor = torch.from_numpy(np.array(X_lengths))#.to(device)

# # batch_size, seq_len, _ = padded_X.size()
# X_embed = word_embedding(X_tensor)

# X_pack_padded = torch.nn.utils.rnn.pack_padded_sequence(X_embed, np.array(X_lengths), batch_first=True)

In [119]:
X_tensor

tensor([[1, 2, 3, 4, 5, 6, 7],
        [8, 8, 0, 0, 0, 0, 0],
        [7, 9, 0, 0, 0, 0, 0]])

In [120]:
X_lengths_tensor

tensor([7, 2, 2])

In [121]:
X_embed = word_embedding(X_tensor)

In [124]:
X_embed

tensor([[[-0.1799,  0.3167,  1.1528],
         [-1.1772, -0.5380,  0.8375],
         [-0.4743,  1.3381,  1.4349],
         [ 1.6677,  0.6381, -0.3952],
         [ 1.6498,  0.5117, -0.9820],
         [-0.4703,  1.4595, -0.3970],
         [-1.1577,  0.4299, -0.2120]],

        [[ 0.6767, -1.2970, -1.3783],
         [ 0.6767, -1.2970, -1.3783],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[-1.1577,  0.4299, -0.2120],
         [ 0.6238,  0.2805,  0.9411],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]]], grad_fn=<EmbeddingBackward>)

In [122]:
X_pack_padded = torch.nn.utils.rnn.pack_padded_sequence(X_embed, np.array(X_lengths), batch_first=True)

In [123]:
X_pack_padded

PackedSequence(data=tensor([[-0.1799,  0.3167,  1.1528],
        [ 0.6767, -1.2970, -1.3783],
        [-1.1577,  0.4299, -0.2120],
        [-1.1772, -0.5380,  0.8375],
        [ 0.6767, -1.2970, -1.3783],
        [ 0.6238,  0.2805,  0.9411],
        [-0.4743,  1.3381,  1.4349],
        [ 1.6677,  0.6381, -0.3952],
        [ 1.6498,  0.5117, -0.9820],
        [-0.4703,  1.4595, -0.3970],
        [-1.1577,  0.4299, -0.2120]], grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([3, 3, 1, 1, 1, 1, 1]), sorted_indices=None, unsorted_indices=None)

In [128]:
batch_size = 3
max_length = 3  #longest_sent  # 7  이걸로 하면 에러난다.
hidden_size = 3
n_layers =1

# initialize
rnn = nn.RNN(max_length, hidden_size, n_layers, batch_first=True)     # (중요) batch_first=True
h0 = Variable(torch.randn(n_layers, batch_size, hidden_size))

#forward 
out, _ = rnn(X_pack_padded, h0)

In [129]:
out

PackedSequence(data=tensor([[ 0.8422, -0.5469,  0.6166],
        [ 0.2324, -0.7649,  0.8579],
        [ 0.8054, -0.9036,  0.7375],
        [ 0.7346, -0.7846,  0.7465],
        [ 0.0186, -0.8162,  0.7977],
        [ 0.3019, -0.8325,  0.2773],
        [ 0.7203, -0.9097,  0.1353],
        [-0.2071, -0.8956,  0.0162],
        [ 0.0606, -0.8438,  0.4753],
        [ 0.6802, -0.9495,  0.3824],
        [ 0.6285, -0.9241,  0.5867]], grad_fn=<CatBackward>), batch_sizes=tensor([3, 3, 1, 1, 1, 1, 1]), sorted_indices=None, unsorted_indices=None)

In [130]:
unpacked, unpacked_len = torch.nn.utils.rnn.pad_packed_sequence(out)

In [131]:
unpacked

tensor([[[ 0.8422, -0.5469,  0.6166],
         [ 0.2324, -0.7649,  0.8579],
         [ 0.8054, -0.9036,  0.7375]],

        [[ 0.7346, -0.7846,  0.7465],
         [ 0.0186, -0.8162,  0.7977],
         [ 0.3019, -0.8325,  0.2773]],

        [[ 0.7203, -0.9097,  0.1353],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[-0.2071, -0.8956,  0.0162],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[ 0.0606, -0.8438,  0.4753],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[ 0.6802, -0.9495,  0.3824],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[ 0.6285, -0.9241,  0.5867],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]]], grad_fn=<CopySlices>)