# Feed RNN inputs with variable length

In [6]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, PackedSequence, pad_packed_sequence
from torch.autograd import Variable

In [7]:
# Zero-padded batch of 3 sequences (valid lengths 5, 3 and 1), 2 features per step.
# Shape: [batch_size, max_seq_len, input_size]
x = Variable(torch.Tensor([
    [[1., 1.]] * 5,
    [[2., 2.]] * 3 + [[0., 0.]] * 2,
    [[3., 3.]] * 1 + [[0., 0.]] * 4,
]))

# Number of valid (non-padded) time steps of each sequence in the batch
batch_sizes = [5, 3, 1]

In [11]:
# Show the padded batch together with a reminder of its axis layout.
# (A bare `x` before the print would not be displayed: only a cell's last expression is.)
print(x,'\n[batch_size, max_seq_len, input_size]')

Variable containing:
(0 ,.,.) = 
  1  1
  1  1
  1  1
  1  1
  1  1

(1 ,.,.) = 
  2  2
  2  2
  2  2
  0  0
  0  0

(2 ,.,.) = 
  3  3
  0  0
  0  0
  0  0
  0  0
[torch.FloatTensor of size 3x5x2]
 
[batch_size, max_seq_len, input_size]


## pack_padded_sequence
### torch.nn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=False)
### Pack only the valid (non-padded) values from zero-padded inputs

Packs a Tensor containing padded sequences of variable length.<br/>

Input can be of size `T x B x *` where T is the length of the longest sequence (equal to lengths[0]), B is the batch size, and `*` is any number of dimensions (including 0). If `batch_first` is True `B x T x *` inputs are expected.

The sequences should be sorted by length in a decreasing order, i.e. `input[:,0]` should be the longest sequence, and `input[:,B-1]` the shortest one.

This function accepts any input that has at least two dimensions. You can apply it to pack the labels, and use the output of the RNN with them to compute the loss directly. A Tensor can be retrieved from a PackedSequence object by accessing its `.data` attribute.

> Parameters:	
  - input (Tensor) – padded batch of variable length sequences.
  - lengths (Tensor or list of int) – list of sequence lengths of each batch element.
  - batch_first (bool, optional) – if True, the input is expected in B x T x * format.
  
> Returns:	
  - a PackedSequence object

In [12]:
# Pack the batch-first padded input.
#   x       -> shape [batch_size, max_seq_len, input_size]
#   lengths -> valid length of each sequence in the batch, longest first
packed_x = pack_padded_sequence(x, [5, 3, 1], batch_first=True)
packed_x

PackedSequence(data=Variable containing:
    1     1
    2     2
    3     3
    1     1
    2     2
    1     1
    2     2
    1     1
    1     1
[torch.FloatTensor of size 9x2]
, batch_sizes=[3, 2, 2, 1, 1])

In [14]:
# Swap the batch and time axes: [batch_size, max_seq_len, input_size]
# becomes [max_seq_len, batch_size, input_size].
x_T = torch.transpose(x, 0, 1)
x_T

Variable containing:
(0 ,.,.) = 
  1  1
  2  2
  3  3

(1 ,.,.) = 
  1  1
  2  2
  0  0

(2 ,.,.) = 
  1  1
  2  2
  0  0

(3 ,.,.) = 
  1  1
  0  0
  0  0

(4 ,.,.) = 
  1  1
  0  0
  0  0
[torch.FloatTensor of size 5x3x2]

In [17]:
# Pack the time-first padded input.
#   x_T     -> shape [max_seq_len, batch_size, input_size]
#   lengths -> valid length of each sequence in the batch, longest first
packed_x_T = pack_padded_sequence(x_T, [5, 3, 1], batch_first=False)
packed_x_T

PackedSequence(data=Variable containing:
    1     1
    2     2
    3     3
    1     1
    2     2
    1     1
    2     2
    1     1
    1     1
[torch.FloatTensor of size 9x2]
, batch_sizes=[3, 2, 2, 1, 1])

## PackedSequence
#### torch.nn.utils.rnn.PackedSequence(cls, *args)
* internal

```
PackedSequence_ = namedtuple('PackedSequence', ['data', 'batch_sizes'])
class PackedSequence(PackedSequence_):
    pass
```
* Args:
    * data (Variable) <= **packed** tensor (padding removed, time steps interleaved across the batch)
    * batch_sizes (list of int) <= **in decreasing order**

Holds the data and list of `batch_sizes` of a packed sequence.

All RNN modules accept packed sequences as inputs.

### Note

Instances of this class should never be created manually. They are meant to be instantiated by functions like `pack_padded_sequence()`.

Batch sizes represent the number of elements at each sequence step in the batch, not the varying sequence lengths passed to `pack_padded_sequence()`. For instance, given data `abc` and `x`, the `PackedSequence` would contain data `axbc` with `batch_sizes=[2,1,1]`.

In [18]:
# Re-display the zero-padded input for reference.
x

Variable containing:
(0 ,.,.) = 
  1  1
  1  1
  1  1
  1  1
  1  1

(1 ,.,.) = 
  2  2
  2  2
  2  2
  0  0
  0  0

(2 ,.,.) = 
  3  3
  0  0
  0  0
  0  0
  0  0
[torch.FloatTensor of size 3x5x2]

In [19]:
# NOTE(review): this only shows that PackedSequence is a plain (data, batch_sizes) container.
# `x` is still zero-padded and `batch_sizes` here holds sequence lengths ([5, 3, 1]), not
# per-step batch sizes, so the object below is not a valid packed sequence. Use
# pack_padded_sequence() to build one for real use.
PackedSequence(x, batch_sizes)

PackedSequence(data=Variable containing:
(0 ,.,.) = 
  1  1
  1  1
  1  1
  1  1
  1  1

(1 ,.,.) = 
  2  2
  2  2
  2  2
  0  0
  0  0

(2 ,.,.) = 
  3  3
  0  0
  0  0
  0  0
  0  0
[torch.FloatTensor of size 3x5x2]
, batch_sizes=[5, 3, 1])

### RNN takes PackedSequence

Applies a linear transformation to the incoming data: `y = Ax + b`

In [41]:
# Minimal nn.Linear demo: map one 2-feature sample to a single output (y = Ax + b).
m = nn.Linear(2, 1)
print(vars(m))  # inspect the layer's internal state (weight, bias, hooks, ...)
# Named `sample_input` rather than `input` so the builtin input() is not shadowed.
sample_input = Variable(torch.randn(1, 2))
print(sample_input)
output = m(sample_input)
print(output.size())
output

{'_backend': <torch.nn.backends.thnn.THNNFunctionBackend object at 0x0000000007630828>, '_parameters': OrderedDict([('weight', Parameter containing:
-0.4197  0.1056
[torch.FloatTensor of size 1x2]
), ('bias', Parameter containing:
-0.6356
[torch.FloatTensor of size 1]
)]), '_buffers': OrderedDict(), '_backward_hooks': OrderedDict(), '_forward_hooks': OrderedDict(), '_forward_pre_hooks': OrderedDict(), '_modules': OrderedDict(), 'training': True, 'in_features': 2, 'out_features': 1}
Variable containing:
-0.1316 -0.1698
[torch.FloatTensor of size 1x2]

torch.Size([1, 1])


Variable containing:
-0.5983
[torch.FloatTensor of size 1x1]

In [20]:
# Layers used below: a 2 -> 2 projection followed by a single-layer RNN with hidden size 2.
linear = nn.Linear(in_features=2, out_features=2)
rnn = nn.RNN(input_size=2, hidden_size=2, batch_first=True)

In [21]:
# Re-display the packed input built with pack_padded_sequence.
packed_x

PackedSequence(data=Variable containing:
    1     1
    2     2
    3     3
    1     1
    2     2
    1     1
    2     2
    1     1
    1     1
[torch.FloatTensor of size 9x2]
, batch_sizes=[3, 2, 2, 1, 1])

In [24]:
# Apply the linear layer to the packed data only, i.e. the 9 valid steps (no padding rows).
linear(packed_x.data)

Variable containing:
 1.8025  0.6315
 2.9572  1.3289
 4.1119  2.0264
 1.8025  0.6315
 2.9572  1.3289
 1.8025  0.6315
 2.9572  1.3289
 1.8025  0.6315
 1.8025  0.6315
[torch.FloatTensor of size 9x2]

In [22]:
# Transform the packed data, then re-wrap it with the unchanged batch_sizes so the
# result is again a PackedSequence that the RNN can consume.
projected = linear(packed_x.data)
packed_h = PackedSequence(projected, packed_x.batch_sizes)
packed_h

PackedSequence(data=Variable containing:
 1.8025  0.6315
 2.9572  1.3289
 4.1119  2.0264
 1.8025  0.6315
 2.9572  1.3289
 1.8025  0.6315
 2.9572  1.3289
 1.8025  0.6315
 1.8025  0.6315
[torch.FloatTensor of size 9x2]
, batch_sizes=[3, 2, 2, 1, 1])

In [48]:
# Feed the PackedSequence straight into the RNN. It returns the per-step outputs
# (still packed) and the hidden state at the last valid step of each sequence.
packed_y, last_h = rnn(packed_h)
print('packed_y\n', packed_y,'\n')
print('last_h\n', last_h)

packed_y
 PackedSequence(data=Variable containing:
 0.8189 -0.8096
 0.9589 -0.8931
 0.9912 -0.9411
 0.8608 -0.6986
 0.9712 -0.8224
 0.8719 -0.7371
 0.9724 -0.8369
 0.8707 -0.7277
 0.8712 -0.7302
[torch.FloatTensor of size 9x2]
, batch_sizes=[3, 2, 2, 1, 1]) 

last_h
 Variable containing:
(0 ,.,.) = 
  0.8712 -0.7302
  0.9724 -0.8369
  0.9912 -0.9411
[torch.FloatTensor of size 1x3x2]



## pad_packed_sequence
#### - zero-pad a packed sequence and turn it back into a Tensor

* Args:
    * sequence (PackedSequence)
    * batch_first (bool)


* Return (tuple):
    * output (Variable): the zero-padded sequences
    * lengths (list of int): the length of each sequence in the batch

In [49]:
# Re-display the packed RNN output before unpacking it.
packed_y

PackedSequence(data=Variable containing:
 0.8189 -0.8096
 0.9589 -0.8931
 0.9912 -0.9411
 0.8608 -0.6986
 0.9712 -0.8224
 0.8719 -0.7371
 0.9724 -0.8369
 0.8707 -0.7277
 0.8712 -0.7302
[torch.FloatTensor of size 9x2]
, batch_sizes=[3, 2, 2, 1, 1])

In [50]:
# Unpack into a zero-padded, batch-first tensor plus the list of sequence lengths.
pad_packed_sequence(packed_y, batch_first=True)

(Variable containing:
 (0 ,.,.) = 
   0.8189 -0.8096
   0.8608 -0.6986
   0.8719 -0.7371
   0.8707 -0.7277
   0.8712 -0.7302
 
 (1 ,.,.) = 
   0.9589 -0.8931
   0.9712 -0.8224
   0.9724 -0.8369
   0.0000  0.0000
   0.0000  0.0000
 
 (2 ,.,.) = 
   0.9912 -0.9411
   0.0000  0.0000
   0.0000  0.0000
   0.0000  0.0000
   0.0000  0.0000
 [torch.FloatTensor of size 3x5x2], [5, 3, 1])