In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.optim import SGD
from torch.nn import CrossEntropyLoss

from time import time
import copy

In [4]:
# Sanity check of nn.Linear: a batch of 128 vectors with 20 features each
# is mapped to 128 vectors with 3 features each.
m = nn.Linear(in_features=20, out_features=3)
input = Variable(torch.randn(128, m.in_features))
output = m(input)
print(output.size())

torch.Size([128, 3])


In [3]:
# Shared shape settings for the RNN experiments in this notebook.
batch_size = 3    # number of sequences per batch (dim 0 of the inputs)
seq_len = 10      # time steps per sequence (dim 1 of the inputs)
hidden_size = 5   # used both as the input feature size and the hidden state size
num_layers = 1    # number of stacked recurrent layers passed to nn.RNN

In [4]:
# Random batch-first input sequences: (batch, seq, feature).
input_shape = (batch_size, seq_len, hidden_size)
inputs = Variable(torch.randn(*input_shape))

# Constant integer targets: every time step of every sequence is class 1.
label_shape = input_shape[:2]
labels = Variable(torch.ones(*label_shape).long())

(inputs.size(), labels.size())

(torch.Size([3, 10, 5]), torch.Size([3, 10]))

## class `torch.nn.RNN(*args, **kwargs)`

### Parameters:	
 - **input_size** – The number of expected features in the input x
 - **hidden_size** – The number of features in the hidden state h
 - **num_layers** – Number of recurrent layers. E.g., setting num_layers=2 would mean stacking two RNNs together to form a stacked RNN, with the second RNN taking in outputs of the first RNN and computing the final results. Default: 1
 - **nonlinearity** – The non-linearity to use. Can be either ‘tanh’ or ‘relu’. Default: ‘tanh’
 - **bias** – If False, then the layer does not use bias weights b_ih and b_hh. Default: True
 - **batch_first** – If True, then the input and output tensors are provided as (batch, seq, feature) instead of (seq, batch, feature). Default: False
 - **dropout** – If non-zero, introduces a Dropout layer on the outputs of each RNN layer except the last layer, with dropout probability equal to dropout. Default: 0
 - **bidirectional** – If True, becomes a bidirectional RNN. Default: False

In [5]:
# Elman RNN whose input feature size and hidden size are both `hidden_size`.
# batch_first=True means inputs/outputs are laid out as (batch, seq, feature).
rnn = nn.RNN(hidden_size, hidden_size, num_layers, batch_first=True)

# Initial hidden state. Its leading dimension must equal the number of
# stacked layers (this RNN is unidirectional), so derive it from `num_layers`
# instead of a literal 1; otherwise changing `num_layers` in the config cell
# makes the forward pass fail with a hidden-size mismatch. The hidden state
# layout is not affected by batch_first.
h_n = Variable(torch.randn(num_layers, batch_size, hidden_size))

loss_fn = CrossEntropyLoss()
opt = SGD(rnn.parameters(), lr=0.01)

In [6]:
# Train the nn.RNN for 1000 full-batch SGD steps and time the whole run.
start = time()

for i in range(1000):
    # Drop gradients accumulated by the previous step.
    opt.zero_grad()

    # out:    [batch_size, seq_len, hidden_size]
    # labels: [batch_size, seq_len]
    out, last_h = rnn(inputs, h_n)

    # The loss is the sum of the per-time-step cross-entropies; the
    # hidden_size features of each step's output are used as class scores.
    loss = 0
    for j in range(seq_len):
        loss = loss + loss_fn(out[:, j, :], labels[:, j])

    loss.backward()
    opt.step()

    # Report the summed loss on every 100th iteration.
    if i % 100 == 99:
        print(loss)

print(f'{time() - start:.2f}')

Variable containing:
 4.6322
[torch.FloatTensor of size 1]

Variable containing:
 4.4481
[torch.FloatTensor of size 1]

Variable containing:
 4.4049
[torch.FloatTensor of size 1]

Variable containing:
 4.3851
[torch.FloatTensor of size 1]

Variable containing:
 4.3736
[torch.FloatTensor of size 1]

Variable containing:
 4.3660
[torch.FloatTensor of size 1]

Variable containing:
 4.3607
[torch.FloatTensor of size 1]

Variable containing:
 4.3567
[torch.FloatTensor of size 1]

Variable containing:
 4.3535
[torch.FloatTensor of size 1]

Variable containing:
 4.3510
[torch.FloatTensor of size 1]

2.02


In [28]:
# Step an RNNCell by hand: one call per time step, feeding the returned
# hidden state back in as the next call's state.
# NOTE: this rebinds `rnn`, which an earlier cell bound to an nn.RNN module.
rnn = nn.RNNCell(10, 10)
_input = Variable(torch.randn(6, 2, 10))  # indexed as [time step][batch, feature]
hx = Variable(torch.randn(2, 10))         # (batch, hidden)
output = []
# Only the first 2 of the 6 time steps in `_input` are consumed here.
for step in range(2):
    hx = rnn(_input[step], hx)
    output.append(hx)
output

[Variable containing:
  0.1874 -0.7648 -0.0574  0.3063  0.3283 -0.8304  0.8753  0.7592  0.3851 -0.1417
  0.5973 -0.0967  0.6885  0.0338  0.9345 -0.0655 -0.9436  0.8758 -0.9282 -0.4330
 [torch.FloatTensor of size 2x10], Variable containing:
  0.7570 -0.3473  0.5445 -0.3237  0.9141 -0.4219  0.0067  0.4525  0.3122  0.2558
 -0.4528 -0.1836 -0.8534 -0.6030 -0.5890 -0.3248  0.0043 -0.8554  0.8664  0.4537
 [torch.FloatTensor of size 2x10]]

In [7]:
# Single recurrent cell with matching input and hidden sizes, trained with
# plain SGD against a cross-entropy objective.
loss_fn = CrossEntropyLoss()
rnncell = nn.RNNCell(input_size=hidden_size, hidden_size=hidden_size)
opt = SGD(params=rnncell.parameters(), lr=0.01)

In [13]:
# Random batch-first inputs and constant class-1 targets for the RNNCell run.
inputs = Variable(torch.randn(batch_size, seq_len, hidden_size))
labels = Variable(torch.ones(batch_size, seq_len).long())
# Initial hidden state for the cell: one hidden vector per batch element.
h = Variable(torch.randn(batch_size, hidden_size))

# Show each tensor in turn.
for shown in (inputs, labels, h):
    print(shown)

Variable containing:
(0 ,.,.) = 
  2.7433  0.0471  1.2622  0.2100 -0.3848
  1.4554  0.3721 -2.4634  0.7018 -0.0480
  2.2410 -0.9494 -1.4348 -0.0517  0.6590
 -0.5991 -0.7685 -1.3306 -2.2601  0.6363
 -0.1966  0.2830 -1.3152  1.4531 -2.1519
  0.3345 -0.1216 -0.0286  0.2817  1.5224
 -0.6287  0.2152 -1.7900 -0.9344  0.0898
  0.8218 -2.5248  1.0621  0.0030  0.4964
 -1.8711 -0.9957  1.3306  0.4637  0.2865
  0.4904  0.8932 -1.2823 -1.1580  0.6046

(1 ,.,.) = 
  0.1499  0.5721  0.7506 -0.3534  1.5718
  1.3562  0.8376 -2.8787 -1.9997  0.7123
 -1.3980 -0.7345  0.1998  0.8465 -0.8675
 -0.1045 -0.5209  0.8285  0.4737 -1.2573
 -0.6930 -1.7201  0.1213  0.6195 -0.5531
 -0.1842  0.1953 -0.3475  0.0005 -0.3065
  0.8331 -0.8477 -1.0281 -0.2570  0.8831
 -3.0522 -1.0129  0.3017 -0.8542  1.4732
 -1.3109  0.6289  0.6967 -1.3588 -1.0148
  0.4323  0.4267  0.7468 -1.3492  0.9131

(2 ,.,.) = 
 -1.9109 -0.3432 -0.7599  1.5289  0.3528
 -0.3480 -0.2581  1.0816 -1.9361  0.1008
  1.6056  0.1046 -0.1466  1.5988 -1.595

In [15]:
# Train the RNNCell for 1000 full-batch SGD steps by unrolling it manually
# over the sequence, and time the whole run.
start = time()
output = []
for i in range(1000):
    loss = 0

    # Start every pass from the prepared initial state `h`. The previous
    # `h.data.new(batch_size, hidden_size)` only allocated storage without
    # initialising it, so each pass began from arbitrary memory contents
    # (non-reproducible, and possibly NaN/inf) while `h` went unused.
    h_next = h

    for j in range(seq_len):
        # One recurrence step on time slice j; the new hidden state is also
        # used as the class scores for that step.
        h_next = rnncell(inputs[:, j, :], h_next)
        loss += loss_fn(h_next, labels[:, j])
        # Log a detached hidden state so the list does not keep every
        # iteration's autograd graph alive for the rest of the session.
        output.append(h_next.detach())
    opt.zero_grad()
    loss.backward()
    opt.step()

    # Report the summed loss on every 100th iteration.
    if (i+1) % 100 == 0:
        print(loss)

print(f'{time() - start:.2f}')

Variable containing:
 4.3372
[torch.FloatTensor of size 1]

Variable containing:
 4.3366
[torch.FloatTensor of size 1]

Variable containing:
 4.3361
[torch.FloatTensor of size 1]

Variable containing:
 4.3357
[torch.FloatTensor of size 1]

Variable containing:
 4.3352
[torch.FloatTensor of size 1]

Variable containing:
 4.3349
[torch.FloatTensor of size 1]

Variable containing:
 4.3345
[torch.FloatTensor of size 1]

Variable containing:
 4.3342
[torch.FloatTensor of size 1]

Variable containing:
 4.3339
[torch.FloatTensor of size 1]

Variable containing:
 4.3337
[torch.FloatTensor of size 1]

1.77


In [18]:
# One hidden state was appended per time step of every training iteration.
len(output)

10000