## 使用RnnCell

In [1]:
import torch
input_size = 4
hidden_size = 4
batch_size = 1

In [2]:
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]#ohlol
one_hot_lookup = [[1, 0, 0, 0],
                    [0, 1, 0, 0],
                    [0, 0, 1, 0],
                    [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
labels = torch.LongTensor(y_data).view(-1, 1)

In [2]:
class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(Model, self).__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size=self.input_size,
                hidden_size=self.hidden_size)
    def forward(self, input, hidden):
        hidden = self.rnncell(input, hidden)
        return hidden
    def init_hidden(self):
        return torch.zeros(self.batch_size, self.hidden_size)
net = Model(input_size, hidden_size, batch_size)

In [3]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

In [None]:
for epoch in range(15):
    loss = 0
    optimizer.zero_grad()
    hidden = net.init_hidden()
    print('Predicted string: ', end='')
    for input, label in zip(inputs, labels):
        hidden = net(input, hidden)#[1, 4],[1, 4] ->[1, 4]
        loss += criterion(hidden, label)
        _, idx = hidden.max(dim=1)
        print(idx2char[idx.item()], end='')
    loss.backward()
    optimizer.step()
    print(', Epoch [%d/15] loss=%.4f' % (epoch+1, loss.item()))

Predicted string: 

## 使用RNN

In [3]:
class Model(torch.nn.Module):
    def __init__(self,input_size,hidden_size,batch_size,num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.rnn = torch.nn.RNN(input_size,hidden_size,num_layers)
    def forward(self,input):
        hidden = torch.zeros((self.num_layers,self.batch_size,self.hidden_size))
        output,hidden = self.rnn(input,hidden)#[5, 1, 4],[1, 1, 4] ->[5,1,4],[1, 1, 4]
        return output.permute(1,2,0)


In [4]:
input_size = 4
hidden_size = 4
num_layers = 1
batch_size = 1
seq_len = 5
net = Model(input_size, hidden_size, batch_size, num_layers)

In [5]:
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]#ohlol
one_hot_lookup = [[1, 0, 0, 0],
                    [0, 1, 0, 0],
                    [0, 0, 1, 0],
                    [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
labels = torch.LongTensor(y_data).view(batch_size, -1)
out = net(inputs)
_,idx = out.max(dim=1)

out,out.shape,labels.shape,idx.data.numpy()

(tensor([[[-0.1839, -0.4537, -0.2574, -0.1384,  0.3776],
          [-0.4430, -0.2907,  0.5403,  0.3733,  0.3725],
          [-0.4269, -0.0384, -0.7657, -0.7928, -0.6674],
          [-0.1535,  0.3500,  0.5106,  0.2585,  0.2497]]],
        grad_fn=<PermuteBackward>),
 torch.Size([1, 4, 5]),
 torch.Size([1, 5]),
 array([[3, 3, 1, 1, 0]], dtype=int64))

In [6]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
for epoch in range(50):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()[0]
    print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))

Predicted:  oohhe, Epoch [1/15] loss = 1.704
Predicted:  ooooo, Epoch [2/15] loss = 1.592
Predicted:  ooooo, Epoch [3/15] loss = 1.477
Predicted:  ooooo, Epoch [4/15] loss = 1.353
Predicted:  ooooo, Epoch [5/15] loss = 1.226
Predicted:  ohooo, Epoch [6/15] loss = 1.109
Predicted:  ohool, Epoch [7/15] loss = 1.011
Predicted:  ohool, Epoch [8/15] loss = 0.934
Predicted:  ohlol, Epoch [9/15] loss = 0.875
Predicted:  ohlol, Epoch [10/15] loss = 0.827
Predicted:  ohlol, Epoch [11/15] loss = 0.783
Predicted:  ohlol, Epoch [12/15] loss = 0.741
Predicted:  ohlol, Epoch [13/15] loss = 0.701
Predicted:  ohlol, Epoch [14/15] loss = 0.662
Predicted:  ohlol, Epoch [15/15] loss = 0.627
Predicted:  ohlol, Epoch [16/15] loss = 0.595
Predicted:  ohlol, Epoch [17/15] loss = 0.566
Predicted:  ohlol, Epoch [18/15] loss = 0.541
Predicted:  ohlol, Epoch [19/15] loss = 0.519
Predicted:  ohlol, Epoch [20/15] loss = 0.499
Predicted:  ohlol, Epoch [21/15] loss = 0.481
Predicted:  ohlol, Epoch [22/15] loss = 0.4

## 加入嵌入层

In [2]:
class Model(torch.nn.Module):
    def __init__(self,input_size,hidden_size,embedding_size,num_class,batch_size,num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.emb = torch.nn.Embedding(input_size,embedding_size)
        self.rnn = torch.nn.GRU(embedding_size,hidden_size,num_layers)
        self.fc = torch.nn.Linear(hidden_size,num_class)
    def forward(self,input):
        hidden = torch.zeros((self.num_layers,self.batch_size,self.hidden_size))
        input = self.emb(input)
        input = input.permute(1,0,2)
        output,hidden = self.rnn(input,hidden)
        output = self.fc(output)
        return output.permute(1,2,0)

In [3]:
num_class = 4
input_size = 4
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5
net = Model(input_size,hidden_size,embedding_size,num_class,batch_size,num_layers)

In [4]:
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]#ohlol
inputs = torch.LongTensor(x_data).view(batch_size,-1)
labels = torch.LongTensor(y_data).view(batch_size,-1)
out = net(inputs)
_,idx = out.max(dim=2)

out,out.shape,labels.shape,idx.data.numpy()

(tensor([[[-0.2390, -0.2747, -0.2483, -0.1974, -0.1640],
          [ 0.2475,  0.2623,  0.2835,  0.3037,  0.3067],
          [-0.0921, -0.0486, -0.0689, -0.0762, -0.0331],
          [-0.1449, -0.1646, -0.2391, -0.3194, -0.3411]]],
        grad_fn=<PermuteBackward>),
 torch.Size([1, 4, 5]),
 torch.Size([1, 5]),
 array([[4, 4, 4, 0]], dtype=int64))

In [None]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
for epoch in range(50):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()[0]
    print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))