In [2]:
import numpy as np
import torch
import torch.optim as optim

In [127]:
# How to use RNNCell: the recurrence is advanced by hand, one time step per call.
batch_size = 2
seq_len = 3
input_size = 4
hidden_size = 2

cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

# Random time-major data of shape (seq_len, batch_size, input_size).
dataset = torch.randn(seq_len, batch_size, input_size)
# Initial hidden state of shape (batch_size, hidden_size).
hidden = torch.zeros(batch_size, hidden_size)

# Iterating over a tensor walks its first dimension, so each `step_input` is one
# time step of shape (batch_size, input_size). The loop variable is intentionally
# not named `input`: that would shadow the builtin input() for the rest of the
# notebook session.
for idx, step_input in enumerate(dataset):
    print("="*12,idx,"="*12)
    print("input_size:", step_input.shape)
    print(step_input.dtype)
    print(hidden.dtype)
    # The cell returns the next hidden state, which is fed back in on the next step.
    hidden = cell(step_input, hidden)
    print("output_size:", hidden.shape)
    print(hidden)

input_size: torch.Size([2, 4])
torch.float32
torch.float32
output_size: torch.Size([2, 2])
tensor([[-0.8323,  0.3388],
        [ 0.3986,  0.6711]], grad_fn=<TanhBackward0>)
input_size: torch.Size([2, 4])
torch.float32
torch.float32
output_size: torch.Size([2, 2])
tensor([[-0.9278, -0.5077],
        [-0.9987, -0.9079]], grad_fn=<TanhBackward0>)
input_size: torch.Size([2, 4])
torch.float32
torch.float32
output_size: torch.Size([2, 2])
tensor([[-0.5757,  0.0725],
        [-0.1552,  0.6591]], grad_fn=<TanhBackward0>)


In [128]:
# How to use RNN: the whole sequence is consumed in a single call.
input_size, hidden_size = 4, 2
seq_len, batch_size = 3, 1
num_layers = 2

cell = torch.nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

# Time-major layout:
#   inputs: (seq_len, batch_size, input_size)
#   hidden: (num_layers, batch_size, hidden_size)
inputs = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)

# out.shape    == (seq_len, batch_size, hidden_size)
# hidden.shape == (num_layers, batch_size, hidden_size)
out, hidden = cell(inputs, hidden)

print("Output_size:", out.shape)
print("Output:", out)
print("hidden_size:", hidden.shape)
print("hidden:", hidden)
# torch.nn.RNN() also accepts batch_first=True; with it, the only change needed
# is to swap the seq_len and batch_size dimensions of `inputs`.

Output_size: torch.Size([3, 1, 2])
Output: tensor([[[0.7498, 0.1833]],

        [[0.9132, 0.8118]],

        [[0.9391, 0.4266]]], grad_fn=<StackBackward0>)
hidden_size: torch.Size([2, 1, 2])
hidden: tensor([[[-0.4206,  0.6046]],

        [[ 0.9391,  0.4266]]], grad_fn=<StackBackward0>)


In [129]:
# One-hot vectors
# Task: train a network that maps the string "hello" to "ohlol".
# Data preparation
batch_size, num_layers = 1, 1
input_size = hidden_size = 4
seq_len = 5

idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # "hello" as indices into idx2char
y_data = [3, 1, 2, 3, 2]  # "ohlol" as indices into idx2char

# Row i of the lookup table is the one-hot vector for character index i.
one_hot_lookup = [
    [1 if col == row else 0 for col in range(len(idx2char))]
    for row in range(len(idx2char))
]
one_hot_x = [one_hot_lookup[char_idx] for char_idx in x_data]

# A list of Python ints would become an int64 tensor, which the network does not
# accept, so the tensor is created as float32. The -1 in view() resolves to seq_len.
inputs = torch.tensor(one_hot_x, dtype=torch.float32).view(-1, batch_size, input_size)
# Labels have shape (seq_len, 1).
labels = torch.tensor(y_data, dtype=torch.long).view(-1, 1)

print(inputs.shape)
print(inputs)
print(labels.shape)
print(labels)

torch.Size([5, 1, 4])
tensor([[[0., 1., 0., 0.]],

        [[1., 0., 0., 0.]],

        [[0., 0., 1., 0.]],

        [[0., 0., 1., 0.]],

        [[0., 0., 0., 1.]]])
torch.Size([5, 1])
tensor([[3],
        [1],
        [2],
        [3],
        [2]])


In [130]:
# Implementation with RNNCell
class Net1(torch.nn.Module):
    """Single ``torch.nn.RNNCell`` that the caller steps through time manually.

    Args:
        input_size: Number of features in each time-step input.
        hidden_size: Number of features in the hidden state.
        batch_size: Batch size used by ``init_hidden`` to size the initial state.
    """

    def __init__(self, input_size, hidden_size, batch_size):
        super().__init__()
        self.input_size, self.hidden_size, self.batch_size = (
            input_size,
            hidden_size,
            batch_size,
        )
        self.rnncell = torch.nn.RNNCell(
            input_size=input_size,
            hidden_size=hidden_size,
        )

    def forward(self, input, hidden):
        """Advance the cell by one time step and return the new hidden state."""
        return self.rnncell(input, hidden)

    def init_hidden(self):
        """Return an all-zero hidden state of shape (batch_size, hidden_size)."""
        state_shape = (self.batch_size, self.hidden_size)
        return torch.zeros(*state_shape)
# Build the RNNCell-based model from the notebook-level hyper-parameters.
model1 = Net1(
    input_size=input_size,
    hidden_size=hidden_size,
    batch_size=batch_size,
)

In [131]:
# Loss and optimizer for model1: Adam (learning rate 0.5) over the model's
# parameters, trained against a cross-entropy criterion.
optimizer = optim.Adam(
    model1.parameters(),
    lr=0.5,
)
criterion = torch.nn.CrossEntropyLoss()

In [132]:
# Training loop for model1: 15 epochs, one optimizer step per epoch.
# The epoch count lives in one place so the loop bound and the progress message
# cannot drift apart.
num_epochs = 15
for epoch in range(num_epochs):
    # Per-step losses are summed over the sequence before a single backward pass.
    loss = 0
    optimizer.zero_grad()
    hidden = model1.init_hidden()
    print('Predicted string: ', end='')
    # zip() pairs each time-step slice of `inputs` with its label. The loop
    # variable is named `step_input` rather than `input` so the builtin input()
    # is not shadowed at notebook level.
    for step_input, label in zip(inputs, labels):
        hidden = model1(step_input, hidden)
        # The hidden state itself is used as the class scores for this step.
        loss += criterion(hidden, label)
        predict = torch.max(hidden, dim=1).indices.item()
        print(idx2char[predict], end='')
    loss.backward()
    optimizer.step()
    print(', Epoch [%d/%d] loss is:%.4f' % (epoch + 1, num_epochs, loss.item()))

Predicted string: lloll, Epoch [1/15] loss is:6.0964
Predicted string: ohooo, Epoch [2/15] loss is:4.4539
Predicted string: ohlol, Epoch [3/15] loss is:4.0322
Predicted string: ohlol, Epoch [4/15] loss is:4.0755
Predicted string: ohlol, Epoch [5/15] loss is:4.0117
Predicted string: ohlol, Epoch [6/15] loss is:3.8690
Predicted string: ooloo, Epoch [7/15] loss is:3.6564
Predicted string: ooloo, Epoch [8/15] loss is:3.5979
Predicted string: ooloo, Epoch [9/15] loss is:3.3437
Predicted string: ohlol, Epoch [10/15] loss is:2.4880
Predicted string: ohlol, Epoch [11/15] loss is:1.7879
Predicted string: ohlol, Epoch [12/15] loss is:1.7156
Predicted string: ohlol, Epoch [13/15] loss is:1.7173
Predicted string: ohlol, Epoch [14/15] loss is:1.7215
Predicted string: ohlol, Epoch [15/15] loss is:1.7244


In [133]:
# Implementation with RNN
class Net2(torch.nn.Module):
    """``torch.nn.RNN`` wrapper that returns one row of scores per time step.

    Args:
        input_size: Number of features in each time-step input.
        hidden_size: Number of features in the hidden state (also the width of
            the returned rows).
        batch_size: Nominal batch size. Stored for reference only; ``forward``
            sizes the initial hidden state from the actual input.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size, hidden_size, batch_size, num_layers):
        super(Net2, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.rnn = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, inputs):
        """Run the full sequence through the RNN.

        Args:
            inputs: Tensor of shape (seq_len, batch, input_size); the RNN is
                built without ``batch_first``, so the layout is time-major.

        Returns:
            Tensor of shape (seq_len * batch, hidden_size).
        """
        # Size the zero initial state from the input's own batch dimension, and
        # inherit its dtype/device, so batches of any size work instead of only
        # the batch_size passed to the constructor.
        hidden = inputs.new_zeros(self.num_layers, inputs.size(1), self.hidden_size)
        out, _ = self.rnn(inputs, hidden)
        # Flatten time and batch into one axis so the result can be fed to a
        # criterion that expects (N, C) scores.
        return out.reshape(-1, self.hidden_size)
# Build the RNN-based model from the notebook-level hyper-parameters.
model2 = Net2(
    input_size=input_size,
    hidden_size=hidden_size,
    batch_size=batch_size,
    num_layers=num_layers,
)

In [134]:
# Loss and optimizer for model2: Adam (learning rate 0.5) over the model's
# parameters, trained against a cross-entropy criterion.
optimizer = optim.Adam(
    model2.parameters(),
    lr=0.5,
)
criterion = torch.nn.CrossEntropyLoss()

In [135]:
# model2 scores every time step in one call, so the targets are a flat vector
# holding one class index per step.
labels = torch.tensor(y_data, dtype=torch.long)
for epoch in range(15):
    optimizer.zero_grad()
    print("Predicted string:", end='')
    out = model2(inputs)
    loss = criterion(out, labels)
    loss.backward()
    optimizer.step()
    # Highest-scoring class per row; these scores come from the forward pass
    # made before this epoch's parameter update.
    out = out.max(dim=1).indices.detach().numpy()
    print(
        "".join(idx2char[char_idx] for char_idx in out)
        + ',epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item())
    )

Predicted string:eeeee,epoch [1/15] loss=1.8657
Predicted string:ooloe,epoch [2/15] loss=1.1752
Predicted string:ohool,epoch [3/15] loss=0.8497
Predicted string:ohool,epoch [4/15] loss=0.6763
Predicted string:ohool,epoch [5/15] loss=0.5633
Predicted string:ohool,epoch [6/15] loss=0.5096
Predicted string:ohool,epoch [7/15] loss=0.4754
Predicted string:ohool,epoch [8/15] loss=0.4598
Predicted string:ohool,epoch [9/15] loss=0.4522
Predicted string:ohool,epoch [10/15] loss=0.4475
Predicted string:ohool,epoch [11/15] loss=0.4444
Predicted string:ohool,epoch [12/15] loss=0.4423
Predicted string:ohool,epoch [13/15] loss=0.4408
Predicted string:ohool,epoch [14/15] loss=0.4397
Predicted string:ohool,epoch [15/15] loss=0.4389


In [5]:
# Word-embedding variant: hyper-parameters.
# Note: the input to an embedding layer must be a LongTensor.
batch_size = 1
seq_len = 5

input_size = 4
num_classes = 4

embedding_size = 10
hidden_size = 8
num_layers = 2

In [4]:
idx2char = ['e', 'h', 'l', 'o']

# The embedding layer takes indices of shape (batch_size, seq_len) and produces
# (batch_size, seq_len, embedding_size); the outer list is the batch dimension.
x_data = [[1, 0, 2, 2, 3]]
y_data = [3, 1, 2, 3, 2]

# NOTE: `input` shadows the builtin input(); the name is kept because later
# cells read it.
input = torch.tensor(x_data, dtype=torch.long)
labels = torch.tensor(y_data, dtype=torch.long)

print(input.shape, labels.shape, sep="\n")

torch.Size([1, 5])
torch.Size([5])


In [12]:
# Model definition
class Net3(torch.nn.Module):
    """Embedding -> multi-layer RNN (batch_first) -> per-time-step linear classifier.

    Args:
        embedding_size: Width of each embedding vector.
        batch_size: Nominal batch size. Stored for reference only; ``forward``
            works for any batch size.
        input_size: Number of distinct input indices (rows of the embedding table).
        hidden_size: Number of features in the RNN hidden state.
        num_layers: Number of stacked recurrent layers.
        seq_len: Nominal sequence length. Stored for reference only.
        num_classes: Number of output classes per time step.
    """

    def __init__(self,embedding_size,batch_size,input_size,hidden_size,num_layers,seq_len,num_classes):
        super(Net3,self).__init__()
        self.embedding_size = embedding_size
        # Misspelled attribute name retained so existing readers of it keep working.
        self.embeddingg_num = embedding_size
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.seq_len = seq_len
        self.num_classes = num_classes
        self.emb = torch.nn.Embedding(input_size, embedding_size)
        self.rnn = torch.nn.RNN(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        # The classifier acts on one time step's hidden vector. Sizing it as
        # batch_size*hidden_size (and reshaping to (seq_len, -1)) only worked for
        # batch_size == 1; for larger batches it mixed the batch and time axes.
        self.fc = torch.nn.Linear(hidden_size, num_classes)

    def forward(self,x):
        """Score every time step of every sequence.

        Args:
            x: Integer index tensor of shape (batch, seq_len).

        Returns:
            Tensor of shape (batch * seq_len, num_classes), ordered batch-major:
            all time steps of sequence 0, then sequence 1, and so on.
        """
        x = self.emb(x)      # (batch, seq_len) -> (batch, seq_len, embedding_size)
        x, _ = self.rnn(x)   # -> (batch, seq_len, hidden_size)
        x = self.fc(x)       # applied to the last axis -> (batch, seq_len, num_classes)
        # Flatten batch and time into one axis for a criterion expecting (N, C).
        return x.reshape(-1, self.num_classes)
# Build the embedding model and run one forward pass as a shape check.
# The first argument is `embedding_size`, the name assigned in the hyper-parameter
# cell; no `embedding_num` is assigned anywhere in the visible notebook, so using
# it raises NameError on a fresh kernel.
model3 = Net3(embedding_size, batch_size, input_size, hidden_size, num_layers, seq_len, num_classes)
output = model3(input)
print(output.shape)

torch.Size([5, 4])


In [13]:
# Cross-entropy loss and optimizer for model3: Adam (learning rate 0.1) over the
# model's parameters.
optimizer = optim.Adam(
    model3.parameters(),
    lr=0.1,
)
criterion = torch.nn.CrossEntropyLoss()

In [14]:
# Training loop for model3: 15 epochs, one optimizer step per epoch.
for epoch in range(15):
    optimizer.zero_grad()
    print("Predicted string:", end='')
    out = model3(input)
    loss = criterion(out, labels)
    loss.backward()
    optimizer.step()
    # Highest-scoring class per row; these scores come from the forward pass
    # made before this epoch's parameter update.
    out = out.max(dim=1).indices.detach().numpy()
    print(
        "".join(idx2char[char_idx] for char_idx in out)
        + ',epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item())
    )

Predicted string:eohoh,epoch [1/15] loss=1.4044
Predicted string:ohlll,epoch [2/15] loss=1.0494
Predicted string:ohlol,epoch [3/15] loss=0.5459
Predicted string:ohlol,epoch [4/15] loss=0.3367
Predicted string:ohlol,epoch [5/15] loss=0.1811
Predicted string:ohlol,epoch [6/15] loss=0.1029
Predicted string:ohlol,epoch [7/15] loss=0.0581
Predicted string:ohlol,epoch [8/15] loss=0.0342
Predicted string:ohlol,epoch [9/15] loss=0.0210
Predicted string:ohlol,epoch [10/15] loss=0.0134
Predicted string:ohlol,epoch [11/15] loss=0.0088
Predicted string:ohlol,epoch [12/15] loss=0.0060
Predicted string:ohlol,epoch [13/15] loss=0.0043
Predicted string:ohlol,epoch [14/15] loss=0.0032
Predicted string:ohlol,epoch [15/15] loss=0.0025
