In [2]:
import torch
import time
import math
import numpy as np
import torch.nn.functional as F
from torch import nn,optim
import zipfile

# Report whether PyTorch can see a CUDA device. The result is only printed;
# it is not used to choose the device below.
print(torch.cuda.is_available())
# Device string shared by the rest of the notebook (read by one_hot and by the
# model/prediction setup further down).
device = 'cpu'

def load_data_jay_lyrics(path='Datasets/jaychou_lyrics.txt.zip',
                         member='jaychou_lyrics.txt',
                         max_chars=10000):
    """Load a zipped UTF-8 lyrics corpus and build a character-level vocabulary.

    Args:
        path: path of the zip archive to read.
        member: name of the text file inside the archive.
        max_chars: keep only the first ``max_chars`` characters of the corpus;
            ``None`` keeps the whole text.

    Returns:
        A tuple ``(corpus_indices, char_to_idx, idx_to_char, vocab_size)``:
        the corpus as a list of integer indices, the char -> index dict, the
        index -> char list, and the number of distinct characters.
    """
    with zipfile.ZipFile(path) as zin:
        with zin.open(member) as f:
            corpus_chars = f.read().decode('utf-8')
    # Line breaks carry no meaning for the character model; flatten them to spaces.
    corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
    if max_chars is not None:
        corpus_chars = corpus_chars[:max_chars]
    # Sort the vocabulary: iterating a bare set of strings yields a different
    # order on every interpreter start, which would make the index mapping
    # (and anything trained against it) irreproducible.
    idx_to_char = sorted(set(corpus_chars))
    char_to_idx = {char: i for i, char in enumerate(idx_to_char)}
    vocab_size = len(char_to_idx)
    corpus_indices = [char_to_idx[char] for char in corpus_chars]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size
# Load the corpus once at module level; the resulting vocabulary objects are
# module globals used by the cells below.
(corpus_indices, char_to_idx, idx_to_char, vocab_size) = load_data_jay_lyrics()

def one_hot(x,n_class,dtype=torch.float32):
    """One-hot encode a 1-D tensor of class indices.

    Args:
        x: tensor of shape (batch_size,) holding class indices; it is cast to
            int64 before use.
        n_class: width of the encoding (the vocabulary size).
        dtype: dtype of the returned tensor.

    Returns:
        Tensor of shape (batch_size, n_class) on the same device as ``x``,
        with a 1 at column ``x[i]`` of row ``i`` and 0 elsewhere.
    """
    x = x.long()
    # Allocate on x's own device instead of a module-level global: scatter_
    # needs the index and the destination tensor on the same device.
    res = torch.zeros(x.shape[0],n_class,dtype=dtype,device=x.device)
    # The final argument is the value written at each (row, index) position;
    # scatter_ also accepts a different scalar or a source tensor there.
    res.scatter_(1,x.view(-1,1),1)
    return res
def to_onehot(X,n_class):
    """Convert a batch of index sequences into per-time-step one-hot matrices.

    Args:
        X: tensor of shape (batch_size, num_steps) holding class indices.
        n_class: width of each one-hot vector (the vocabulary size).

    Returns:
        A list with num_steps entries, each a (batch_size, n_class) tensor
        produced by ``one_hot`` for that time step.
    """
    encoded_steps = []
    # Walk the time axis: each column of X is one time step across the batch.
    for step in range(X.shape[1]):
        encoded_steps.append(one_hot(X[:,step],n_class))
    return encoded_steps

False


In [3]:
num_hiddens = 256  # size of the RNN hidden state
rnn_layer = nn.RNN(input_size = vocab_size,hidden_size=num_hiddens)
# inputs: [num_steps, batch_size, vocab_size]; outputs: [num_steps, batch_size, num_hiddens]
# The calling convention differs here: the state is passed in together with the
# input when the RNN layer is called, and the layer handles it internally.

class RNNModel(nn.Module):
    """Character-level language model: a recurrent layer followed by a linear decoder.

    Args:
        rnn_layer: recurrent module (e.g. ``nn.RNN``) exposing ``hidden_size``
            and ``bidirectional``; it is called as ``rnn_layer(input, state)``
            with a time-major ``[num_steps, batch_size, vocab_size]`` input.
        vocab_size: number of distinct tokens; used as the one-hot width and
            as the decoder's output width.
    """
    def __init__(self,rnn_layer,vocab_size):
        super().__init__()
        self.rnn = rnn_layer
        # A bidirectional layer doubles the width of the features fed to the decoder.
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1)
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size,vocab_size)
        self.state = None  # most recent state returned by the recurrent layer

    def forward(self,inputs,state):
        """Run the model over a batch of index sequences.

        Args:
            inputs: integer tensor of shape (batch_size, seq_len).
            state: initial recurrent state, or ``None`` to let the layer
                start from its default state.

        Returns:
            ``(output, state)`` where ``output`` has shape
            (seq_len * batch_size, vocab_size) and ``state`` is whatever the
            recurrent layer returned (also stored on ``self.state``).
        """
        # Transpose to time-major, then one-hot encode in a single call. The
        # encoding is created on the inputs' own device, so it does not depend
        # on any module-level device setting.
        X = F.one_hot(inputs.long().t(),self.vocab_size).to(torch.float32)
        # X: [num_steps, batch_size, vocab_size]
        Y,self.state = self.rnn(X,state)
        # Y: [num_steps, batch_size, hiddens] -> [num_steps*batch_size, hiddens];
        # reshape (rather than view) also copes with a non-contiguous Y.
        output = self.dense(Y.reshape(-1,Y.shape[-1]))
        # output: [num_steps*batch_size, vocab_size]
        return output,self.state

def predict_rnn_pytorch(prefix,num_chars,model,vocab_size,device,idx_to_char,char_to_idx):
    """Generate ``num_chars`` characters that continue ``prefix``.

    The prefix is fed through the model one character at a time to warm up the
    recurrent state; after that, each step feeds back the previously chosen
    character and takes the arg-max of the model output as the next one.

    Args:
        prefix: non-empty seed string; every character must be a key of
            ``char_to_idx``.
        num_chars: number of characters to generate after the prefix.
        model: callable ``model(X, state) -> (Y, state)`` where ``X`` has
            shape (1, 1) and ``Y`` has one row of per-character scores.
        vocab_size: unused; kept for interface compatibility.
        device: device on which the input tensor and the state are placed.
        idx_to_char: sequence mapping index -> character.
        char_to_idx: mapping character -> index.

    Returns:
        The prefix followed by the ``num_chars`` generated characters.

    Raises:
        IndexError: if ``prefix`` is empty.
        KeyError: if a prefix character is missing from ``char_to_idx``
            (assuming it is a dict).
    """
    state = None
    output = [char_to_idx[prefix[0]]]
    # Generation never back-propagates. Without no_grad the recurrent state
    # would drag an ever-growing autograd graph from step to step.
    with torch.no_grad():
        for t in range(num_chars+len(prefix)-1):
            # Feed only the most recent character index, shaped [1, 1].
            X = torch.tensor([output[-1]],device=device).view(1,1)
            if state is not None:
                if isinstance(state, tuple):  # e.g. LSTM state (h, c)
                    state = tuple(s.to(device) for s in state)
                else:
                    state = state.to(device)
            (Y,state) = model(X,state)
            if t < len(prefix) - 1:
                # Still consuming the prefix: ignore the prediction and feed the real character.
                output.append(char_to_idx[prefix[t + 1]])
            else:
                output.append(int(Y.argmax(dim=1).item()))
    return ''.join([idx_to_char[i] for i in output])

# Wrap the recurrent layer in the language model and move it to the configured device.
model = RNNModel(rnn_layer, vocab_size).to(device)
# Generate 10 characters after the prefix. No training has run at this point,
# so the continuation comes from the randomly initialised weights.
predict_rnn_pytorch('分开', 10, model, vocab_size, device, idx_to_char, char_to_idx)

'分开正斜当市晴移市摩摩摩'

In [None]:
# The training procedure is essentially the same as in section 6.1, so it is omitted here for now.