# GateRNN
本篇主要是想利用pytorch复现《深度学习进阶--自然语言处理》第六章内容，读取数据分析结果依旧用书中代码。
RNN部分主要参考 
https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html

GRU文档：https://pytorch.org/docs/stable/generated/torch.nn.GRU.html#torch.nn.GRU


## 模型创建

In [85]:
import torch
import torch.nn as nn

class Rnnlm(nn.Module):
    def __init__(self,vocab_size=10000,wordvec_size=1000):
        super(Rnnlm,self).__init__()
        self.embedding = nn.Embedding(vocab_size,wordvec_size)
        self.gru = nn.GRU(wordvec_size,wordvec_size,batch_first=True)
        self.affine = nn.Linear(wordvec_size,vocab_size)
        self.softmax = nn.Softmax(dim=-1)
        self.flatten = nn.Flatten(0,-2)

    def forward(self,input,hidden):
        embeded = self.embedding(input)
        output = embeded


        output, hidden = self.gru(output,hidden)
        output = self.affine(output)
        output = self.softmax(output)
        output = self.flatten(output)
        return output,hidden


    ## 测试模型
x  = [
        [1,1,1,2],
        [5,6,7,8],
        [9,10,11,12]
    ]

x = torch.Tensor(x).long()

input = x[:,1:]
target =x[:,0:-1]





#hiden = torch.zeros((1,3,5),dtype=torch.float32)
hidden = None


tmodel = Rnnlm(50,5)
out ,hidden= tmodel(input,hidden)

target = torch.flatten(target)

criterion = torch.nn.CrossEntropyLoss()
criterion(out,target)

# print(f"out.shape:{out.size()}")

# softmax = nn.Softmax(dim=-1)
# test = torch.zeros((2,5,4),dtype=torch.float32)
# test[:,:,3] = 1

# test[0,2,0] = 3
# print(test)
# print(softmax(test))



tensor(3.9122, grad_fn=<NllLossBackward0>)

In [91]:

## 训练模型
def train(model:Rnnlm,criterion,optimizer,input,target,h = None):
    optimizer.zero_grad()
    pred ,h = model(input,h)
    loss = criterion(pred,target)
    loss.backward()
    optimizer.step()

    return loss,h


## 加载数据


### colab加载主公共模块

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import sys
sys.path.append("/content/drive/MyDrive/ml/nlp/")

### 本机环境中用当前前目录

In [48]:
import sys
sys.path.append("../")

In [94]:


import sys

import matplotlib.pyplot as plt
import numpy as np
from common.optimizer import SGD
from dataset import ptb



batch_size = 500
wordvec_size = 100
hidden_size=100
time_size = 10
lr = 0.1
max_epoch = 100


corpus , word_to_id,id_to_word = ptb.load_data('train')
corpus_size = 1000


#corpus = corpus[:corpus_size]
vocab_size = len(word_to_id)
xs = corpus[:-1]
ts = corpus[1:]


data_size = len(corpus)
print("corpus size %d ,vocabulary size : %d, " % (corpus_size,vocab_size))

max_iters = data_size // (batch_size * time_size)
time_idx = 0
total_loss = 0


loss_cout = 0
ppl_list = []


print(f"vocab_size = {vocab_size} wordvec_size= {wordvec_size} hidden_size = {hidden_size}")



device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = Rnnlm(vocab_size,wordvec_size)
model.to(device=device)

optimizer = torch.optim.Adadelta(model.parameters(), lr=1e-2)

criterion = torch.nn.CrossEntropyLoss()


model.train()
for epoch in range(max_epoch):

    
   
    for  iter in range(max_iters):


        start_idx = iter * (batch_size * time_size)
        end_idx = (iter + 1) * (batch_size * time_size)

        batch_x = xs[start_idx:end_idx]
        batch_t = ts[start_idx:end_idx]

 
        batch_x = batch_x.reshape(batch_size,time_size) 
        #batch_t = batch_t.reshape(batch_size,time_size)


        batch_x = torch.from_numpy(batch_x).to(device=device)
        batch_t = torch.from_numpy(batch_t).to(device=device)


        h = None

        loss , h = train(model,criterion,optimizer,batch_x,batch_t,h)



    
        



corpus size 1000 ,vocabulary size : 10000, 
vocab_size = 10000 wordvec_size= 100 hidden_size = 100


KeyboardInterrupt: 

## 训练数据读取



