In [1]:
%%javascript
$.getScript("http://120.78.95.32/j_n_contents.js")

<IPython.core.display.Javascript object>

<div id="toc">
<div>

# 6.1 语言模型

# 6.2 循环神经网络

In [2]:
import torch
import torch.nn as nn

In [31]:
x,w_xh=torch.randn(3,1),torch.randn(1,4)
h,w_hh=torch.randn(3,4),torch.randn(4,4)
torch.matmul(x,w_xh)+torch.matmul(h,w_hh)

tensor([[-2.6263,  1.2154,  0.2870,  0.3547],
        [-2.5293,  2.8904, -2.9870, -0.3797],
        [ 1.9435, -0.9750, -0.8919,  0.7421]])

In [32]:
torch.matmul(torch.cat((x,h),dim=1),torch.cat((w_xh,w_hh),dim=0))

tensor([[-2.6263,  1.2154,  0.2870,  0.3547],
        [-2.5293,  2.8904, -2.9870, -0.3797],
        [ 1.9435, -0.9750, -0.8919,  0.7421]])

# 6.3 语言模型数据集(周杰伦专辑歌词)

In [50]:
import torch
import random
import zipfile

with zipfile.ZipFile("../../data/jaychou_lyrics.txt.zip") as zin:
    with zin.open('jaychou_lyrics.txt') as f:
        corpus_chars=f.read().decode('utf-8')
corpus_chars[:40]

'想要有直升机\n想要和你飞到宇宙去\n想要和你融化在一起\n融化在宇宙里\n我每天每天每'

In [51]:
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
corpus_chars = corpus_chars[0:10000]

In [52]:
idx_to_char=list(set(corpus_chars))
char_to_idx=dict([(char,i) for i,char in enumerate(idx_to_char)])
vocab_size=len(char_to_idx)
vocab_size

1027

In [53]:
corpus_indices=[char_to_idx[char] for char in corpus_chars]
sample=corpus_indices[:20]
print("".join([idx_to_char[idx] for idx in sample]))
print(sample)

想要有直升机 想要和你飞到宇宙去 想要和
[802, 492, 828, 567, 673, 789, 920, 802, 492, 957, 944, 86, 739, 704, 801, 97, 920, 802, 492, 957]


In [54]:
def data_iter_random(corpus_indices,batch_size,num_steps,device=None):
    num_examples=(len(corpus_indices)-1)//num_steps
    epoch_size=num_examples//batch_size
    example_indices=list(range(num_examples))
    random.shuffle(example_indices)
    
    def _data(pos):
        return corpus_indices[pos:pos+num_steps]
    if device is None:
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
       
    for i in range(epoch_size):
        i=i*batch_size
        batch_indices=example_indices[i:i+batch_size]
        x=[_data(j*num_steps) for j in batch_indices]
        y=[_data(j*num_steps+1) for j in batch_indices]
        
        yield torch.tensor(x,dtype=torch.float32,device=device),torch.tensor(y,dtype=torch.float32,device=device)
        

In [55]:
my_seq=list(range(30))
for x,y in data_iter_random(my_seq,batch_size=2,num_steps=6):
    print("x",x," y",y)

x tensor([[12., 13., 14., 15., 16., 17.],
        [ 0.,  1.,  2.,  3.,  4.,  5.]], device='cuda:0')  y tensor([[13., 14., 15., 16., 17., 18.],
        [ 1.,  2.,  3.,  4.,  5.,  6.]], device='cuda:0')
x tensor([[18., 19., 20., 21., 22., 23.],
        [ 6.,  7.,  8.,  9., 10., 11.]], device='cuda:0')  y tensor([[19., 20., 21., 22., 23., 24.],
        [ 7.,  8.,  9., 10., 11., 12.]], device='cuda:0')


In [57]:
def data_iter_consecutive(corpus_indices,batch_size,num_steps,device=None):
    if device is None:
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
    corpus_indices=torch.tensor(corpus_indices,dtype=torch.float32,device=device)
    data_len=len(corpus_indices)
    batch_len=data_len//batch_size
    indices=corpus_indices[0:batch_size*batch_len].view(batch_size,batch_len)
    epoch_size=(batch_len-1)//num_steps
    for i in range(epoch_size):
        i=i*num_steps
        x=indices[:,i:i+num_steps]
        y=indices[:,i+1:i+num_steps+1]
        yield x,y

In [59]:
for X, Y in data_iter_consecutive(my_seq, batch_size=2, num_steps=6):
    print('X: ', X, '\nY:', Y, '\n')


X:  tensor([[ 0.,  1.,  2.,  3.,  4.,  5.],
        [15., 16., 17., 18., 19., 20.]], device='cuda:0') 
Y: tensor([[ 1.,  2.,  3.,  4.,  5.,  6.],
        [16., 17., 18., 19., 20., 21.]], device='cuda:0') 

X:  tensor([[ 6.,  7.,  8.,  9., 10., 11.],
        [21., 22., 23., 24., 25., 26.]], device='cuda:0') 
Y: tensor([[ 7.,  8.,  9., 10., 11., 12.],
        [22., 23., 24., 25., 26., 27.]], device='cuda:0') 



# 6.4 循环神经网络从零实现

In [61]:
import time
import math 
import numpy as np
import torch
from torch import nn,optim
import torch.nn.functional as F

import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()
