In [1]:
# List the dataset directory currently mounted for this project
!ls /home/aistudio/data/

In [2]:
# 查看个人持久化工作区文件
!ls /home/aistudio/work/

!wget https://zh.d2l.ai/d2l-zh.zip

!unzip d2l-zh.zip

In [3]:
# Print the CUDA compiler (nvcc) version; the CUDA release shown here decides
# which mxnet-cuXX wheel is installed further down.
!nvcc -V

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2018 NVIDIA Corporation
Built on Tue_Jun_12_23:07:04_CDT_2018
Cuda compilation tools, release 9.2, V9.2.148


In [4]:
# Show the GPU, driver version and current memory usage visible to this machine
!nvidia-smi

Mon Jul  1 14:55:56 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 396.37                 Driver Version: 396.37                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:09.0 Off |                    0 |
| N/A   34C    P0    38W / 300W |      0MiB / 16160MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [5]:
# Install the MXNet build for CUDA 9.2 (the release nvcc reported), pinned to 1.4.0
!pip install mxnet-cu92==1.4.0

In [6]:
# Install the d2lzh helper package, pinned to 0.8.11
!pip install d2lzh==0.8.11

In [7]:
import d2lzh as d2l
# Ask d2lzh for a compute context and report it.
# NOTE(review): try_gpu presumably falls back to the CPU when no GPU is usable -- confirm.
ctx = d2l.try_gpu()
print('will use', ctx)

will use gpu(0)


# 语言模型数据集
## 读取数据集

In [18]:
from mxnet import nd
import random
import zipfile

# Read the lyrics file straight out of the zip archive and decode it as UTF-8.
with zipfile.ZipFile('data/jaychou_lyrics.txt.zip') as zin, \
        zin.open('jaychou_lyrics.txt') as f:
    corpus_chars = f.read().decode('utf-8')

# Preview the first five characters, with newlines shown as spaces.
corpus_chars.replace('\n', ' ')[:5]

'想要有直升'

In [21]:
# Replace newline and carriage-return characters with spaces
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
print(len(corpus_chars))

# Build the index -> character list and the character -> index lookup
idx_to_char = list(set(corpus_chars))
char_to_idx = {char: idx for idx, char in enumerate(idx_to_char)}
vocab_size = len(idx_to_char)
print(vocab_size)

# Encode every character of the corpus as its index
corpus_indices = [char_to_idx[char] for char in corpus_chars]
print(corpus_indices[:20])

63282
2582
[270, 1273, 2121, 1159, 2448, 2212, 49, 270, 1273, 2478, 168, 2559, 2482, 1375, 1277, 2160, 49, 270, 1273, 2478]


## 时序数据的采样
### 随机采样
*相邻的两个随机小批量在原始序列上的位置不一定相毗邻*

In [22]:
def data_iter_random(corpus_indices, batch_size, num_steps, ctx=None):
    """Yield mini-batches of randomly ordered, non-overlapping windows.

    The sequence is cut into windows of ``num_steps`` items. The windows are
    shuffled once and handed out ``batch_size`` at a time. Leftover windows
    that do not fill a whole batch are dropped.

    Args:
        corpus_indices: Indexable sequence of token indices.
        batch_size: Number of windows per mini-batch.
        num_steps: Length of each window.
        ctx: Passed as the second argument to ``nd.array``.

    Yields:
        ``(x, y)`` pairs of NDArrays built from ``batch_size`` rows of
        ``num_steps`` items, where each row of ``y`` is the matching row of
        ``x`` shifted one position to the right in the sequence.
    """
    # One item is held back so the last window still has a shifted-by-one label.
    num_examples = (len(corpus_indices) - 1) // num_steps
    epoch_size = num_examples // batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    for batch_no in range(epoch_size):
        first = batch_no * batch_size
        # Start offset in the sequence of every window in this batch.
        starts = [j * num_steps for j in example_indices[first:first + batch_size]]
        x = [corpus_indices[s:s + num_steps] for s in starts]
        y = [corpus_indices[s + 1:s + 1 + num_steps] for s in starts]
        yield nd.array(x, ctx), nd.array(y, ctx)

In [25]:
# Toy sequence 0..29 so the sampled windows are easy to read
my_seq=list(range(30))
# Each row of y is the matching row of x shifted one position to the right
for x,y in data_iter_random(my_seq, batch_size=2, num_steps=6):
    print('x:',x,'\ny:',y,'\n')

x: 
[[ 6.  7.  8.  9. 10. 11.]
 [12. 13. 14. 15. 16. 17.]]
<NDArray 2x6 @cpu(0)> 
y: 
[[ 7.  8.  9. 10. 11. 12.]
 [13. 14. 15. 16. 17. 18.]]
<NDArray 2x6 @cpu(0)> 

x: 
[[18. 19. 20. 21. 22. 23.]
 [ 0.  1.  2.  3.  4.  5.]]
<NDArray 2x6 @cpu(0)> 
y: 
[[19. 20. 21. 22. 23. 24.]
 [ 1.  2.  3.  4.  5.  6.]]
<NDArray 2x6 @cpu(0)> 



### 相邻采样

In [None]:
def data_iter_consecutive(corpus_indices, batch_size, num_steps, ctx=None):
    """Yield mini-batches whose windows are adjacent from one batch to the next.

    The sequence is truncated to a multiple of ``batch_size`` and reshaped to
    ``(batch_size, batch_len)``. Each batch is the next ``num_steps`` columns,
    so row ``r`` of one batch continues row ``r`` of the previous batch.

    Args:
        corpus_indices: Sequence of token indices.
        batch_size: Number of rows (parallel sub-sequences).
        num_steps: Number of time steps per mini-batch.
        ctx: Context passed to ``nd.array`` for the whole sequence.

    Yields:
        ``(x, y)`` pairs, each ``batch_size`` rows by ``num_steps`` columns,
        where ``y`` is ``x`` shifted one column to the right.
    """
    corpus_indices = nd.array(corpus_indices, ctx=ctx)
    data_len = len(corpus_indices)
    batch_len = data_len // batch_size
    indices = corpus_indices[0:batch_size * batch_len].reshape(
        (batch_size, batch_len))
    # The number of batches depends on the row length, not on batch_size.
    # One column is held back so the last label window stays in range.
    epoch_size = (batch_len - 1) // num_steps
    for step in range(epoch_size):
        start = step * num_steps
        x = indices[:, start:start + num_steps]
        y = indices[:, start + 1:start + num_steps + 1]
        yield x, y

In [None]:
# Run the consecutive sampler on the toy sequence my_seq and print every (x, y) batch
for x,y in data_iter_consecutive(my_seq, batch_size=2, num_steps=6):
    print('x is ', x, '\ny is ', y, '\n')

# RNN从零实现

In [12]:
import d2lzh as d2l
import math 
from mxnet import autograd,nd
from mxnet.gluon import loss as gloss
import time

# Load the lyrics corpus through the d2lzh helper; it is unpacked into the index
# sequence, the two lookup tables and the vocabulary size.
(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()

## one-hot向量

In [None]:
# One-hot encode the indices 0 and 2 with depth vocab_size
nd.one_hot(nd.array([0,2]), vocab_size)

In [None]:
def to_onehot(X, size):
    """One-hot encode ``X`` one time step at a time.

    Args:
        X: 2-D NDArray of indices. It is transposed before iterating, so each
            encoded array corresponds to one column of ``X``.
            NOTE(review): presumably shaped (batch_size, num_steps) -- confirm.
        size: Depth of the one-hot encoding (the vocabulary size at the call site).

    Returns:
        A list with one one-hot encoded NDArray per column of ``X``.
    """
    # The MXNet operator is spelled ``one_hot``; ``nd.onehot`` does not exist.
    return [nd.one_hot(x, size) for x in X.T]

# A 2x5 batch holding the indices 0..9
X=nd.arange(10).reshape((2,5))
inputs=to_onehot(X, vocab_size)
# Display how many arrays were produced and the shape of the first one
len(inputs), inputs[0].shape

## 初始化模型参数
$H_{t} = \phi ( X_{t} W_{xh}+H_{t-1} W_{hh}+b_{h})$

$O_t=H_t W_{hq}+b_q$

In [None]:
# Input and output widths both equal the vocabulary size; the hidden layer has 256 units
num_inputs, num_hiddens, num_outputs=vocab_size, 256, vocab_size
# Context on which the parameters are created by get_params
ctx=d2l.try_gpu()
print('will use', ctx)

def get_params():
    """Create the RNN parameters and attach gradient buffers to them.

    Reads the module-level ``num_inputs``, ``num_hiddens``, ``num_outputs``
    and ``ctx``. Weights are drawn from a normal distribution with scale 0.01
    and biases start at zero.

    Returns:
        ``[W_xh, W_hh, b_h, W_hq, b_q]``, each with ``attach_grad()`` called.
    """
    def _one(shape):
        # Small random weights, created directly on the target context.
        return nd.random.normal(scale=0.01, shape=shape, ctx=ctx)

    # Hidden-layer parameters
    W_xh = _one((num_inputs, num_hiddens))
    W_hh = _one((num_hiddens, num_hiddens))
    b_h = nd.zeros(num_hiddens, ctx=ctx)

    # Output-layer parameters
    W_hq = _one((num_hiddens, num_outputs))
    b_q = nd.zeros(num_outputs, ctx=ctx)

    params = [W_xh, W_hh, b_h, W_hq, b_q]
    for param in params:
        param.attach_grad()
    return params

## 定义模型

In [None]:
def init_rnn_state(batch_size, num_hiddens, ctx):
    """Return the initial hidden state as a one-element tuple.

    The single element is an all-zero NDArray of shape
    ``(batch_size, num_hiddens)`` created on ``ctx``.
    """
    hidden = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
    return (hidden,)