In [3]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

In [4]:
class TransformerModel(nn.Module):
    def __init__(self,ntoken,ninp,nhead,nhid,nlayers,dropout=0.5):
        super(TransformerModel,self).__init__()
        from torch.nn import TransformerEncoder,TransformerEncoderLayer
        self.model_type='Transformer'
        self.src_mask=None
        self.pos_encoder=PositionalEncoding(ninp,dropout)
        #ninp:the number of expected features in the input dmodel=512
        #nhid:the dimension of the feedforward network model (default=2048)
        encoder_layers=TransformerEncoderLayer(ninp,nhead,nhid,dropout)  #实例化encoder里面的一层
        self.transformer_encoder=TransformerEncoder(encoder_layers,nlayers)
        self.encoder=nn.Embedding(ntoken,ninp)  #ntoken是字母表个数
        self.ninp=ninp
        self.decoder=nn.Linear(ninp,ntoken)
        self.init_weights()
   
    def _generate_square_subsequent_mask(self,sz):
        mask=(torch.triu(torch.ones(sz,sz))==1).transpose(0,1) #下三角都是true的矩阵
        mask=mask.float().masked_fill(mask==0,float('-inf')).masked_fill(mask==1,float(0.0))
        return mask
              
    def init_weights(self):
        initrange=0.1
        self.encoder.weight.data.uniform_(-initrange,initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange,initrange)
        
    def forward(self,src):
        #src[S,N,E]:where S is the source sequence length, T is the target sequence length, N is the batch size, E is the feature number
        if self.src_mask is None or self.src_mask.size(0)!=len(src):
            device=src.device
            mask=self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask=mask
        src=self.encoder(src)*math.sqrt(self.ninp)
        src=self.pos_encoder(src)
        output=self.transformer_encoder(src,self.src_mask)
        output=self.decoder(output)
        return output
        
        
              
              

In [5]:
class PositionalEncoding(nn.Module):
    def __init__(self,d_model,dropout=0.1,max_len=5000):
        super(PositionalEncoding,self).__init__()
        self.dropout=nn.Dropout(p=dropout)
        #torch.Size([5000, 1, 512])
        pe=torch.zeros(max_len,d_model)  #(最大时间步，512)
        position=torch.arange(0,max_len,dtype=torch.float).unsqueeze(1)
        div_term=torch.exp(torch.arange(0,d_model,2).float()*(-math.log(10000.0)/d_model))
        pe[:,0::2]=torch.sin(position*div_term)   #从0开始每两位一取
        pe[:,1::2]=torch.cos(position*div_term)
        pe=pe.unsqueeze(0).transpose(0,1)
        self.register_buffer("pe",pe)
    def forward(self,x):
        x=x+self.pe[:x.size(0),:]
        return self.dropout(x)

In [13]:
cc=PositionalEncoding(512)
cc
print(cc.pe.shape)

torch.Size([5000, 1, 512])


In [16]:
import torchtext
from torchtext.data.utils import get_tokenizer

In [17]:
TEXT=torchtext.data.Field(tokenize=get_tokenizer("basic_english"),init_token='<sos>',eos_token='<eos>',lower=True)

train_txt,val_txt,test_txt=torchtext.datasets.WikiText2.splits(TEXT)
device=torch.device("cuda" if torch.cuda.is_available() else 'cpu')


downloading wikitext-2-v1.zip


.data\wikitext-2\wikitext-2-v1.zip: 100%|██████████████████████████████████████████| 4.48M/4.48M [00:37<00:00, 121kB/s]


extracting


In [23]:
def batchify(data,bsz):
    data=TEXT.numericalize([data.examples[0].text])
    LABEL.build_vocab(data)
    nbatch=data.size(0)//bsz
    data=data.narrow(0,0,nbatch*bsz)
    
    data=data.view(bsz,-1).t().contiguous()
    return data.to(device)

In [24]:
batch_size=20
eval_batch_size=10
train_data=batchify(train_txt,batch_size)
val_data=batchify(val_txt,eval_batch_size)
test_data=batchify(test_txt,eval_batch_size)

AttributeError: 'Field' object has no attribute 'vocab'