In [1]:
from utils import *

In [2]:
def load_data(batch_size, max_length=50):
    sentences = read_ptb()
    vocab = Vocab(sentences, min_freq=10)
    subsampled, counter = subsample(sentences, vocab)
    corpus = [vocab[line] for line in subsampled]

    class PTBDataset(paddle.io.Dataset):
        def __init__(self, corpus):
            # super().__init__()
            self.data = np.zeros((len(corpus), max_length)).astype('int32')
            for i, sentence in enumerate(corpus):
                self.data[i, :len(sentence)] = sentence

        def __getitem__(self, index):
            return self.data[index]

        def __len__(self):
            return len(self.data)

    dataset = PTBDataset(corpus)

    data_iter = paddle.io.DataLoader(dataset, batch_size=batch_size,shuffle=True)
    return data_iter, vocab

In [3]:
data_iter, vocab = load_data(64)

In [4]:
class ELMo_Model(nn.Layer):
    def __init__(self, voc_size, output_size, emb_size=64, n_layers=2, lr=2e-3):
        super().__init__()
        self.voc_size = voc_size
        self.emb = nn.Embedding(voc_size, emb_size, padding_idx=0, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Normal(0, 0.1)))
        self.f_lstms = nn.LayerList([nn.LSTM(emb_size, output_size, time_major=False) if i==0 else nn.LSTM(output_size, output_size, time_major=False) for i in range(n_layers)])
        self.b_lstms = nn.LayerList([nn.LSTM(emb_size, output_size, time_major=False) if i==0 else nn.LSTM(output_size, output_size, time_major=False) for i in range(n_layers)])
        self.f_linear = nn.Linear(output_size, voc_size)
        self.b_linear = nn.Linear(output_size, voc_size)
    
    def forward(self, seq):
        emb = self.emb(seq)
        f_outs = [emb[:, :-1, :]]
        b_outs = [emb[:, 1:, :]]
        for f_lstm in self.f_lstms:
            f_out, (h_, c_) = f_lstm(f_outs[-1])
            f_outs.append(f_out)
            
        for b_lstm in self.b_lstms:
            b_out, (h_, c_) = b_lstm(b_outs[-1])
            b_outs.append(b_out)
        
        f_out_l = self.f_linear(f_outs[-1])
        b_out_l = self.b_linear(b_outs[-1])
        return f_out_l, b_out_l
    
    def get_emb(self, seq):
        fo, bo = self(seq)
        embs = [paddle.concat([f[:, 1:, :], paddle.flip(b, axis=1)[:, :-1, :]], axis=2) for f, b in zip(fo, bo)]
        for i, emb in enumerate(embs, start=1):
            print('第{}的词向量维度为{}'.format(i, emb.shape[2]))
        return embs

In [6]:
epoch = 100
voc_size = len(vocab)
output_size = 128
batch_size = 64
lr = 2e-3

elmo = ELMo_Model(voc_size=voc_size, output_size=output_size)
opt = paddle.optimizer.Adam(learning_rate=lr, parameters=elmo.parameters())
loss = nn.CrossEntropyLoss()

In [7]:
for i in range(epoch):
    for batch, data in enumerate(data_iter()):
        f_outs, b_outs = elmo(data)
        
        fo = paddle.reshape(f_outs, (-1, voc_size))
        bo = paddle.reshape(b_outs, (-1, voc_size))
        
        f_label = paddle.reshape(data[:, 1:], (-1,))
        b_label = paddle.reshape(data[:, :-1], (-1,))
        f_label = paddle.cast(f_label, dtype='int64')
        b_label = paddle.cast(b_label, dtype='int64')
        
        l = (loss(fo, f_label) + loss(bo, b_label)) / 2
        l.backward()
        opt.step()
        opt.clear_grad()
        
        fo = f_outs[0].argmax(axis=1).numpy()
        bo = b_outs[0].argmax(axis=1).numpy()

    print('\n\nEpoch:{}, batch:{}, loss:{:.4f}'.format(i+1, batch+1, l.item()))
paddle.save(elmo.state_dict(), 'elmo.pdparams')


import numpy as np
from paddle.io import DataLoader, Dataset

class RandomDataset(Dataset):
    def __getitem__(self, idx):
        data = np.random.random((2, 3)).astype('float32')

        return data

    def __len__(self):
        return 10

dataset = RandomDataset()
loader = DataLoader(dataset, batch_size=1)
data = next(loader())

In Paddle <= 2.0, data is in format '[Tensor(shape=(1, 2, 3), dtype=float32)]', and in Paddle >= 2.1, data is in format 'Tensor(shape=(1, 2, 3), dtype=float32)'





Epoch:1, batch:658, loss:1.3957


Epoch:2, batch:658, loss:1.4755


Epoch:3, batch:658, loss:1.0948


Epoch:4, batch:658, loss:1.1913


Epoch:5, batch:658, loss:0.9537


Epoch:6, batch:658, loss:0.8333


Epoch:7, batch:658, loss:0.8670


Epoch:8, batch:658, loss:0.7123


Epoch:9, batch:658, loss:0.5120


Epoch:10, batch:658, loss:0.6755


Epoch:11, batch:658, loss:0.5826


Epoch:12, batch:658, loss:0.7171


Epoch:13, batch:658, loss:0.7279


Epoch:14, batch:658, loss:0.5472


Epoch:15, batch:658, loss:0.6205


Epoch:16, batch:658, loss:0.6797


Epoch:17, batch:658, loss:0.7520


Epoch:18, batch:658, loss:0.5729


Epoch:19, batch:658, loss:0.5044


Epoch:20, batch:658, loss:0.6645


Epoch:21, batch:658, loss:0.6159


Epoch:22, batch:658, loss:0.4917


Epoch:23, batch:658, loss:0.5226


Epoch:24, batch:658, loss:0.5689


Epoch:25, batch:658, loss:0.4974


Epoch:26, batch:658, loss:0.5546


Epoch:27, batch:658, loss:0.4441


Epoch:28, batch:658, loss:0.5952


Epoch:29, batch:658, loss:0