In [1]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
emb = nn.Embedding(10000, 20, padding_idx=0)
inp = torch.tensor([1, 2, 5, 2, 10], dtype=torch.int64)
out = emb(inp)
print(out)
print(out.size())

tensor([[ 1.7727, -0.3389, -0.1840,  0.5645, -0.0775, -0.2983, -1.2093,  0.4237,
          2.0949,  0.8848, -0.7230, -0.2230,  0.0488, -0.2833, -1.4652,  1.5824,
         -1.2820,  0.5358, -1.1885, -0.1016],
        [-0.6956,  1.3488,  0.7089, -0.1382,  0.6998,  0.2334,  0.5400, -0.1324,
          0.8616,  0.0209, -0.2070,  0.2540, -1.8005,  0.1579, -0.8390, -0.0417,
         -0.2288, -0.0323, -0.5640,  1.7304],
        [ 0.0446, -1.3939,  0.9387, -0.8969,  1.5750,  1.2532,  0.7739, -1.4090,
          1.5641,  0.7083, -0.9670,  1.0753, -0.9089,  0.6813,  3.0262, -0.2384,
          0.4439,  0.1285, -0.6647, -1.1171],
        [-0.6956,  1.3488,  0.7089, -0.1382,  0.6998,  0.2334,  0.5400, -0.1324,
          0.8616,  0.0209, -0.2070,  0.2540, -1.8005,  0.1579, -0.8390, -0.0417,
         -0.2288, -0.0323, -0.5640,  1.7304],
        [-0.0880, -0.4981, -0.4940,  1.6404, -0.0312, -0.0764, -0.5956,  1.2367,
         -0.9138,  0.7083,  0.2669, -1.4221,  2.2204, -0.0848, -1.3911,  0.4804,
      

In [3]:
import glob
import pathlib
import re

remove_marks_regex = re.compile("[,\.\(\)\[\]\*:;]<.*?>")
shift_marks_regex = re.compile("([?!])")

def text2ids(text, vocab_dict):
    text = remove_marks_regex.sub("", text)
    text = shift_marks_regex.sub(r" \1 ", text)
    tokens = text.split()
    return [vocab_dict.get(token, 0) for token in tokens]

def list2tensor(token_idxes, max_len=100, padding=True):
    if len(token_idxes) > max_len:
        token_idxes = token_idxes[:max_len]
    n_tokens = len(token_idxes)
    if padding:
        token_idxes = token_idxes + [0] * (max_len - len(token_idxes))
    return torch.tensor(token_idxes, dtype=torch.int64), n_tokens

In [4]:
class IMDBDataset(Dataset):
    def __init__(self, dir_path, train=True, max_len=100, padding=True):
        self.max_len = max_len
        self.padding = padding
        path = pathlib.Path(dir_path)
        vocab_path = path.joinpath("imdb.vocab")
        
        self.vocab_array = vocab_path.open().read().strip().splitlines()
        self.vocab_dict = dict((w, i+1) for (i, w) in enumerate(self.vocab_array))
        
        if train:
            target_path = path.joinpath("train")
        else:
            target_path = path.joinpath("test")
        pos_files = sorted(glob.glob(str(target_path.joinpath("pos/*.txt"))))
        neg_files = sorted(glob.glob(str(target_path.joinpath("neg/*.txt"))))
        self.labeled_files = \
            list(zip([0]*len(neg_files), neg_files)) + \
            list(zip([1]*len(pos_files), pos_files))
        
    @property
    def vocab_size(self):
        return len(self.vocab_array)
    
    def __len__(self):
        return len(self.labeled_files)
    
    def __getitem__(self, idx):
        label, f = self.labeled_files[idx]
        data = open(f).read().lower()
        data = text2ids(data, self.vocab_dict)
        data, n_tokens = list2tensor(data, self.max_len, self.padding)
        return data, label, n_tokens

In [5]:
train_data = IMDBDataset("./data/aclImdb/")
test_data = IMDBDataset("./data/aclImdb/", train=False)
train_loader = DataLoader(train_data, batch_size=32,
                          shuffle=True, num_workers=8)
test_loader = DataLoader(test_data, batch_size=32,
                         shuffle=False, num_workers=8)

In [6]:
class SequenceTaggingNet(nn.Module):
    def __init__(self, num_embeddings, embedding_dim=50, hidden_size=50,
                 num_layers=1, dropout=0.2):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_size,
                            num_layers, batch_first=True, dropout=dropout)
        self.linear = nn.Linear(hidden_size, 1)
        
    def forward(self, x, h0=None, l=None):
        x = self.emb(x)
        x, h = self.lstm(x, h0)
        if l is not None:
            x = x[list(range(len(x))), l-1, :]
        else:
            x = x[:, -1, :]
        x = self.linear(x)
        x = x.squeeze()
        return x

In [7]:
def eval_net(net, data_loader, device="cpu"):
    net.eval()
    ys = []
    ypreds = []
    for x, y, l in data_loader:
        x = x.to(device)
        y = y.to(device)
        l = l.to(device)
        with torch.no_grad():
            y_pred = net(x, l=l)
            y_pred = (y_pred > 0).long()
            ys.append(y)
            ypreds.append(y_pred)
    ys = torch.cat(ys)
    ypreds = torch.cat(ypreds)
    acc = (ys == ypreds).float().sum() / len(ys)
    return acc.item()

In [8]:
from statistics import mean

net = SequenceTaggingNet(train_data.vocab_size+1, num_layers=2)
net.to("cuda:0")
opt = optim.Adam(net.parameters())
loss_f = nn.BCEWithLogitsLoss()

for epoc in range(10):
    losses = []
    net.train()
    for x, y, l in tqdm.tqdm(train_loader):
        x = x.to("cuda:0")
        y = y.to("cuda:0")
        l = l.to("cuda:0")
        y_pred = net(x, l=l)
        loss = loss_f(y_pred, y.float())
        net.zero_grad()
        loss.backward()
        opt.step()
        losses.append(loss.item())
    train_acc = eval_net(net, train_loader, "cuda:0")
    val_acc = eval_net(net, test_loader, "cuda:0")
    print("epoc: {}\tmean(losses): {:.3f}\ttrain_acc: {:.3f}\tval_acc: {:.3f}".format(
    epoc, mean(losses), train_acc, val_acc))

100%|██████████| 782/782 [00:09<00:00, 80.95it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoc: 0	mean(losses): 0.676	train_acc: 0.629	val_acc: 0.607


100%|██████████| 782/782 [00:09<00:00, 83.73it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoc: 1	mean(losses): 0.603	train_acc: 0.742	val_acc: 0.698


100%|██████████| 782/782 [00:09<00:00, 83.87it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoc: 2	mean(losses): 0.498	train_acc: 0.815	val_acc: 0.747


100%|██████████| 782/782 [00:09<00:00, 84.15it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoc: 3	mean(losses): 0.410	train_acc: 0.861	val_acc: 0.768


100%|██████████| 782/782 [00:09<00:00, 84.18it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoc: 4	mean(losses): 0.381	train_acc: 0.847	val_acc: 0.744


100%|██████████| 782/782 [00:09<00:00, 83.54it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoc: 5	mean(losses): 0.325	train_acc: 0.891	val_acc: 0.773


100%|██████████| 782/782 [00:09<00:00, 84.46it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoc: 6	mean(losses): 0.280	train_acc: 0.917	val_acc: 0.778


100%|██████████| 782/782 [00:09<00:00, 83.70it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoc: 7	mean(losses): 0.234	train_acc: 0.937	val_acc: 0.780


100%|██████████| 782/782 [00:09<00:00, 84.21it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoc: 8	mean(losses): 0.193	train_acc: 0.946	val_acc: 0.773


100%|██████████| 782/782 [00:09<00:00, 83.17it/s]


epoc: 9	mean(losses): 0.162	train_acc: 0.963	val_acc: 0.770


In [9]:
from sklearn.datasets import load_svmlight_file
from sklearn.linear_model import LogisticRegression

train_X, train_y = load_svmlight_file("./data/aclImdb/train/labeledBow.feat")
test_X, test_y = load_svmlight_file("./data/aclImdb/test/labeledBow.feat")

model = LogisticRegression(C=0.1, max_iter=5000)
model.fit(train_X[:, :test_X.shape[1]], train_y[:test_y.shape[0]])
train_score = model.score(train_X[:, :test_X.shape[1]], train_y[:test_y.shape[0]])
test_score = model.score(test_X, test_y)
print(train_score, test_score)

0.93132 0.394


In [10]:
class SequenceTaggingNet2(SequenceTaggingNet):
    def forward(self, x, h0=None, l=None):
        x = self.emb(x)
        if l is not None:
            x = nn.utils.rnn.pack_padded_sequence(x, l, batch_first=True)
        x, h = self.lstm(x, h0)
        if l is not None:
            hidden_state, cell_state = h
            x = hidden_state[-1]
        else:
            x = x[:, -1, :]
        
        x = self.linear(x).squeeze()
        return x

In [11]:
def eval_net2(net, data_loader, device="cpu"):
    net.eval()
    ys = []
    ypreds = []
    for x, y, l in data_loader:
        l, sort_idx = torch.sort(l, descending=True)
        x = x[sort_idx]
        y = y[sort_idx]
        
        x = x.to(device)
        y = y.to(device)
        l = l.to(device)
        with torch.no_grad():
            y_pred = net(x, l=l)
            y_pred = (y_pred > 0).long()
            ys.append(y)
            ypreds.append(y_pred)
    ys = torch.cat(ys)
    ypreds = torch.cat(ypreds)
    acc = (ys == ypreds).float().sum() / len(ys)
    return acc.item()

In [12]:
net = SequenceTaggingNet2(train_data.vocab_size+1, num_layers=2)
net.to("cuda:0")
opt = optim.Adam(net.parameters())
loss_f = nn.BCEWithLogitsLoss()

for epoch in range(10):
    losses = []
    net.train()
    for x, y, l in tqdm.tqdm(train_loader):
        l, sort_idx = torch.sort(l, descending=True)
        x = x[sort_idx]
        y = y[sort_idx]
        
        x = x.to("cuda:0")
        y = y.to("cuda:0")
        l = l.to("cuda:0")
        
        y_pred = net(x, l=l)
        loss = loss_f(y_pred, y.float())
        net.zero_grad()
        loss.backward()
        opt.step()
        losses.append(loss.item())
    train_acc = eval_net2(net, train_loader, "cuda:0")
    val_acc = eval_net2(net, test_loader, "cuda:0")
    print("epoch: {}\tmean(losses): {:.3f}\ttrain_acc: {:.3f}\tval_acc: {:.3f}".format(
    epoch, mean(losses), train_acc, val_acc))

100%|██████████| 782/782 [00:10<00:00, 72.54it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoch: 0	mean(losses): 0.688	train_acc: 0.542	val_acc: 0.543


100%|██████████| 782/782 [00:10<00:00, 71.96it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoch: 1	mean(losses): 0.681	train_acc: 0.592	val_acc: 0.561


100%|██████████| 782/782 [00:10<00:00, 72.31it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoch: 2	mean(losses): 0.660	train_acc: 0.690	val_acc: 0.650


100%|██████████| 782/782 [00:10<00:00, 71.67it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoch: 3	mean(losses): 0.575	train_acc: 0.732	val_acc: 0.676


100%|██████████| 782/782 [00:10<00:00, 72.04it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoch: 4	mean(losses): 0.487	train_acc: 0.824	val_acc: 0.739


100%|██████████| 782/782 [00:10<00:00, 72.97it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoch: 5	mean(losses): 0.422	train_acc: 0.861	val_acc: 0.757


100%|██████████| 782/782 [00:10<00:00, 72.92it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoch: 6	mean(losses): 0.351	train_acc: 0.890	val_acc: 0.765


100%|██████████| 782/782 [00:10<00:00, 73.99it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoch: 7	mean(losses): 0.296	train_acc: 0.916	val_acc: 0.771


100%|██████████| 782/782 [00:10<00:00, 73.17it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

epoch: 8	mean(losses): 0.249	train_acc: 0.921	val_acc: 0.760


100%|██████████| 782/782 [00:10<00:00, 72.33it/s]


epoch: 9	mean(losses): 0.205	train_acc: 0.952	val_acc: 0.767


### 文章生成

In [13]:
import string
all_chars = string.printable
print(all_chars)
vocab_size = len(all_chars)
vocab_dict = dict((c, i) for i, c in enumerate(all_chars))

def str2ints(s, vocab_dict):
    return [vocab_dict[c] for c in s]

def ints2str(x, vocab_array):
    return "".join([vocab_array[i] for i in x])

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	



In [14]:
class ShakespeareDataset(Dataset):
    def __init__(self, path, chunk_size=200):
        data = str2ints(open(path).read().strip(), vocab_dict)
        data = torch.tensor(data, dtype=torch.int64).split(chunk_size)
        if len(data[-1]) < chunk_size:
            data = data[:-1]
        self.data = data
        self.n_chunks = len(self.data)
        
    def __len__(self):
        return self.n_chunks
    
    def __getitem__(self, idx):
        return self.data[idx]

In [15]:
ds = ShakespeareDataset("./data/tinyshakespeare.txt")
loader = DataLoader(ds, batch_size=32, shuffle=True, num_workers=8)

In [16]:
class SequenceGenerationNet(nn.Module):
    def __init__(self, num_embeddings, embedding_dim=50, hidden_size=50,
                 num_layers=1, dropout=0.2):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers,
                            batch_first=True, dropout=dropout)
        self.linear = nn.Linear(hidden_size, num_embeddings)
        
    def forward(self, x, h0=None):
        x = self.emb(x)
        x, h = self.lstm(x, h0)
        x = self.linear(x)
        return x, h

In [17]:
def generate_seq(net, start_phrase="The King said ", length=200,
                 temperature=0.8, device="cpu"):
    net.eval()
    result = []
    
    start_tensor = torch.tensor(str2ints(start_phrase, vocab_dict),
                                dtype=torch.int64).to(device)
    x0 = start_tensor.unsqueeze(0)
    o, h = net(x0)
    out_dist = o[:, -1].view(-1).exp()
    top_i = torch.multinomial(out_dist, 1)[0]
    result.append(top_i)
    
    for i in range(length):
        inp = torch.tensor([[top_i]], dtype=torch.int64)
        inp = inp.to(device)
        o, h = net(inp, h)
        out_dist = o.view(-1).exp()
        top_i = torch.multinomial(out_dist, 1)[0]
        result.append(top_i)
        
    return start_phrase + ints2str(result, all_chars)

In [18]:
import numpy
s = numpy.array([[1, 2], [3, 4]])
print(s[:, -1].reshape(-1))

[2 4]


In [19]:
from statistics import mean

net = SequenceGenerationNet(vocab_size, 20, 50, num_layers=2, dropout=0.1)
net.to("cuda:0")
opt = optim.Adam(net.parameters())
loss_f = nn.CrossEntropyLoss()

for epoch in range(50):
    net.train()
    losses = []
    for data in tqdm.tqdm(loader):
        x = data[:, : -1]
        y = data[:, 1:]
        x = x.to("cuda:0")
        y = y.to("cuda:0")
        
        y_pred, _ = net(x)
        loss = loss_f(y_pred.view(-1, vocab_size), y.view(-1))
        net.zero_grad()
        loss.backward()
        opt.step()
        losses.append(loss.item())
        
    print("epoch:{}\tmean(losses):{:.3f}".format(epoch, mean(losses)))
    with torch.no_grad():
        print(generate_seq(net, device="cuda:0"))

100%|██████████| 175/175 [00:02<00:00, 60.23it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:0	mean(losses):3.490
The King said :de' mwTliain
nITp hciorfni
a`nhWbtOiRedtouc ,lrn Whbe:htao2metfCnn htsaboh:;rl  hm,'wh,s abAd n
 i ;Hbe dbdo  I ,lahnhetteeh+o o, adrmbs leh Peao:loekRos tpsoetHLAroR
a fLypedIfieked
a ioIdIiiOto ,aro


100%|██████████| 175/175 [00:03<00:00, 57.01it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:1	mean(losses):3.225
The King said t, taer admn ch t
recmngreit! erordraeim g hko sbod Lmouewh,f nto tsno mEfg oy nsy hi.oeoaw ed
Mi i
Ae iaLb li, yb wCshde ahwi sccyshhscyalop esawkemo rotwh
. lo  hsl yu t d?ydturMplayr
e IIt  maesfon 


100%|██████████| 175/175 [00:03<00:00, 55.20it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:2	mean(losses):2.817
The King said fdl. it at tuy ixh bt erutorl mhnaldy Bon thOise

MI&KOYUOP
AOESEW:
(asl Nhiort anl maons whoe eos anilr iriuir bidnpyirdr; oa,,r tisn,
Tiou stnanesls.

: aws agideso ft Slhhn tooe seg! msfeld mritd gv


100%|██████████| 175/175 [00:02<00:00, 63.27it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:3	mean(losses):2.525
The King said alt
:
Anst core hir to lam, in on to maes fis adon, awlrff te by I lifmgiglrre,,
Mourh cmie
Whinm be le ades I puourwendh
I done coas hiclinerifu shamin se, tetell mo?

COve yor fat the,
 Iens chanr th


100%|██████████| 175/175 [00:02<00:00, 62.51it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:4	mean(losses):2.377
The King said hy whjy thet wis agend; cimasheed Sorle arle bi!
3ate be to the tive thee to;
Ecfos id.
iran, mam socut wore heatfr ame ace tmewr
oicasd chany oaet thke rith soun:
Wy hougln: yEminco?
Thaam
Bhave chive


100%|██████████| 175/175 [00:02<00:00, 59.48it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:5	mean(losses):2.292
The King said ward han,
And unnetass
Thetae enkinchrerd blees Rothend tereacilchad,
Sotid.

DUMINGS:
Irthe seel ad than not gipde:
MGoun lench yog ber
Tese tho siy faas wowt,, havet tinot bonneand,
An the les id Pth


100%|██████████| 175/175 [00:02<00:00, 60.50it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:6	mean(losses):2.228
The King said forlirlaip wot mround Fout-toreom
Thke chicle fod,
'ime nicfems thevt teabbut,
Iu and,
engri'd Got thtife,
Ansls tinves of promind ba marrhaos saaritwewthe the hath futy.
Lat mine, math sard mee he the


100%|██████████| 175/175 [00:02<00:00, 62.37it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:7	mean(losses):2.173
The King said kopchers not swade pestenk, freculy a yaat, whour thitiy thoo!
the thir, igart wulc,
To a withey thou:
Nou-s
'er sroolld a theese or mapaulches thop inge he the dleestare!
O I fut ad arghlofsru nelleal


100%|██████████| 175/175 [00:03<00:00, 57.97it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:8	mean(losses):2.124
The King said hagh of Reestaly thee hir hid a, hitlommhy bu'd stue, alk a angt throm hundy thou to linty then I hout im wap, whors, frad! Agt'ch.
/wet, lileoned the in,
I be thou, heuch betertor tucpnof;
dod sore,
A


100%|██████████| 175/175 [00:02<00:00, 58.62it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:9	mean(losses):2.083
The King said ite wim begtealter, war, fo titarteascernd the filling, fo sto to cnourer,
There? thiid gowtllore deringury wouth
eres' vatace 3Bhobly buth alk, of thoe all,
I wici-till all? Felut
Freerse so he cipend


100%|██████████| 175/175 [00:03<00:00, 58.31it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:10	mean(losses):2.049
The King said the stloud:
And shorthlee se cardems:
incostiag ure,
Burt.

COLANGUES:
Bothing
theit me steen foly hy burs and ticfoys will.

I:
Bowe lering, by Trouvour's on dile goaod
Thince my nasty we'd lors soaru


100%|██████████| 175/175 [00:02<00:00, 59.78it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:11	mean(losses):2.021
The King said it the skakhend the wheiliy she fay, I my mam,
Buts, tey is thach hen tirke,
Our the, of hy he drothars, ort not ix Ore cunad suach to not with thant not you and butwac hiturict make your fon toBcroon



100%|██████████| 175/175 [00:02<00:00, 58.92it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:12	mean(losses):1.996
The King said the Rerisse-ne perod tell in shing by wirdos guos, at not fake! to'mere Frold hove inge, Fost here coneritpres-

PLOBENTIA:
So oice
Tfor it I legiend.

IUKEN ERFIRA:
I he for to not and in y bath.

LOU


100%|██████████| 175/175 [00:02<00:00, 60.70it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:13	mean(losses):1.974
The King said Privedtickmezets a wory lecherst heart huner's my thee balle: I darlin bires, so I ar's dands,
And nelia pribe brebent will lal!
Wide cunchs!'
Rattent siad will ut gigattelt
the spach Appyge daingald, 


100%|██████████| 175/175 [00:03<00:00, 57.53it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:14	mean(losses):1.952
The King said men have to you his kavimy will and harly Maranridess. telk of who wurse thnows you to peinding nor with to beim
ese a ruth themstelt; cordarts,
Rith he ashousour, and for frar be mime, stleims and sha


100%|██████████| 175/175 [00:02<00:00, 58.95it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:15	mean(losses):1.930
The King said ud.
Ard did's as be bladed
Thage cosk withperows shals deaphore nod
Wo mes hing the storling,
Did had thee bleer.

LEONTESS:
To go bo-wipe of why tay you heoy sinknn
to thou so no whal the bede,
And na


100%|██████████| 175/175 [00:03<00:00, 57.81it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:16	mean(losses):1.910
The King said wort me thathc manry.

CYORETCY:
Nell stat, and heaegour to haow and witip, will five, be lired one so eljropielks:
With youf hon, you that her fain to fritiegen are maruty: my gands,--wors accost sard


100%|██████████| 175/175 [00:02<00:00, 59.18it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:17	mean(losses):1.894
The King said thuft elver not mere.

WAMYINIU:
Toakes that knowe if what cather perange.

TROMICOLIAR:
Whit of by Folled's, nos not with. O redast, of mes acerou!

PLAMIAS:
O molch well thou thrights hatayen mone re


100%|██████████| 175/175 [00:02<00:00, 59.13it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:18	mean(losses):1.878
The King said Haaded as sow; my the somfe.

JOMTENETIAT:
No, then this hiwt
the kifince there thou a gone in sot.

Fir rod:
A whas to sice for, home ackmut, ceak, alllings 'wide.

Sillning

TEUNE:
Now your it me of 


100%|██████████| 175/175 [00:02<00:00, 60.25it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:19	mean(losses):1.865
The King said for tirke,
Then ear, and that; I syraed is's sfatksbel freants of fortion, hast they vave me all.

BALFCANT:
God thry eak sand to there wost evint---withto duds gosbendie.

AUPESTIP:
Ay man not is'ing 


100%|██████████| 175/175 [00:02<00:00, 63.02it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:20	mean(losses):1.854
The King said of combrecs'se sood
Whie pyeluve uney; whoth porse, Kall sives: I ga; speir preed you landiod
hir popn that howe her home, swey waindly as is herp thy prvarkt.

KING ET:
Id canafeivever farse;
You, thy


100%|██████████| 175/175 [00:02<00:00, 59.71it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:21	mean(losses):1.842
The King said lirching shatled were at,
Lot a sach your vevere hid of whres.

PETUTEBBOUPE:
Whipce the shome Gearlys goy reat?

ROMHUKH:
Hard'd sterrious insh your douting
Ulloy wear you karent a what her misall age


100%|██████████| 175/175 [00:02<00:00, 60.55it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:22	mean(losses):1.831
The King said ruspers; be lie your you, say;
hir preanpoming owes, the lain gour?

QUPEO:
To wichy Creake, lors matiess, the sore inour:
Why he he gruch my word thes and best your mishich
It, igh! and'lling riess il


100%|██████████| 175/175 [00:03<00:00, 56.97it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:23	mean(losses):1.822
The King said and troier it as wates,
With I cort's tese is connowd.

ENRANT:
I hast fisure, not as some.

First ewsen:
Your Rome think
Withimass, and to what woltome liusens art.

BRINCET

IZENEK:
She krop praief, 


100%|██████████| 175/175 [00:02<00:00, 61.15it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:24	mean(losses):1.813
The King said are slonnenca our worldese
The fispury's, I know,: I coanlnoneng
For dive he shy 'hiss staughtil whigh therethice and fead.
He camLing no singelf: not are, cholid;
Droovous, blick, and what rewsters hi


100%|██████████| 175/175 [00:02<00:00, 61.13it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:25	mean(losses):1.805
The King said yourerthand tepter.

LUCIO:
'I own do is, so his, and your wards,
Ewpornay, decuttle hampelcied!

JULIET:

RADIO:
O; sad spatis anitur imon encere firlaliave,
Whose, mesean: a kone:
If madione montio d


100%|██████████| 175/175 [00:02<00:00, 59.03it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:26	mean(losses):1.798
The King said bly, had tust but:
God's not weres that to untrue detclame', his depied, anly,
Beal confired he lids, yours, be athill I argha:
he this my wall be offy me the deif.

RICHESS

PuonneschtAll on you
Hould


100%|██████████| 175/175 [00:02<00:00, 58.80it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:27	mean(losses):1.789
The King said shall he have ible
For terury with that furrot the piluck;
That as desall
The pay thou in shlaised. Where thear
If and this the sears I day to so thes; to bees,
As seen for love with offment my wroy ha


100%|██████████| 175/175 [00:03<00:00, 58.30it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:28	mean(losses):1.781
The King said of stry then, no willfus,
Nuvish her call lickcant ender,
Whis is the gother sopd the paster,
On, Calneds, nobly was the and forguar the hare.

VOUTBET:
There Cmasse tell his heath, felling's
Transs ha


100%|██████████| 175/175 [00:02<00:00, 58.72it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:29	mean(losses):1.775
The King said dlining to the peyrard all
Sishialy dock and gising abodey there dight
mish in it's is a fhatter!

COMILANDZHARI:
Now Epwicle insting it Bpay dispersin.

RALINS:
And of!
to, no perore cleish the qounke


100%|██████████| 175/175 [00:02<00:00, 60.55it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:30	mean(losses):1.768
The King said the braidher that so.

CSAMENCO:
My
Im you says? goung anings baves cather,
He woulder nef instay not it it the rancose:
I, Rastharks, fhal a kire ut that thus eeps,
And Bhenmy for upent with's did. Th


100%|██████████| 175/175 [00:02<00:00, 62.78it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:31	mean(losses):1.761
The King said Dand rike,
This mishated my means so a rainer;
That the live, yours or thy like thou: atfied scones.

TLANCE:
I grom the ficcee, brys
by a firrut lick wall in a throp doye
If I envivign my burd
cerchio


100%|██████████| 175/175 [00:02<00:00, 64.43it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:32	mean(losses):1.755
The King said that, you knong,
My voak. Now-sear than here, and dedsistein;
It chattion House have-batter's, a
he the casilly shamr.

CILIUBS:
Qish sin word the moles once thou wisulate told.

AUTOLYCE:
What I know 


100%|██████████| 175/175 [00:02<00:00, 58.72it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:33	mean(losses):1.749
The King said shave on will:
Let a lustress my near?

GOcEONZALE:
Her farell vift is and words be tone hath inks notes,
Wruch of hese 'wave of that unthak's are light that.

LADRY BIO:
Yeir thee the maunote wordiadh


100%|██████████| 175/175 [00:03<00:00, 55.58it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:34	mean(losses):1.742
The King said your the are epless!
That Evire brechone we sin thich it in loml daith,
The gone ries reer cacked haf in the comer of I
Ast tish range, there micules a reveqireld much.

HARTINGO:
Ega, my and his that 


100%|██████████| 175/175 [00:03<00:00, 58.02it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:35	mean(losses):1.737
The King said on thy nabrought thou in shows
Out disparin the and to pentreating pentered us.

POMPEY:
Shall for Gecough of onour Jone,
And ay than yoarfore be thou more:
An be to Rolwry, the paid great, that beap e


100%|██████████| 175/175 [00:02<00:00, 61.95it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:36	mean(losses):1.731
The King said the lize. Yet Jento eets, priet;
With of is dich les, he loves of you hasbrank
O king us doth lerness my for her inting.

JORY:
I'll bear shore his Lords his slembrom to pleent
A baisheb be sulden no s


100%|██████████| 175/175 [00:02<00:00, 60.92it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:37	mean(losses):1.726
The King said presise:
I'll that siny you shick tither make man
That came evidtles Rikele and Wolday'sw molef-more
Fria canciest with she comect'd saul?

QUEEN ELIZABESTW:
It hold and I cumeing, no must ip did,
Aada


100%|██████████| 175/175 [00:02<00:00, 60.73it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:38	mean(losses):1.721
The King said be a malf flangets more
by seel the pleadlentieiss the will, thy chinger.

Sirwron:
Whefely whrawta, not is sice;
To-rong, my good my not marry welln their wad,
Aidss afcurea; for himtail shonest queen


100%|██████████| 175/175 [00:02<00:00, 58.48it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:39	mean(losses):1.717
The King said did hear is losour see.

GLOUCESTER:
Now nor the to honour I all: the be in that you this to his
Illy let adey my prarjoit, pofer we the
I comancion's nofence mine no makself,
Baly I hasseings ustiage,


100%|██████████| 175/175 [00:02<00:00, 59.61it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:40	mean(losses):1.711
The King said when Jaice add ip the
Friendous life poor you sporkble ever uping
That you his py had a steze on ward.

Pwond:
But you see, what way grow by the king.

Weren:
How, deestly that we was wind your more an


100%|██████████| 175/175 [00:02<00:00, 58.53it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:41	mean(losses):1.708
The King said there or the look show, and, can
To is I whill dight lived in to stoplicord you hother,
And dasemortsed it that love me! eld.

PETRUCHIO:
Kill the parries of vause; king firs to be
This will that Marem


100%|██████████| 175/175 [00:02<00:00, 59.73it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:42	mean(losses):1.704
The King said barible hath pumes to deed her me.

ALBOGKE:
Sand aming; which mean, and begent line pleated,

KING RICHARD III:
Do beal not faossed when did did his green.
Way you cal upon is a gachich be hang
Dand a


100%|██████████| 175/175 [00:02<00:00, 62.02it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:43	mean(losses):1.700
The King said a goodnd of 'tud then.

TERKEN:
Frant all tall shand of find my friends, that clows
Come time chance bestell from alcels give poor upbook.

CAPRLOLINS:
May clows, father are pranganet, plidigpevil,
At,


100%|██████████| 175/175 [00:02<00:00, 59.58it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:44	mean(losses):1.696
The King said thy crosted fly it passiis.

YORK:
Hin all to tims his king badot: take mister that creef,
Thind your dost man when that I have as blomry:
And in thou that men, he cloise craee as, she nother
For a gre


100%|██████████| 175/175 [00:02<00:00, 62.05it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:45	mean(losses):1.693
The King said the from. Whrichle his age,
Day dear come with not our will fioder beast
The tright all whithal kils,
The sorry world for our austy, Maredhe'd
Mighnd is their weave Engech shord, bids what
not this my 


100%|██████████| 175/175 [00:02<00:00, 60.00it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:46	mean(losses):1.689
The King said on are, stonemy loonted
Min to she reber love to panous, I duke,
This will aticers tooth thuch to verrelice and heaved:
I was a not of it my minesth of I bond
of caught.

First Srovenger:
Aseer Larecio


100%|██████████| 175/175 [00:02<00:00, 59.03it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:47	mean(losses):1.686
The King said and in vieple, and the'chelds;
Where dean his pariss talk the gave but to cheace there
what up with enteoune parwar Lord lick Pay
Even so ip: I heeg in the juce to make my lords
Nou all thich that wela


100%|██████████| 175/175 [00:02<00:00, 58.40it/s]
  0%|          | 0/175 [00:00<?, ?it/s]

epoch:48	mean(losses):1.683
The King said curnen som lay featot them.
Make seal not a kibt is grace, and I love
Tuves and holy say with haw parn my lord,
My tome miseling beaster frummy.

LADY BAFDE:
That father alver me, her sir, it their hai


100%|██████████| 175/175 [00:02<00:00, 59.48it/s]


epoch:49	mean(losses):1.679
The King said was banith!

CLOMENORIO:
And as not this wold I holse weel: should drows:
And sains bebit of the fruster: strive
But you it chance, and not blegnicuponts.

HENRY BOLWARCE:
What take vincence!

KING RIC


### Encoder-Decoderモデルによる機械翻訳

In [20]:
import re
import collections
import itertools

remove_marks_regex = re.compile("[\,\(\)\[\]\*:;¿¡]|<.*?>")
shift_marks_regex = re.compile("([?!\.])")

unk = 0
sos = 1
eos = 2

def normalize(text):
    text = text.lower()
    text = remove_marks_regex.sub("", text)
    text = shift_marks_regex.sub(r" \1 ", text)
    return text
    
def parse_line(line):
    line = normalize(line.strip())
    src, trg, othor = line.split("\t")
    src_tokens = src.strip().split()
    trg_tokens = trg.strip().split()
    return src_tokens, trg_tokens

def build_vocab(tokens):
    counts = collections.Counter(tokens)
    sorted_counts = sorted(counts.items(), key=lambda c: c[1], reverse=True)
    word_list = ["<UNK>", "<SOS>", "<EOS>"] + [x[0] for x in sorted_counts]
    word_dict = dict((w, i) for i, w in enumerate(word_list))
    return word_list, word_dict

def words2tensor(words, word_dict, max_len, padding=0):
    words = words + ["<EOS>"]
    words = [word_dict.get(w, 0) for w in words]
    seq_len = len(words)
    if seq_len < max_len + 1:
        words = words + [padding] * (max_len + 1 - seq_len)
    return torch.tensor(words,dtype=torch.int64), seq_len

In [21]:
class TranslationPairDataset(Dataset):
    def __init__(self, path, max_len=15):
        def filter_pair(p):
            return not (len(p[0]) > max_len or len(p[1]) > max_len)
        
        with open(path) as fp:
            pairs = map(parse_line, fp)
            pairs = filter(filter_pair, pairs)
            pairs = list(pairs)
        '''
        pairs = []
        with open(path) as fp:
            lines = fp.readlines()
            for line in lines:
                pair = map(parse_line, line)
                pair = filter(filter_pair, pair)
                pairs.append(list(pair))
        '''
        src = [p[0] for p in pairs]
        trg = [p[1] for p in pairs]
        self.src_word_list, self.src_word_dict = \
            build_vocab(itertools.chain.from_iterable(src))
        self.trg_word_list, self.trg_word_dict = \
            build_vocab(itertools.chain.from_iterable(trg))
        self.src_data = [words2tensor(words, self.src_word_dict, max_len)
                         for words in src]
        self.trg_data = [words2tensor(words, self.trg_word_dict, max_len, -100)
                         for words in trg]
        
    def __len__(self):
        return len(self.src_data)
    
    def __getitem__(self, idx):
        src, lsrc = self.src_data[idx]
        trg, ltrg = self.trg_data[idx]
        return src, lsrc, trg, ltrg

In [22]:
batch_size = 64
max_len = 10
path = "./data/spa-eng/spa.txt"
ds = TranslationPairDataset(path, max_len=max_len)
loader = DataLoader(ds, batch_size=batch_size, shuffle=True, num_workers=8)

In [23]:
class Encoder(nn.Module):
    def __init__(self, num_embeddings, embedding_dim, hidden_size=50,
                 num_layers=1, dropout=0.2):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers,
                            batch_first=True, dropout=dropout)
        
    def forward(self, x, h0=None, l=None):
        x = self.emb(x)
        if l is not None:
            x = nn.utils.rnn.pack_padded_sequence(x, l, batch_first=True)
        _, h = self.lstm(x, h0)
        return h

In [24]:
class Decoder(nn.Module):
    def __init__(self, num_embeddings, embedding_dim=50, hidden_size=50,
                 num_layers=1, dropout=0.2):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers,
                            batch_first=True, dropout=dropout)
        self.linear = nn.Linear(hidden_size, num_embeddings)
        
    def forward(self, x, h, l=None):
        x = self.emb(x)
        if l is not None:
            x = nn.utils.rnn.pack_padded_sequence(x, l, batch_first=True)
        x, h = self.lstm(x, h)
        if l is not None:
            x = nn.utils.rnn.pad_packed_sequence(
                x, batch_first=True, padding_value=0)[0]
        x = self.linear(x)
        return x, h

In [25]:
def translate(input_str, enc, dec, max_len=15, device="cpu"):
    words = normalize(input_str).split()
    input_tensor, seq_len = words2tensor(words, ds.src_word_dict,
                                         max_len=max_len)
    input_tensor = input_tensor.unsqueeze(0)
    seq_len = [seq_len]
    sos_inputs = torch.tensor(sos, dtype=torch.int64)
    input_tensor = input_tensor.to(device)
    sos_inputs = sos_inputs.to(device)
    
    ctx = enc(input_tensor, l=seq_len)
    z = sos_inputs
    h = ctx
    results = []
    for i in range(max_len):
        o, h = dec(z.view(1, 1), h)
        wi = o.detach().view(-1).max(0)[1]
        if wi.item() == eos:
            break
        results.append(wi.item())
        z = wi
    return " ".join(ds.trg_word_list[i] for i in results)

In [26]:
enc = Encoder(len(ds.src_word_list), 100, 100, 2)
dec = Decoder(len(ds.trg_word_list), 100, 100, 2)
translate("I am a student.", enc, dec)

'pulpo pulpo caliéntese ubicaron ubicaron ubicaron ubicaron rescatado rescatado caminamos defendí carcajadas malgastado alegres alegres'

In [27]:
enc.to("cuda:0")
dec.to("cuda:0")
opt_enc = optim.Adam(enc.parameters(), 0.002)
opt_dec = optim.Adam(dec.parameters(), 0.002)
loss_f = nn.CrossEntropyLoss()

In [28]:
def to2D(x):
    shapes = x.shape
    return x.reshape(shapes[0] * shapes[1], -1)

for epoch in range(30):
    enc.train(), dec.train()
    losses = []
    for x, lx, y, ly in tqdm.tqdm(loader):
        lx, sort_idx = lx.sort(descending=True)
        x, y, ly = x[sort_idx], y[sort_idx], ly[sort_idx]
        x, y = x.to("cuda:0"), y.to("cuda:0")
        ctx = enc(x, l=lx)
        
        ly, sort_idx = ly.sort(descending=True)
        y = y[sort_idx]
        h0 = (ctx[0][:, sort_idx, :], ctx[1][:, sort_idx, :])
        z = y[:, :-1].detach()
        z[z==-100] = 0
        
        o, _ = dec(z, h0, l=ly-1)
        loss = loss_f(to2D(o[:]), to2D(y[:, 1:max(ly)]).squeeze())
        enc.zero_grad(), dec.zero_grad()
        loss.backward()
        opt_enc.step(), opt_dec.step()
        losses.append(loss.item())
        
    enc.eval(), dec.eval()
    print(epoch, mean(losses))
    with torch.no_grad():
        print(translate("I am a student.",
                        enc, dec, max_len=max_len, device="cuda:0"))
        print(translate("He likes to eat pizza.",
                        enc, dec, max_len=max_len, device="cuda:0"))
        print(translate("She is my mother.",
                        enc, dec, max_len=max_len, device="cuda:0"))

100%|██████████| 1666/1666 [00:32<00:00, 51.88it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

0 5.3914137713763175
un gran .
que tom se se se se se se se se
mi padre .


100%|██████████| 1666/1666 [00:31<00:00, 52.19it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

1 3.38359257181724
un gran .
que tom se se se se se quedó .
mi padre .


100%|██████████| 1666/1666 [00:31<00:00, 52.38it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

2 2.1486862899304007
un estudiante .
a tom .
mi nombre .


100%|██████████| 1666/1666 [00:32<00:00, 51.92it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

3 1.6488154805293318
un estudiante .
tom .
mi madre .


100%|██████████| 1666/1666 [00:32<00:00, 51.85it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

4 1.4034806683498557
un estudiante .
como bailar .
mi madre .


100%|██████████| 1666/1666 [00:32<00:00, 51.56it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

5 1.2369211019158792
un estudiante .
tom como comer .
mi madre .


100%|██████████| 1666/1666 [00:32<00:00, 51.63it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

6 1.108269841194439
un estudiante .
como comer .
mi madre .


100%|██████████| 1666/1666 [00:32<00:00, 52.03it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

7 1.008827165073278
un estudiante .
como comer .
mi madre .


100%|██████████| 1666/1666 [00:32<00:00, 52.05it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

8 0.9271537867628512
un estudiante .
como mary .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 52.40it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

9 0.8612521870010326
un estudiante .
como comer .
mi madre .


100%|██████████| 1666/1666 [00:32<00:00, 51.98it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

10 0.805917329719516
ser estudiante .
como comer estúpido .
mi madre .


100%|██████████| 1666/1666 [00:32<00:00, 51.87it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

11 0.7603191016387253
ser estudiante .
como comer estúpido .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 53.61it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

12 0.7203978111012643
un estudiante .
como practicar estúpido .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 52.66it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

13 0.6858058252326008
un estudiante .
como comer .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 52.59it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

14 0.656260742896459
ser estudiante .
como comer estúpido .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 52.24it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

15 0.6294121833301248
ser estudiante .
como comer estúpido .
mi madre .


100%|██████████| 1666/1666 [00:30<00:00, 53.96it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

16 0.6056016509880682
un estudiante .
comer como carne .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 52.99it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

17 0.5843026137688294
ser estudiante .
como comer .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 52.98it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

18 0.5659981964587593
ser estudiante .
como comer estúpido .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 53.12it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

19 0.5471558037961947
ser un estudiante .
como comer estúpido .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 52.83it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

20 0.53202794045031
un estudiante .
como comer pizza .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 52.66it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

21 0.5172774225187187
ser un estudiante .
comer como él .
mi madre .


100%|██████████| 1666/1666 [00:30<00:00, 53.79it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

22 0.5038751446220483
ser estudiante .
comer como jackson .
mi madre .


100%|██████████| 1666/1666 [00:30<00:00, 54.26it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

23 0.4920305671418557
ser un estudiante .
comer como come .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 53.44it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

24 0.47908452176460986
ser estudiante .
comer como come .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 52.38it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

25 0.46872306137489955
.
comer como tú .
mi madre .


100%|██████████| 1666/1666 [00:32<00:00, 52.01it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

26 0.45927732143582417
.
comer como jackson .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 52.29it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

27 0.44919021558217787
.
comer como el tonto .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 53.22it/s]
  0%|          | 0/1666 [00:00<?, ?it/s]

28 0.4406930407484611
.
comer como un tonto .
mi madre .


100%|██████████| 1666/1666 [00:31<00:00, 53.70it/s]

29 0.4322027559755515
.
comer como un tonto .
mi madre .



