In [1]:
import os
from tqdm import tqdm_notebook, tqdm
import csv
import more_itertools as mit  # not built-in package
from collections import Counter
import torch
try:
    _tqdm = tqdm_notebook
except ImportError:
    _tqdm = tqdm
        

train_list = []
with open('snli_train.tsv', 'r') as trainfile:
    train_reader = csv.reader(trainfile, delimiter='\t')
    
    for row in _tqdm(train_reader):
        train_list.append(row)
        
train_list = train_list[1:]

valid_list = []
with open('snli_val.tsv', 'r') as valfile:
    val_reader = csv.reader(valfile, delimiter='\t')
    
    for row in _tqdm(val_reader):
        valid_list.append(row)
        
valid_list = valid_list[1:]


# making a vocabulary
all_tokens = list(mit.flatten([[ll.split() for ll in l[:2]] for l in train_list] + [[ll.split() for ll in l[:2]] for l in valid_list]))
all_tokens = list(mit.flatten(all_tokens))
all_tokens = [t.lower() for t in all_tokens]

counted_tokens = Counter(all_tokens)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [2]:
max_vocab_size=None
PAD_IDX=0
UNK_IDX=1

In [3]:
train_list[0]

['A young girl in a pink shirt sitting on a dock viewing a body of water .',
 'A young girl watching the sunset over the water .',
 'neutral']

In [4]:
#  init fasttext weights
import io

def load_vectors(fname):
    fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
    n, d = map(int, fin.readline().split())
    data = {}
    for line in _tqdm(fin):
        tokens = line.rstrip().split(' ')
        data[tokens[0]] = map(float, tokens[1:])
    return data

words = load_vectors('./wiki-news-300d-1M.vec')
words_dict = {}

for w in _tqdm(words):
    words_dict[w] = torch.Tensor([c for c in words[w]])

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=0, max=999994), HTML(value='')))




In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
from operator import itemgetter
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


vocab, count = zip(*counted_tokens.most_common(max_vocab_size))
id2token = list(vocab)
token2id = dict(zip(vocab, range(2,2+len(vocab)))) 
id2token = ['<pad>', '<unk>'] + id2token
token2id['<pad>'] = PAD_IDX 
token2id['<unk>'] = UNK_IDX

def _text2id(doc):
    return [token2id[t] if t in token2id else UNK_IDX for t in doc]

def _id2text(vec):
    return [id2token[i] for i in vec]

class SnliDataset(Dataset):
    def __init__(self, data_list, device='cpu'):
        """
        data_list is a list of tuples: (x,y) where x is a list of ids and y is a label
        """
        labels = {
            'neutral': 0,
            'entailment': 1,
            'contradiction': 2
        }
        self.data_tensors = []
        for (t1, t2, label) in _tqdm(data_list):
            _t1 = torch.LongTensor(_text2id([w.lower() for w in t1.split()])).to(device)
            _t2 = torch.LongTensor(_text2id([w.lower() for w in t2.split()])).to(device)
            _t1_length = len(t1.split())
            _t2_length = len(t2.split())
            _label = torch.LongTensor([labels[label]]).to(device)
            self.data_tensors.append([_t1, _t1_length, _t2, _t2_length, _label])
              
    def __getitem__(self, key):
        (t1, t1_len, t2, t2_len, label) = self.data_tensors[key]
        
        return t1, t1_len, t2, t2_len, label

    def __len__(self):
        return len(self.data_tensors)

def pad(tensor, length, dim=0, pad=0):
    """Pad tensor to a specific length.
    :param tensor: vector to pad
    :param length: new length
    :param dim: (default 0) dimension to pad
    :returns: padded tensor if the tensor is shorter than length
    """
    if tensor.size(dim) < length:
        return torch.cat(
            [tensor, tensor.new(*tensor.size()[:dim],
                                length - tensor.size(dim),
                                *tensor.size()[dim + 1:]).fill_(pad)],
            dim=dim)
    else:
        return tensor

In [6]:
def batchify(batch):
    maxlen_t1 = max(batch, key = itemgetter(1))[1]
    maxlen_t2 = max(batch, key = itemgetter(3))[3]
    t1_batch_list = []
    t2_batch_list = []
    target_list = []
    for b in batch:
        t1_batch_list.append(pad(b[0], maxlen_t1, dim=0, pad=PAD_IDX))
        t2_batch_list.append(pad(b[2], maxlen_t2, dim=0, pad=PAD_IDX))

        target_list.append(b[4])
    
    lengths = torch.LongTensor([(b[1],b[3]) for b in batch]).to(b[0][0].device)
    
    lens_inds = torch.sort(lengths, 0, descending=True)
    
    t1_batch_list = torch.stack(t1_batch_list, 0).index_select(0, lens_inds[1][:,0])
    t2_batch_list = torch.stack(t2_batch_list, 0).index_select(0, lens_inds[1][:,1])

    #t1_input_batch = torch.stack(t1_batch_list, 0)
    #t2_input_batch = torch.stack(t2_batch_list, 0)

    target_batch = torch.stack(target_list, 0)
    
    return t1_batch_list, t2_batch_list, target_batch, lens_inds

In [7]:
snli_train_dataset = SnliDataset(train_list, device='cuda')
train_loader = DataLoader(snli_train_dataset, batch_size=512, collate_fn=batchify, shuffle=True)

snli_valid_dataset = SnliDataset(valid_list, device='cuda')
valid_loader = DataLoader(snli_valid_dataset, batch_size=512, collate_fn=batchify, shuffle=False)

HBox(children=(IntProgress(value=0, max=100000), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))




In [177]:
import torch.nn as nn
#  copy fasttext vectors into embeddings weights given the vocabulary

class RNNEncoder(nn.Module):
    def __init__(self, hidden_size, id2token, embedding_size=300, num_layers=1, classifier_hidden=512, dropout=0.0):
        super().__init__()
        self.embedding = self._copy_embedding_weights(words, id2token, embedding_size)
        self.encoder = nn.GRU(num_layers=num_layers, hidden_size=hidden_size, bias=True, input_size=embedding_size, batch_first=True,
                             bidirectional=True)
        
        self.classifier = nn.ModuleList([nn.Linear(4*hidden_size, classifier_hidden), nn.Tanh(), nn.Dropout(p=dropout), nn.Linear(classifier_hidden, 3)])
        

    def _copy_embedding_weights(self, words, id2token, embedding_size=300):
        embedding = nn.Embedding(embedding_dim=embedding_size, num_embeddings=len(id2token), padding_idx=0)
        cnt = 0
        for i,w in enumerate(id2token):
            if w in words:
                embedding.weight.data[i] = words_dict[w]
                cnt +=1 
        print('{} words copied into nn.Embedding'.format(cnt))
        return embedding
        
    def forward(self, t1, t2, lens_inds):
        t1_embedded = self.embedding(t1)
        t2_embedded = self.embedding(t2)
        t1_packed = pack_padded_sequence(t1_embedded, lens_inds[0][:,0], batch_first=True)
        t2_packed = pack_padded_sequence(t2_embedded, lens_inds[0][:,1], batch_first=True)
        
        t1_grued = self.encoder(t1_packed)
        t2_grued = self.encoder(t2_packed)
        t1_out = torch.cat(torch.unbind(t1_grued[1], 0), dim=-1)
        t2_out = torch.cat(torch.unbind(t2_grued[1], 0), dim=-1)
        #import ipdb; ipdb.set_trace()
        # need to reorder back firstly
        #assert len(sorted_indices[0]) == len(sorted_indices[1])
        t1_out = t1_out.index_select(dim=0, index=torch.sort(lens_inds[1][:,0])[1])
        t2_out = t2_out.index_select(dim=0, index=torch.sort(lens_inds[1][:,1])[1])
        
        classifier_input = torch.cat([t1_out, t2_out], dim=-1)

        x = classifier_input
        for l in self.classifier:
            x = l(x)
        
        class_out = x
        
        return class_out

In [9]:
encoder = RNNEncoder(512, id2token)
encoder = encoder.cuda()

19027 words copied into nn.Embedding


In [12]:
num_epochs=10

loss = nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(encoder.parameters(), amsgrad=True, lr=0.001)
optimizer = torch.optim.SGD(encoder.parameters(), momentum=0.99, nesterov=True, lr=0.001)

def _do_valid(encoder, valid_loader, mode='valid'):
    correct = 0
    processed = 0
    for i,(t1,t2,l,lens_inds) in enumerate(valid_loader):
        output = encoder(t1,t2,lens_inds)
        predicted = torch.argmax(output, 1)
        correct += (l.view(-1) == predicted.view(-1)).sum().item()
        processed += predicted.size(0)
    print('Acc {}: {}'.format(mode, correct/processed))
        
for e in range(num_epochs):
    encoder.train()
    for i,(t1,t2,l,lens_inds) in enumerate(_tqdm(train_loader)):
        optimizer.zero_grad()
        output = encoder(t1,t2,lens_inds)
        _loss = loss(output, l.view(-1))
        _loss.backward()
        #import ipdb; ipdb.set_trace()
        optimizer.step()
    print('Epoch {} loss: {}'.format(e, _loss.item()))
    _do_valid(encoder, train_loader, 'train')
    _do_valid(encoder, valid_loader)
    

HBox(children=(IntProgress(value=0, max=196), HTML(value='')))


Epoch 0 loss: 0.7707651257514954


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))


Acc valid: 0.65809


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))


Acc valid: 0.636


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

KeyboardInterrupt: 

In [40]:
ipdb.pm()

NameError: name 'ipdb' is not defined

In [13]:
def batchify_withoutsort(batch):
    # make it for cnn
    maxlen_t1 = max(batch, key = itemgetter(1))[1]
    maxlen_t2 = max(batch, key = itemgetter(3))[3]
    t1_batch_list = []
    t2_batch_list = []
    target_list = []
    for b in batch:
        t1_batch_list.append(pad(b[0], maxlen_t1, dim=0, pad=PAD_IDX))
        t2_batch_list.append(pad(b[2], maxlen_t2, dim=0, pad=PAD_IDX))

        target_list.append(b[4])
    
#     lengths = torch.LongTensor([(b[1],b[3]) for b in batch]).to(b[0][0].device)
    
#     lens_inds = torch.sort(lengths, 0, descending=True)
    
#     t1_batch_list = torch.stack(t1_batch_list, 0).index_select(0, lens_inds[1][:,0])
#     t2_batch_list = torch.stack(t2_batch_list, 0).index_select(0, lens_inds[1][:,1])

    t1_input_batch = torch.stack(t1_batch_list, 0)
    t2_input_batch = torch.stack(t2_batch_list, 0)

    target_batch = torch.stack(target_list, 0)
    
    return t1_input_batch, t2_input_batch, target_batch

In [14]:
# no sorting in loader for CNN

snli_train_dataset = SnliDataset(train_list, device='cuda')
train_loader = DataLoader(snli_train_dataset, batch_size=512, collate_fn=batchify_withoutsort, shuffle=True)

snli_valid_dataset = SnliDataset(valid_list, device='cuda')
valid_loader = DataLoader(snli_valid_dataset, batch_size=512, collate_fn=batchify_withoutsort, shuffle=False)

HBox(children=(IntProgress(value=0, max=100000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

In [115]:
import torch.nn.functional as F

class CNNEncoder(nn.Module):
    def __init__(self, hidden_size, id2token, embedding_size=300, num_layers=1, classifier_hidden=512, kernel_size=3, pool_kernel_size=2, dropout=0.5):
        super().__init__()
        self.embedding = self._copy_embedding_weights(words, id2token, embedding_size)
        self.embedding.weight.requires_grad=False
        self.encoder = nn.ModuleList([nn.Conv1d(in_channels=embedding_size, out_channels=hidden_size, kernel_size=kernel_size, stride=1), nn.MaxPool1d(stride=1, kernel_size=pool_kernel_size) , nn.ReLU(), nn.Conv1d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=kernel_size, stride=1), nn.Dropout(p=dropout)])
        
        self.classifier = nn.ModuleList([nn.Linear(2*hidden_size, classifier_hidden), nn.Tanh(), nn.Dropout(p=dropout), nn.Linear(classifier_hidden, 3)])
        

    def _copy_embedding_weights(self, words, id2token, embedding_size=300):
        embedding = nn.Embedding(embedding_dim=embedding_size, num_embeddings=len(id2token), padding_idx=0)
        cnt = 0
        for i,w in enumerate(id2token):
            if w in words:
                embedding.weight.data[i] = words_dict[w]
                cnt +=1 
        print('{} words copied into nn.Embedding'.format(cnt))
        return embedding
    
    def _encode(self, x):
        for l in self.encoder:
            x = l(x)
            
        return x
    
    def _classify(self, x):
        for l in self.classifier:
            x = l(x)
            
        return x
        
    def forward(self, t1, t2):
        embedded = []
        embedded.append(self.embedding(t1))
        embedded.append(self.embedding(t2))
           
        cnn_out = []
        
        for i in range(2):
            _out = self._encode(embedded[i].permute(0,2,1))
            cnn_out.append(F.max_pool1d(_out , kernel_size=_out.size(-1)).squeeze(-1))
            #import ipdb; ipdb.set_trace()
            
        #import ipdb; ipdb.set_trace()

        classifier_input = torch.cat(cnn_out, dim=1)
        classifier_out = self._classify(classifier_input)
        
        
        return classifier_out

In [119]:
cnnencoder = CNNEncoder(128, id2token)
cnnencoder = cnnencoder.cuda()

19027 words copied into nn.Embedding


In [121]:
num_epochs=10

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnnencoder.parameters(), amsgrad=True, lr=0.001)
#optimizer = torch.optim.SGD(cnnencoder.parameters(), momentum=0.99, nesterov=True, lr=0.001)

def _do_valid(cnnencoder, valid_loader, mode='valid'):
    cnnencoder.eval()
    correct = 0
    processed = 0
    for i,(t1,t2,target) in enumerate(valid_loader):
        output = cnnencoder(t1,t2)
        predicted = torch.argmax(output, 1)
        correct += (target.view(-1) == predicted.view(-1)).sum().item()
        processed += predicted.size(0)
        if i == 25 and mode == 'train':
            break
    print('Acc {}: {}'.format(mode, correct/processed))
        
for e in range(num_epochs):
    cnnencoder.train()
    for i,(t1,t2,l) in enumerate(_tqdm(train_loader)):
        optimizer.zero_grad()
        output = cnnencoder(t1,t2)
        _loss = loss(output, l.view(-1))
        _loss.backward()
        #import ipdb; ipdb.set_trace()
        optimizer.step()
    print('Epoch {} loss: {}'.format(e, _loss.item()))
    _do_valid(cnnencoder, train_loader, 'train')
    _do_valid(cnnencoder, valid_loader)

HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.5395132899284363
Acc train: 0.80859375
Acc valid: 0.654


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.45488959550857544
Acc train: 0.7980769230769231
Acc valid: 0.626


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.47037428617477417
Acc train: 0.8390174278846154
Acc valid: 0.623


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.5807957649230957
Acc train: 0.8417217548076923
Acc valid: 0.62


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.47192734479904175
Acc train: 0.8214393028846154
Acc valid: 0.618


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.4737427234649658
Acc train: 0.8553185096153846
Acc valid: 0.624


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.4501590132713318
Acc train: 0.8617788461538461
Acc valid: 0.623


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.46411752700805664
Acc train: 0.8542668269230769
Acc valid: 0.625


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.41724902391433716
Acc train: 0.8650090144230769
Acc valid: 0.618


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.4552728235721588
Acc train: 0.8704927884615384
Acc valid: 0.626


## CNN GRID SEARCH

In [None]:
snli_train_dataset = SnliDataset(train_list, device='cuda')
train_loader = DataLoader(snli_train_dataset, batch_size=512, collate_fn=batchify_withoutsort, shuffle=True)

snli_valid_dataset = SnliDataset(valid_list, device='cuda')
valid_loader = DataLoader(snli_valid_dataset, batch_size=512, collate_fn=batchify_withoutsort, shuffle=False)

In [144]:
cnn_grid_opts = {
    'dropout': [0.0,0.5],
    'cnn_hidden': [128,256],
    'kernel_size': [3,7],
}

def _do_valid(cnnencoder, valid_loader, metrics, mode='valid'):
    cnnencoder.eval()
    correct = 0
    processed = 0
    for i,(t1,t2,target) in enumerate(valid_loader):
        output = cnnencoder(t1,t2)
        predicted = torch.argmax(output, 1)
        correct += (target.view(-1) == predicted.view(-1)).sum().item()
        processed += predicted.size(0)
        if i == 25 and mode == 'train':
            break
    print('Acc {}: {}'.format(mode, correct/processed))
    metrics['{}_acc'.format(mode)].append(correct/processed)

def do_train(configuration, num_epochs=10):
    model = CNNEncoder(configuration['cnn_hidden'], id2token, classifier_hidden=512, dropout=configuration['dropout'], embedding_size=300)
    model = model.cuda()
    
    metrics = {
        'train_loss': [],
        'train_acc': [],
        'valid_acc': []
    }
    
    best_model = {'valid_acc': 0,
                  'configuration': configuration,
                  'model_dict': None
                 }
    
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), amsgrad=True, lr=0.001)
    
    for e in range(num_epochs):
        model.train()
        for i,(t1,t2,l) in enumerate(_tqdm(train_loader)):
            optimizer.zero_grad()
            output = model(t1,t2)
            _loss = loss(output, l.view(-1))
            _loss.backward()
            #import ipdb; ipdb.set_trace()
            optimizer.step()
        print('Epoch {} loss: {}'.format(e, _loss.item()))
        _do_valid(model, train_loader, metrics, 'train')
        _do_valid(model, valid_loader, metrics)
        metrics['train_loss'].append(_loss.item())
        if best_model['valid_acc'] < metrics['valid_acc'][-1]:
            print('Saving best model...')
            best_model['valid_acc'] = metrics['valid_acc'][-1]
            best_model['configuration'] = configuration
            best_model['model_dict'] = model.state_dict()
    
    return metrics, best_model

In [125]:
import itertools
indp = [[{key: value} for value in values] for key, values in cnn_grid_opts.items()]
product_options = list(itertools.product(*indp))
prod_dicts = []
for t in product_options:
    prod_dicts.append({ k: v for d in t for k, v in d.items() })

In [139]:
res = do_train(prod_dicts[0])

19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.7807811498641968
Acc train: 0.6596304086538461
Acc valid: 0.62
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.8736655116081238
Acc train: 0.6889272836538461
Acc valid: 0.648
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.7233630418777466
Acc train: 0.7062049278846154
Acc valid: 0.641


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.7288001179695129
Acc train: 0.7319711538461539
Acc valid: 0.637


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.6687719225883484
Acc train: 0.7635216346153846
Acc valid: 0.653
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.5779821276664734
Acc train: 0.7872596153846154
Acc valid: 0.652


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.5587182641029358
Acc train: 0.8070162259615384
Acc valid: 0.649


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.5147265195846558
Acc train: 0.8248948317307693
Acc valid: 0.645


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.5286622643470764
Acc train: 0.8465294471153846
Acc valid: 0.635


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.4644191265106201
Acc train: 0.8635817307692307
Acc valid: 0.643


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 10 loss: 0.40260377526283264
Acc train: 0.8770282451923077
Acc valid: 0.643


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 11 loss: 0.44322094321250916
Acc train: 0.8918269230769231
Acc valid: 0.632


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 12 loss: 0.39769890904426575
Acc train: 0.9028695913461539
Acc valid: 0.615


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 13 loss: 0.3817237317562103
Acc train: 0.9112830528846154
Acc valid: 0.625


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 14 loss: 0.3540833294391632
Acc train: 0.91796875
Acc valid: 0.617


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 15 loss: 0.19627270102500916
Acc train: 0.9239032451923077
Acc valid: 0.626


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 16 loss: 0.3991050124168396
Acc train: 0.9333683894230769
Acc valid: 0.606


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 17 loss: 0.24662485718727112
Acc train: 0.9423076923076923
Acc valid: 0.608


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 18 loss: 0.17846353352069855
Acc train: 0.9407301682692307
Acc valid: 0.597


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 19 loss: 0.28216373920440674
Acc train: 0.9465144230769231
Acc valid: 0.628


In [141]:
results = [res]

In [142]:
for conf in prod_dicts[1:]:
    results.append(do_train(conf))

19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.7933014631271362
Acc train: 0.6432542067307693
Acc valid: 0.623
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.7997827529907227
Acc train: 0.6869741586538461
Acc valid: 0.641
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.8000414967536926
Acc train: 0.7205528846153846
Acc valid: 0.65
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.8215698003768921
Acc train: 0.751953125
Acc valid: 0.643


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.6759891510009766
Acc train: 0.7670522836538461
Acc valid: 0.645


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.5690852403640747
Acc train: 0.7855318509615384
Acc valid: 0.632


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.6323990225791931
Acc train: 0.8085186298076923
Acc valid: 0.638


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.6064950823783875
Acc train: 0.8240685096153846
Acc valid: 0.628


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.527915358543396
Acc train: 0.8381911057692307
Acc valid: 0.642


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.5880738496780396
Acc train: 0.8495342548076923
Acc valid: 0.624


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 10 loss: 0.4770032465457916
Acc train: 0.8816105769230769
Acc valid: 0.608


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 11 loss: 0.4510144591331482
Acc train: 0.8889723557692307
Acc valid: 0.613


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 12 loss: 0.42074671387672424
Acc train: 0.9009164663461539
Acc valid: 0.611


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 13 loss: 0.3519744575023651
Acc train: 0.9118840144230769
Acc valid: 0.606


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 14 loss: 0.2856065034866333
Acc train: 0.9221754807692307
Acc valid: 0.627


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 15 loss: 0.35913974046707153
Acc train: 0.9228515625
Acc valid: 0.623


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 16 loss: 0.2685835063457489
Acc train: 0.93359375
Acc valid: 0.607


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 17 loss: 0.28331178426742554
Acc train: 0.9349459134615384
Acc valid: 0.608


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 18 loss: 0.26758092641830444
Acc train: 0.9388521634615384
Acc valid: 0.611


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 19 loss: 0.18307915329933167
Acc train: 0.9460637019230769
Acc valid: 0.61
19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.8561557531356812
Acc train: 0.6615835336538461
Acc valid: 0.639
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.8240270614624023
Acc train: 0.6999699519230769
Acc valid: 0.637


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.5972060561180115
Acc train: 0.7291165865384616
Acc valid: 0.631


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.6287404298782349
Acc train: 0.7659254807692307
Acc valid: 0.639


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.7063338756561279
Acc train: 0.8004807692307693
Acc valid: 0.634


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.6033953428268433
Acc train: 0.8310546875
Acc valid: 0.63


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.4974757134914398
Acc train: 0.8544921875
Acc valid: 0.62


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.5006827712059021
Acc train: 0.8809344951923077
Acc valid: 0.612


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.46031469106674194
Acc train: 0.8987379807692307
Acc valid: 0.623


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.4269087314605713
Acc train: 0.9193960336538461
Acc valid: 0.61


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 10 loss: 0.31102508306503296
Acc train: 0.9309645432692307
Acc valid: 0.61


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 11 loss: 0.2656104564666748
Acc train: 0.9417067307692307
Acc valid: 0.6


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 12 loss: 0.29407888650894165
Acc train: 0.9425330528846154
Acc valid: 0.621


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 13 loss: 0.2700766324996948
Acc train: 0.9403545673076923
Acc valid: 0.591


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 14 loss: 0.20266786217689514
Acc train: 0.9598858173076923
Acc valid: 0.609


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 15 loss: 0.21694794297218323
Acc train: 0.9641676682692307
Acc valid: 0.612


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 16 loss: 0.2099033147096634
Acc train: 0.9668719951923077
Acc valid: 0.616


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 17 loss: 0.15579184889793396
Acc train: 0.9667217548076923
Acc valid: 0.608


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 18 loss: 0.08592008799314499
Acc train: 0.9752103365384616
Acc valid: 0.595


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 19 loss: 0.1017363891005516
Acc train: 0.9756610576923077
Acc valid: 0.609
19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.729027271270752
Acc train: 0.6646634615384616
Acc valid: 0.635
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.7260875105857849
Acc train: 0.7073317307692307
Acc valid: 0.656
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.6779547333717346
Acc train: 0.7359525240384616
Acc valid: 0.663
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.6927205324172974
Acc train: 0.7674278846153846
Acc valid: 0.661


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.5629647970199585
Acc train: 0.7996544471153846
Acc valid: 0.669
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.6461130380630493
Acc train: 0.837890625
Acc valid: 0.661


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.45474910736083984
Acc train: 0.8560697115384616
Acc valid: 0.645


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.4138783812522888
Acc train: 0.8771784855769231
Acc valid: 0.626


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.547460675239563
Acc train: 0.9033203125
Acc valid: 0.633


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.34896111488342285
Acc train: 0.9036959134615384
Acc valid: 0.599


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 10 loss: 0.3191346824169159
Acc train: 0.9299879807692307
Acc valid: 0.633


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 11 loss: 0.24994969367980957
Acc train: 0.9411808894230769
Acc valid: 0.622


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 12 loss: 0.24689428508281708
Acc train: 0.9562049278846154
Acc valid: 0.627


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 13 loss: 0.3516639471054077
Acc train: 0.9523737980769231
Acc valid: 0.603


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 14 loss: 0.2852811813354492
Acc train: 0.9507211538461539
Acc valid: 0.59


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 15 loss: 0.20948195457458496
Acc train: 0.9610126201923077
Acc valid: 0.624


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 16 loss: 0.1579096019268036
Acc train: 0.9710036057692307
Acc valid: 0.628


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 17 loss: 0.13218431174755096
Acc train: 0.9709284855769231
Acc valid: 0.628


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 18 loss: 0.15210440754890442
Acc train: 0.9751352163461539
Acc valid: 0.63


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 19 loss: 0.09668377786874771
Acc train: 0.9758112980769231
Acc valid: 0.612
19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.8484417200088501
Acc train: 0.6328125
Acc valid: 0.627
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.7691774964332581
Acc train: 0.6585036057692307
Acc valid: 0.626


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.7275975942611694
Acc train: 0.6856971153846154
Acc valid: 0.63
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.7245598435401917
Acc train: 0.7162710336538461
Acc valid: 0.641
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.6566281318664551
Acc train: 0.7347506009615384
Acc valid: 0.651
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.7106016278266907
Acc train: 0.7430889423076923
Acc valid: 0.649


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.6675001382827759
Acc train: 0.7615685096153846
Acc valid: 0.649


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.701291024684906
Acc train: 0.7623948317307693
Acc valid: 0.615


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.6217033863067627
Acc train: 0.7737379807692307
Acc valid: 0.647


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.5283071994781494
Acc train: 0.7848557692307693
Acc valid: 0.616


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 10 loss: 0.5442051291465759
Acc train: 0.7888371394230769
Acc valid: 0.629


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 11 loss: 0.5200611352920532
Acc train: 0.8064152644230769
Acc valid: 0.631


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 12 loss: 0.5364323854446411
Acc train: 0.8220402644230769
Acc valid: 0.639


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 13 loss: 0.5506794452667236
Acc train: 0.8343599759615384
Acc valid: 0.638


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 14 loss: 0.5227320790290833
Acc train: 0.8274489182692307
Acc valid: 0.628


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 15 loss: 0.5317319631576538
Acc train: 0.8568960336538461
Acc valid: 0.63


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 16 loss: 0.4416835904121399
Acc train: 0.8433743990384616
Acc valid: 0.621


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 17 loss: 0.433035671710968
Acc train: 0.8590745192307693
Acc valid: 0.626


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 18 loss: 0.42696014046669006
Acc train: 0.8755258413461539
Acc valid: 0.639


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 19 loss: 0.4062269628047943
Acc train: 0.8692157451923077
Acc valid: 0.625
19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

KeyboardInterrupt: 

In [145]:
for conf in prod_dicts[5:]:
    results.append(do_train(conf))

19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.8610424995422363
Acc train: 0.6358173076923077
Acc valid: 0.612
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.7673264741897583
Acc train: 0.6776592548076923
Acc valid: 0.643
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.7682939171791077
Acc train: 0.7014723557692307
Acc valid: 0.638


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.7528776526451111
Acc train: 0.7182241586538461
Acc valid: 0.643


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.7194159626960754
Acc train: 0.7408353365384616
Acc valid: 0.643


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.6146603226661682
Acc train: 0.7608173076923077
Acc valid: 0.642


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.5352078676223755
Acc train: 0.7594651442307693
Acc valid: 0.637


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.6307328939437866
Acc train: 0.7752403846153846
Acc valid: 0.639


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.6268765330314636
Acc train: 0.7918419471153846
Acc valid: 0.628


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.6341923475265503
Acc train: 0.8170072115384616
Acc valid: 0.636
19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.857288658618927
Acc train: 0.6440054086538461
Acc valid: 0.629
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.827795684337616
Acc train: 0.6925330528846154
Acc valid: 0.654
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.7040460109710693
Acc train: 0.7230318509615384
Acc valid: 0.646


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.7402437925338745
Acc train: 0.7549579326923077
Acc valid: 0.657
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.8173075914382935
Acc train: 0.7491736778846154
Acc valid: 0.646


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.7022057175636292
Acc train: 0.7838792067307693
Acc valid: 0.638


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.5479670166969299
Acc train: 0.8013822115384616
Acc valid: 0.639


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.5596460700035095
Acc train: 0.8189603365384616
Acc valid: 0.64


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.4629267156124115
Acc train: 0.8363882211538461
Acc valid: 0.646


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.5313253998756409
Acc train: 0.8511117788461539
Acc valid: 0.628
19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.883948028087616
Acc train: 0.6101262019230769
Acc valid: 0.585
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.7388103604316711
Acc train: 0.6566256009615384
Acc valid: 0.633
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.7983345985412598
Acc train: 0.6740534855769231
Acc valid: 0.629


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.786353588104248
Acc train: 0.6768329326923077
Acc valid: 0.617


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.6526815891265869
Acc train: 0.7036508413461539
Acc valid: 0.625


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.5619288086891174
Acc train: 0.7575871394230769
Acc valid: 0.622


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.593924880027771
Acc train: 0.7437650240384616
Acc valid: 0.622


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.6109336614608765
Acc train: 0.8013070913461539
Acc valid: 0.625


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.6084578037261963
Acc train: 0.8349609375
Acc valid: 0.633


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.5897485017776489
Acc train: 0.8209134615384616
Acc valid: 0.608


In [148]:
import pickle
with open('cnn_grid.pkl', 'wb') as f: pickle.dump(results, f)

In [166]:
snli_train_dataset = SnliDataset(train_list, device='cuda')
train_loader = DataLoader(snli_train_dataset, batch_size=512, collate_fn=batchify, shuffle=True)

snli_valid_dataset = SnliDataset(valid_list, device='cuda')
valid_loader = DataLoader(snli_valid_dataset, batch_size=512, collate_fn=batchify, shuffle=False)

HBox(children=(IntProgress(value=0, max=100000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

In [182]:
rnn_grid_opts = {
    'rnn_hidden': [256,512],
    'dropout': [0.0,0.5],
}

def _do_valid(rnnencoder, valid_loader, metrics, mode='valid'):
    rnnencoder.eval()
    correct = 0
    processed = 0
    for i,(t1,t2,target, lens) in enumerate(valid_loader):
        output = rnnencoder(t1,t2, lens)
        predicted = torch.argmax(output, 1)
        correct += (target.view(-1) == predicted.view(-1)).sum().item()
        processed += predicted.size(0)
        if i == 25 and mode == 'train':
            break
    print('Acc {}: {}'.format(mode, correct/processed))
    metrics['{}_acc'.format(mode)].append(correct/processed)

def do_train(configuration, num_epochs=10):
    model = RNNEncoder(configuration['rnn_hidden'], id2token, classifier_hidden=512, embedding_size=300, dropout=configuration['dropout'])
    model = model.cuda()
    
    metrics = {
        'train_loss': [],
        'train_acc': [],
        'valid_acc': []
    }
    
    best_model = {'valid_acc': 0,
                  'configuration': configuration,
                  'model_dict': None
                 }
    
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), amsgrad=True, lr=0.001)
    
    for e in range(num_epochs):
        model.train()
        for i,(t1,t2,l, lens) in enumerate(_tqdm(train_loader)):
            optimizer.zero_grad()
            output = model(t1,t2, lens)
            _loss = loss(output, l.view(-1))
            _loss.backward()
            #import ipdb; ipdb.set_trace()
            optimizer.step()
        print('Epoch {} loss: {}'.format(e, _loss.item()))
        _do_valid(model, train_loader, metrics, 'train')
        _do_valid(model, valid_loader, metrics)
        metrics['train_loss'].append(_loss.item())
        if best_model['valid_acc'] < metrics['valid_acc'][-1]:
            print('Saving best model...')
            best_model['valid_acc'] = metrics['valid_acc'][-1]
            best_model['configuration'] = configuration
            best_model['model_dict'] = model.state_dict()
    
    return metrics, best_model

In [183]:
indp = [[{key: value} for value in values] for key, values in rnn_grid_opts.items()]
product_options = list(itertools.product(*indp))
prod_dicts = []
for t in product_options:
    prod_dicts.append({ k: v for d in t for k, v in d.items() })

In [184]:
prod_dicts

[{'rnn_hidden': 256, 'dropout': 0.0},
 {'rnn_hidden': 256, 'dropout': 0.5},
 {'rnn_hidden': 512, 'dropout': 0.0},
 {'rnn_hidden': 512, 'dropout': 0.5}]

In [185]:
rnn_results = []
for conf in prod_dicts:
    rnn_results.append(do_train(conf))

19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.8464523553848267
Acc train: 0.6906550480769231
Acc valid: 0.645
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.750110387802124
Acc train: 0.7265625
Acc valid: 0.634


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.7041967511177063
Acc train: 0.7620192307692307
Acc valid: 0.626


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.6543221473693848
Acc train: 0.7908653846153846
Acc valid: 0.635


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.5994908809661865
Acc train: 0.8185847355769231
Acc valid: 0.619


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.5007762908935547
Acc train: 0.8485576923076923
Acc valid: 0.609


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.4842374324798584
Acc train: 0.8716947115384616
Acc valid: 0.623


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.42139238119125366
Acc train: 0.8917518028846154
Acc valid: 0.622


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.3030606210231781
Acc train: 0.9012920673076923
Acc valid: 0.619


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.2844225764274597
Acc train: 0.9169921875
Acc valid: 0.621
19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.7422458529472351
Acc train: 0.6890024038461539
Acc valid: 0.644
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.7326134443283081
Acc train: 0.7263371394230769
Acc valid: 0.642


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.8196423649787903
Acc train: 0.7560847355769231
Acc valid: 0.632


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.6865135431289673
Acc train: 0.7869591346153846
Acc valid: 0.632


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.5167494416236877
Acc train: 0.8163311298076923
Acc valid: 0.629


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.47295770049095154
Acc train: 0.8422475961538461
Acc valid: 0.617


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.5801465511322021
Acc train: 0.8586237980769231
Acc valid: 0.607


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.48218274116516113
Acc train: 0.8754507211538461
Acc valid: 0.619


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.3050581216812134
Acc train: 0.8922776442307693
Acc valid: 0.595


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.3421807289123535
Acc train: 0.9077524038461539
Acc valid: 0.614
19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.8284231424331665
Acc train: 0.6915564903846154
Acc valid: 0.641
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.7108291387557983
Acc train: 0.7354266826923077
Acc valid: 0.646
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.6935036778450012
Acc train: 0.7621694711538461
Acc valid: 0.623


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.5011296272277832
Acc train: 0.7990534855769231
Acc valid: 0.636


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.4562285542488098
Acc train: 0.8292518028846154
Acc valid: 0.624


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.5033654570579529
Acc train: 0.8643329326923077
Acc valid: 0.637


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.43989038467407227
Acc train: 0.8835637019230769
Acc valid: 0.637


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.35275501012802124
Acc train: 0.9094801682692307
Acc valid: 0.644


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.3634081482887268
Acc train: 0.9195462740384616
Acc valid: 0.626


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.2794835567474365
Acc train: 0.9359224759615384
Acc valid: 0.618
19027 words copied into nn.Embedding


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 0 loss: 0.7209060192108154
Acc train: 0.6904296875
Acc valid: 0.646
Saving best model...


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 1 loss: 0.7123340368270874
Acc train: 0.7193509615384616
Acc valid: 0.631


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 2 loss: 0.7272710800170898
Acc train: 0.7458683894230769
Acc valid: 0.63


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 3 loss: 0.6489281058311462
Acc train: 0.7909405048076923
Acc valid: 0.617


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 4 loss: 0.5065102577209473
Acc train: 0.8185847355769231
Acc valid: 0.635


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 5 loss: 0.5590348243713379
Acc train: 0.8466045673076923
Acc valid: 0.603


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 6 loss: 0.4096681475639343
Acc train: 0.8719951923076923
Acc valid: 0.621


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 7 loss: 0.37648552656173706
Acc train: 0.8941556490384616
Acc valid: 0.616


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 8 loss: 0.3689036965370178
Acc train: 0.9120342548076923
Acc valid: 0.617


HBox(children=(IntProgress(value=0, max=196), HTML(value='')))

Epoch 9 loss: 0.45309916138648987
Acc train: 0.9273587740384616
Acc valid: 0.609


In [186]:
len(rnn_results)

4