In [1]:
%load_ext autoreload
%autoreload 2

import sys
import time
import random
import torch

import numpy as np
import torch.nn as nn
import torch.optim as optim

from src.model import *
from src.load_data import load_data

from pathlib import Path
from torchtext import data
from torchtext import datasets
from torchtext.vocab import Vectors

In [2]:
SEED = 1234
MIN_FREQ = 1
BATCH_SIZE = 64

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
windows = [2, 5, 10]
vector_sizes = [0, 50, 100, 500]
languages = ['bxr']#['bxr', 'myv', 'kv']#['bxr', 'myv', 'kv']
methods = ['cbow', 'sg', 'glove']#[-1, 'cbow', 'sg', 'glove', 'pmi']

In [5]:
scores_d = {}
for language in languages:
    data_generator = load_data(language = language, SEED = SEED)
    
    scores_d[language] = {}
    for method in methods:
        if method not in scores_d[language]:
            scores_d[language][method] = {}
        
        for vector_size in vector_sizes:
            if vector_size not in scores_d[language][method]:
                scores_d[language][method][vector_size] = {}
                    
            for window in windows:            
                if window not in scores_d[language][method][vector_size]:
                    scores_d[language][method][vector_size][window] = []
                
                print("language:", language, "method:", method, "vector_size:", vector_size, "window:", window)
                if method != -1:
                    path = Path("./embeddings/"+language+"/"+str(vector_size)+"/"+str(window)+"/"+method)
                    if not path.is_file():
                        print("File", path, "doesn't exist")
                        continue
                    else:
                        print("loaded word embeddings", path)
                else:
                    if vector_size == 0:
                        print("skipping")
                        continue
                
                for NUM, TEXT, LEMMA, UD_TAGS, train_data, val_data in data_generator.get_fold_data():
                    
                    print("train_data", len(train_data.examples), "val_data", len(val_data.examples))
                    
                    if method == -1:
                        TEXT.build_vocab(train_data, min_freq = MIN_FREQ)
                        emb_size = vector_size
                    else:
                        !rm -rf /tmp/vec
                        _vectors = Vectors(name=path, cache='/tmp/vec')
                        TEXT.build_vocab(train_data,
                            min_freq = MIN_FREQ,
                            vectors = _vectors,
                            unk_init = torch.Tensor.normal_
                        )
                        emb_size = TEXT.vocab.vectors[1].shape[0]
                        
                        if vector_size !=0:
                            assert vector_size==TEXT.vocab.vectors[1].shape[0], "Different sizes"
                        
                    LEMMA.build_vocab(train_data)
                    NUM.build_vocab(train_data)
                    UD_TAGS.build_vocab(train_data)
                    
                    print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
                    print(f"Unique tokens in UD_TAG vocabulary: {len(UD_TAGS.vocab)}")
                    
                    train_iterator, val_iterator = data.BucketIterator.splits(
                                                        (train_data, val_data),
                                                        sort=True,
                                                        sort_key=lambda x: len(x.text),
                                                        sort_within_batch=False,
                                                        batch_size=BATCH_SIZE, 
                                                        repeat=False,
                                                        shuffle=True,
                                                        device=device)
                    
                    pos_score = pos(TEXT, UD_TAGS, train_iterator, val_iterator, emb_size, epochs=30, verbose = False)
                    print("score:", pos_score)
                    scores_d[language][method][vector_size][window].append(pos_score)
                scores_d[language][method][vector_size][window] = np.average(scores_d[language][method][vector_size][window])

language: bxr method: cbow vector_size: 0 window: 2
File embeddings/bxr/0/2/cbow doesn't exist
language: bxr method: cbow vector_size: 0 window: 5
File embeddings/bxr/0/5/cbow doesn't exist
language: bxr method: cbow vector_size: 0 window: 10
File embeddings/bxr/0/10/cbow doesn't exist
language: bxr method: cbow vector_size: 50 window: 2
loaded word embeddings embeddings/bxr/50/2/cbow
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 47169.85it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 50])


  'precision', 'predicted', average, warn_for)


score: 0.2833191737042905
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 38369.66it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 50])
score: 0.28438961919610584
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 48752.93it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 50])
score: 0.2913016831122501
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 40909.09it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.2632733797655026
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 48853.91it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 50])
score: 0.22993895658977453
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 49800.05it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 50])
score: 0.25905695201402434
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 39878.46it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.306864594920094
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50031.98it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 50])
score: 0.26191616709283455
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50997.82it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 50])
score: 0.2829851557874693
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50310.78it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 50])
score: 0.2816376095739704
language: bxr method: cbow vector_size: 50 window: 5
loaded word embeddings embeddings/bxr/50/5/cbow
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 39778.42it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 50])
score: 0.2732734854896194
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50228.53it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 50])
score: 0.2790946647884159
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50035.00it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 50])
score: 0.25028976542368475
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 37905.07it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.24936369053172774
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 43952.26it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 50])
score: 0.2050130753565893
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 51374.92it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 50])
score: 0.258521772066408
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 39783.16it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.29235277891160505
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 51021.62it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 50])
score: 0.25338046892533916
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 40300.49it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 50])
score: 0.24517033791507334
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 37530.62it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 50])
score: 0.26285637743858103
language: bxr method: cbow vector_size: 50 window: 10
loaded word embeddings embeddings/bxr/50/10/cbow
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 39629.40it/s]
  'recall', 'true', average, warn_for)


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 50])
score: 0.2931927766593742
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 40864.12it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 50])
score: 0.3186934678952964
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50760.88it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 50])
score: 0.3297623905346947
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 41383.98it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.32225100434900217
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 40104.73it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 50])
score: 0.2682638959352373
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50505.46it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 50])
score: 0.31007265128834627
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50590.32it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.32642808954864405
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 35454.72it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 50])
score: 0.30066325300098684
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 47620.66it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 50])
score: 0.30187508719896344
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 39981.69it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 50])
score: 0.31953935222110175
language: bxr method: cbow vector_size: 100 window: 2
loaded word embeddings embeddings/bxr/100/2/cbow
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28871.58it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 100])
score: 0.3077054950732317
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28947.33it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 100])
score: 0.28146917862221976
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 29988.35it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 100])
score: 0.30893317385976815
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28518.42it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.29361187983465076
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 30364.94it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 100])
score: 0.23811537408538896
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 29490.62it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 100])
score: 0.2736018449017047
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33317.19it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.31810873979984877
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28689.76it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 100])
score: 0.26195769726631607
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 30164.11it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 100])
score: 0.2910221293192062
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33523.16it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 100])
score: 0.305674423749468
language: bxr method: cbow vector_size: 100 window: 5
loaded word embeddings embeddings/bxr/100/5/cbow
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 32271.34it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 100])
score: 0.28355606114951526
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33767.36it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 100])
score: 0.2852147470897443
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33787.79it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 100])
score: 0.28751696474177335
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33286.57it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.28335011993022857
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 29595.41it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 100])
score: 0.24327307134289727
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 32981.28it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 100])
score: 0.2827759755066544
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28988.29it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.30805505009005757
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33600.92it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 100])
score: 0.25629628677195787
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 27649.65it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 100])
score: 0.28914052645362
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 31326.24it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 100])
score: 0.27161960957285286
language: bxr method: cbow vector_size: 100 window: 10
loaded word embeddings embeddings/bxr/100/10/cbow
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 31771.81it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 100])
score: 0.3037020642685057
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33197.09it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 100])
score: 0.3248669464774619
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28620.49it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 100])
score: 0.341478416296768
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 32439.37it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.32181703994532185
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 31160.28it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 100])
score: 0.25425067841871046
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 29941.71it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 100])
score: 0.3183990044017076
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 29172.76it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.34098580109512006
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28785.55it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 100])
score: 0.2783643560008395
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33300.34it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 100])
score: 0.3191415055186918
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28953.07it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 100])
score: 0.30943232379550556
language: bxr method: cbow vector_size: 500 window: 2
loaded word embeddings embeddings/bxr/500/2/cbow
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 8959.31it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 500])
score: 0.33293299856528213
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9152.22it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 500])
score: 0.33778744726437054
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9227.81it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 500])
score: 0.3273990955492975
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9229.48it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.32598503691405745
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9140.88it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 500])
score: 0.25499601908462416
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9218.41it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 500])
score: 0.3078377808737541
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9220.15it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.33085995276807706
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9029.96it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 500])
score: 0.2986804642275747
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9170.70it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 500])
score: 0.3062491830858539
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 8616.52it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 500])
score: 0.3128134572129294
language: bxr method: cbow vector_size: 500 window: 5
loaded word embeddings embeddings/bxr/500/5/cbow
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9198.87it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 500])
score: 0.32923733129202554
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9170.45it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 500])
score: 0.34771177186591445
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9111.83it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 500])
score: 0.363523685502062
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 8981.74it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.32356576534548587
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9033.70it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 500])
score: 0.285282652719146
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9095.54it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 500])
score: 0.3131061188149138
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9022.16it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.3505934659172574
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9044.38it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 500])
score: 0.3160779175492237
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9261.13it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 500])
score: 0.3234584813403056
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9118.57it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 500])
score: 0.3137498174937965
language: bxr method: cbow vector_size: 500 window: 10
loaded word embeddings embeddings/bxr/500/10/cbow
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9192.01it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 500])
score: 0.3453691653850501
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9250.07it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 500])
score: 0.3566501716480172
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9250.64it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 500])
score: 0.3793432561318543
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9056.17it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.3445498021076846
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9128.59it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 500])
score: 0.2923549801805604
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9249.04it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 500])
score: 0.3417634834029727
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9199.10it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.3830347787266067
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9245.83it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 500])
score: 0.3526945237756219
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9086.66it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 500])
score: 0.3490290102116264
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9050.24it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 500])
score: 0.36155239209287743
language: bxr method: sg vector_size: 0 window: 2
File embeddings/bxr/0/2/sg doesn't exist
language: bxr method: sg vector_size: 0 window: 5
File embeddings/bxr/0/5/sg doesn't exist
language: bxr method: sg vector_size: 0 window: 10
File embeddings/bxr/0/10/sg doesn't exist
language: bxr method: sg vector_size: 50 window: 2
loaded word embeddings embeddings/bxr/50/2/sg
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50086.52it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 50])
score: 0.3071406444607525
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 40056.14it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 50])
score: 0.300376940519673
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 44119.40it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 50])
score: 0.32020190662250714
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 49891.07it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.29104914364827755
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 49925.69it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 50])
score: 0.240105263640558
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 38712.06it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 50])
score: 0.2860203956636793
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50218.03it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.33681617490540194
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 40868.73it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 50])
score: 0.2964810242505454
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50450.61it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 50])
score: 0.3083826576497539
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 40899.71it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 50])
score: 0.28912120882904696
language: bxr method: sg vector_size: 50 window: 5
loaded word embeddings embeddings/bxr/50/5/sg
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 39781.25it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 50])
score: 0.30218976211612053
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 49402.17it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 50])
score: 0.3140923527998092
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 40192.97it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 50])
score: 0.31936631478782396
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50551.49it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.3150339332929324
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 46175.85it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 50])
score: 0.23640369827044735
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 50396.07it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 50])
score: 0.2832605536340049
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 47920.95it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.34677525459834335
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 41449.49it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 50])
score: 0.27231497890031053
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 51007.54it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 50])
score: 0.28792211435149523
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 47939.64it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 50])
score: 0.2913653285579111
language: bxr method: sg vector_size: 50 window: 10
loaded word embeddings embeddings/bxr/50/10/sg
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 37150.05it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 50])
score: 0.2748850322729679
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 41201.16it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 50])
score: 0.2514211236613247
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 41310.23it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 50])
score: 0.2818832946593651
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 38890.77it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.30045206375274053
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 41731.66it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 50])
score: 0.2572016759610265
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 39708.80it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 50])
score: 0.27535715838378544
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 37792.80it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.3431085044762028
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 40410.17it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 50])
score: 0.2774692380700362
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 39192.94it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 50])
score: 0.28060318495680037
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 41387.91it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 50])
score: 0.287542112439633
language: bxr method: sg vector_size: 100 window: 2
loaded word embeddings embeddings/bxr/100/2/sg
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28152.16it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 100])
score: 0.36152628684390975
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 25020.72it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 100])
score: 0.3411207687966412
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28745.47it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 100])
score: 0.35397942007865874
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 30076.06it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.3038675123812153
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 30358.48it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 100])
score: 0.2704779787321039
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33365.74it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 100])
score: 0.33000393297781627
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28905.56it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.36975689138332674
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 32922.56it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 100])
score: 0.3354634186464959
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28980.57it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 100])
score: 0.33294294252369405
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33314.29it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 100])
score: 0.32272893595217483
language: bxr method: sg vector_size: 100 window: 5
loaded word embeddings embeddings/bxr/100/5/sg
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33057.26it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 100])
score: 0.3863765097140561
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 31637.38it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 100])
score: 0.3833054805425772
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 27920.72it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 100])
score: 0.3728404326350229
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28833.17it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.33413458269121393
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 32961.85it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 100])
score: 0.28855232798020725
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 32900.04it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 100])
score: 0.32700875936763757
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 29566.86it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.401214927945358
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 29146.08it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 100])
score: 0.36405970396648873
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 27521.96it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 100])
score: 0.3447074553902679
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 32311.73it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 100])
score: 0.36093654376094136
language: bxr method: sg vector_size: 100 window: 10
loaded word embeddings embeddings/bxr/100/10/sg
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33281.87it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 100])
score: 0.3867628102344429
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 28517.70it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 100])
score: 0.38847569522158865
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33501.22it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 100])
score: 0.37721861208345064
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33149.15it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.3568393322385885
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33395.07it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 100])
score: 0.2915421090689347
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33051.98it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 100])
score: 0.3257697534579676
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33315.76it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.3889370153352774
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 29481.75it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 100])
score: 0.3622111978668089
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33331.28it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 100])
score: 0.36414935423536404
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 33466.69it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 100])
score: 0.3612562890247508
language: bxr method: sg vector_size: 500 window: 2
loaded word embeddings embeddings/bxr/500/2/sg
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9158.82it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 500])
score: 0.39765567551771414
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9242.05it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 500])
score: 0.42237689689743274
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9236.61it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 500])
score: 0.38906581963524567
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9141.55it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.39701793462680673
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9182.29it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 500])
score: 0.2969385861212023
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 8692.77it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 500])
score: 0.3637946596884024
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9072.17it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.42675381984083915
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 8946.14it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 500])
score: 0.38657762454606953
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 8809.36it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 500])
score: 0.3849639552015867
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 7279.11it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 500])
score: 0.385671084407469
language: bxr method: sg vector_size: 500 window: 5
loaded word embeddings embeddings/bxr/500/5/sg
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9181.73it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 500])
score: 0.428528957613188
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9217.21it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 500])
score: 0.426715377254266
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 8824.19it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 500])
score: 0.41260430620869076
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9158.77it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.3892605647676272
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9302.96it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 500])
score: 0.3297024177342678
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9213.42it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 500])
score: 0.365725579397631
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9310.67it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.4228090153042572
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 8959.99it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 500])
score: 0.4200324703845426
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 8879.68it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 500])
score: 0.39848422846358855
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9048.89it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 500])
score: 0.41953780451573053
language: bxr method: sg vector_size: 500 window: 10
loaded word embeddings embeddings/bxr/500/10/sg
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9122.61it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 500])
score: 0.42855763352938026
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9185.15it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 500])
score: 0.4537579167225739
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9106.74it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 500])
score: 0.4213476430532955
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9265.53it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.4148846049987135
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9254.56it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 500])
score: 0.3366954532738893
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9257.36it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 500])
score: 0.3753397306470047
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 8943.92it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.44576933414133296
train_data 772 val_data 86


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9054.27it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 500])
score: 0.4457846188049965
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9188.70it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 500])
score: 0.39512940510656824
train_data 773 val_data 85


  0%|          | 0/6124 [00:00<?, ?it/s]Skipping token b'6124' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6124/6124 [00:00<00:00, 9034.36it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 500])
score: 0.4259300897572642
language: bxr method: glove vector_size: 0 window: 2
File embeddings/bxr/0/2/glove doesn't exist
language: bxr method: glove vector_size: 0 window: 5
File embeddings/bxr/0/5/glove doesn't exist
language: bxr method: glove vector_size: 0 window: 10
File embeddings/bxr/0/10/glove doesn't exist
language: bxr method: glove vector_size: 50 window: 2
loaded word embeddings embeddings/bxr/50/2/glove
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 51821.95it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 50])
score: 0.35588754299671194
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 39741.40it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 50])
score: 0.3922738782163197
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 51368.61it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 50])
score: 0.36906554025357974
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 41345.37it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.35383548868943204
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 51651.81it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 50])
score: 0.26383099623420025
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 41544.01it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 50])
score: 0.332279481636825
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 48222.97it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.38511372578573916
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 37628.73it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 50])
score: 0.3735535994704577
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 37806.76it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 50])
score: 0.31488209187766
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 50401.63it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 50])
score: 0.3452617978137198
language: bxr method: glove vector_size: 50 window: 5
loaded word embeddings embeddings/bxr/50/5/glove
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 39853.85it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 50])
score: 0.33732736157587806
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 43271.49it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 50])
score: 0.37213080809069266
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 47038.31it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 50])
score: 0.3742937491546326
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 41212.65it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.3499230640050505
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 40004.29it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 50])
score: 0.2849037674324607
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 39102.15it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 50])
score: 0.33991917900403223
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 41759.91it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.3736284581385343
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 39090.25it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 50])
score: 0.33446516511090124
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 39323.00it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 50])
score: 0.3219701123604072
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 40230.31it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 50])
score: 0.37063589843551675
language: bxr method: glove vector_size: 50 window: 10
loaded word embeddings embeddings/bxr/50/10/glove
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 39759.73it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 50])
score: 0.3741457642727528
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 39813.71it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 50])
score: 0.3829371857435996
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 40414.74it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 50])
score: 0.38658792654044005
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 42031.15it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.32626312776556765
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 41747.36it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 50])
score: 0.28789364160072445
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 41489.33it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 50])
score: 0.3232024254602106
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 51293.95it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 50])
score: 0.3884947454020649
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 39631.72it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 50])
score: 0.37868589322584395
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 44082.01it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 50])
score: 0.3271131642499598
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'50']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 42534.07it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 94,353 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 50])
score: 0.360784155401023
language: bxr method: glove vector_size: 100 window: 2
loaded word embeddings embeddings/bxr/100/2/glove
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 24924.97it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 100])
score: 0.39467311311684405
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 33311.61it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 100])
score: 0.43935384108864356
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 28638.66it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 100])
score: 0.40602478554390625
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 29709.58it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.37745706161524406
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 29609.08it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 100])
score: 0.2863031882308972
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 34121.32it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 100])
score: 0.35627774850934446
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 28730.74it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.4120968064622683
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 28456.10it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 100])
score: 0.3845648283198831
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 29943.53it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 100])
score: 0.3465132758167744
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 29528.29it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 100])
score: 0.40480162966439354
language: bxr method: glove vector_size: 100 window: 5
loaded word embeddings embeddings/bxr/100/5/glove
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 30461.84it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 100])
score: 0.398125958322964
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 28875.77it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 100])
score: 0.4280305347673376
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 29750.07it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 100])
score: 0.401660949660548
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 33449.36it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.3824388056973559
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 30439.50it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 100])
score: 0.31123207957008364
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 30169.00it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 100])
score: 0.35582832716606183
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 30464.44it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.42983448888126485
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 29239.13it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 100])
score: 0.4093634772094628
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 29289.93it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 100])
score: 0.3779856703701232
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 29534.13it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 100])
score: 0.39678587992295417
language: bxr method: glove vector_size: 100 window: 10
loaded word embeddings embeddings/bxr/100/10/glove
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 31655.14it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 100])
score: 0.42654367791184117
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 27590.46it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 100])
score: 0.45965372949440453
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 28945.76it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 100])
score: 0.4297885739272657
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 34457.42it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.4006019860774009
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 31429.06it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 100])
score: 0.32865185938497904
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 34033.49it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 100])
score: 0.37814209162601914
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 30334.33it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 100])
score: 0.44880596236604564
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 23745.34it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 100])
score: 0.4201931995024814
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 28947.94it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 100])
score: 0.3695438022337204
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 32472.80it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 119,953 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 100])
score: 0.429049503153179
language: bxr method: glove vector_size: 500 window: 2
loaded word embeddings embeddings/bxr/500/2/glove
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9076.39it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 500])
score: 0.4151134632147946
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9534.72it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 500])
score: 0.44386896694841355
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9263.77it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 500])
score: 0.43020732215704555
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9572.34it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.3790876283198778
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9654.20it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 500])
score: 0.31085392553731894
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9603.85it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 500])
score: 0.371953911826412
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9614.15it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.43513848683123685
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9555.06it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 500])
score: 0.42030319819656015
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9686.99it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 500])
score: 0.3939002804888072
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9473.75it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 500])
score: 0.43835431913424566
language: bxr method: glove vector_size: 500 window: 5
loaded word embeddings embeddings/bxr/500/5/glove
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9417.67it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 500])
score: 0.4395245744902957
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9707.59it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 500])
score: 0.4843514090730584
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9341.14it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 500])
score: 0.450459999178191
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9592.56it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.4413792149774312
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9249.78it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 500])
score: 0.34458573452116936
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9587.72it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 500])
score: 0.4683480826656789
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9286.66it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.4774109382969414
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9484.12it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 500])
score: 0.5244551818721528
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9357.34it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 500])
score: 0.41561213801836705
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9179.28it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 500])
score: 0.4562269882490942
language: bxr method: glove vector_size: 500 window: 10
loaded word embeddings embeddings/bxr/500/10/glove
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9450.96it/s]


Unique tokens in TEXT vocabulary: 3655
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3655, 500])
score: 0.4892389889006683
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9205.83it/s]


Unique tokens in TEXT vocabulary: 3680
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3680, 500])
score: 0.5385702594620597
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9390.25it/s]


Unique tokens in TEXT vocabulary: 3708
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3708, 500])
score: 0.5209211267207198
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9184.42it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.49399606343605834
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9251.82it/s]


Unique tokens in TEXT vocabulary: 3689
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3689, 500])
score: 0.3577540313814306
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9205.03it/s]


Unique tokens in TEXT vocabulary: 3693
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3693, 500])
score: 0.470886637948992
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 7211.44it/s]


Unique tokens in TEXT vocabulary: 3660
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3660, 500])
score: 0.5515792103498847
train_data 772 val_data 86


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9531.02it/s]


Unique tokens in TEXT vocabulary: 3672
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3672, 500])
score: 0.5257485161162305
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9452.47it/s]


Unique tokens in TEXT vocabulary: 3666
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3666, 500])
score: 0.46979363020727116
train_data 773 val_data 85


  0%|          | 0/6125 [00:00<?, ?it/s]Skipping token b'6125' with 1-dimensional vector [b'500']; likely a header
100%|██████████| 6125/6125 [00:00<00:00, 9551.24it/s]


Unique tokens in TEXT vocabulary: 3692
Unique tokens in UD_TAG vocabulary: 17
The model has 324,753 trainable parameters
load pretrained embs
embeddings torch.Size([3692, 500])
score: 0.472614666542615


In [153]:
count = 0
for idx, vec in enumerate(TEXT.vocab.vectors):
    if vec[0] == 0 and not TEXT.vocab.itos[idx].isdigit():
#         print(TEXT.vocab.itos[idx])
        count += 1

print('словарь', len(TEXT.vocab.vectors))
print('не найдено', count)

словарь 3655
не найдено 1329


In [236]:
TEXT.vocab.stoi[TEXT.pad_token]

1

In [4]:
def pos(TEXT, UD_TAGS, train_iterator, val_iterator, emb_size, epochs=10, verbose = True):
      
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = emb_size
    HIDDEN_DIM = 128
    OUTPUT_DIM = len(UD_TAGS.vocab)
    N_LAYERS = 1
    BIDIRECTIONAL = False
    DROPOUT = 0.25
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = BiLSTMPOSTagger(INPUT_DIM, 
                        EMBEDDING_DIM, 
                        HIDDEN_DIM, 
                        OUTPUT_DIM, 
                        N_LAYERS, 
                        BIDIRECTIONAL, 
                        DROPOUT,
                        PAD_IDX)
    
    model.apply(init_weights)
    print(f'The model has {count_parameters(model):,} trainable parameters')
    
    if TEXT.vocab.vectors is not None:
        print("load pretrained embs")
        pretrained_embeddings = TEXT.vocab.vectors
        print('embeddings', pretrained_embeddings.shape)
    
        model.embedding.weight.data.copy_(pretrained_embeddings)
        model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
    else:
        print("no loading any pretrained embs")
    
    optimizer = optim.Adam(model.parameters())
    TAG_PAD_IDX = UD_TAGS.vocab.stoi[UD_TAGS.pad_token]
    criterion = nn.CrossEntropyLoss(ignore_index = TAG_PAD_IDX)
    
    model = model.to(device)
    criterion = criterion.to(device)
    
    for epoch in range(epochs):

        start_time = time.time()
    
        train_loss = train(model, train_iterator, optimizer, criterion, TAG_PAD_IDX)
        valid_loss, valid_acc = evaluate(model, val_iterator, criterion, TAG_PAD_IDX)
    
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
            
        if verbose:
            print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

    test_loss, test_acc = evaluate(model, val_iterator, criterion, TAG_PAD_IDX)
    return test_acc

In [7]:
from src.eval import tabular
tabular(scores_d, 'bxr')

0
& 50  &  0.132 & 0.132 & 0.132 \\
& 100  &  0.205 & 0.205 & 0.205 \\
& 500  &  0.484 & 0.484 & 0.484 \\
cbow
& 50  &  0.299 & 0.298 & 0.322 \\
& 100  &  0.307 & 0.314 & 0.329 \\
& 500  &  0.332 & 0.347 & 0.362 \\
sg
& 50  &  0.325 & 0.32 & 0.327 \\
& 100  &  0.349 & 0.369 & 0.378 \\
& 500  &  0.398 & 0.412 & 0.424 \\
glove
& 50  &  0.361 & 0.362 & 0.371 \\
& 100  &  0.389 & 0.396 & 0.419 \\
& 500  &  0.411 & 0.482 & 0.49 \\
pmi
& 50  &  0.402 & 0.417 & 0.467 \\
& 100  &  0.491 & 0.502 & 0.515 \\
& 500  &  0.56 & 0.564 & 0.569 \\




True

In [None]:
0
& 50  &  0.132 & 0.132 & 0.132 \\
& 100  &  0.205 & 0.205 & 0.205 \\
& 500  &  0.484 & 0.484 & 0.484 \\
cbow
& 50  &  0.299 & 0.298 & 0.322 \\
& 100  &  0.307 & 0.314 & 0.329 \\
& 500  &  0.332 & 0.347 & 0.362 \\
sg
& 50  &  0.325 & 0.32 & 0.327 \\
& 100  &  0.349 & 0.369 & 0.378 \\
& 500  &  0.398 & 0.412 & 0.424 \\
glove
& 50  &  0.361 & 0.362 & 0.371 \\
& 100  &  0.389 & 0.396 & 0.419 \\
& 500  &  0.411 & 0.482 & 0.49 \\
pmi
& 50  &  0.402 & 0.417 & 0.467 \\
& 100  &  0.491 & 0.502 & 0.515 \\
& 500  &  0.56 & 0.564 & 0.569 \\

In [8]:
from src.eval import tabular
tabular(scores_d, 'myv')

0
& 50  &  0.268 & 0.268 & 0.268 \\
& 100  &  0.353 & 0.353 & 0.353 \\
& 500  &  0.535 & 0.535 & 0.535 \\
cbow
& 50  &  0.351 & 0.368 & 0.386 \\
& 100  &  0.367 & 0.382 & 0.407 \\
& 500  &  0.388 & 0.396 & 0.429 \\
sg
& 50  &  0.392 & 0.388 & 0.377 \\
& 100  &  0.413 & 0.41 & 0.423 \\
& 500  &  0.435 & 0.442 & 0.455 \\
glove
& 50  &  0.415 & 0.426 & 0.425 \\
& 100  &  0.444 & 0.469 & 0.465 \\
& 500  &  0.472 & 0.491 & 0.508 \\
pmi
& 50  &  0.429 & 0.473 & 0.458 \\
& 100  &  0.479 & 0.501 & 0.502 \\
& 500  &  0.539 & 0.541 & 0.55 \\




True

In [9]:
from src.eval import tabular

tabular(scores_d, 'kv')

-1




TypeError: type list doesn't define __round__ method

In [6]:
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(scores_d)

{   'bxr': {   'cbow': {   0: {2: [], 5: [], 10: []},
                           50: {   2: 0.2744683291756316,
                                   5: 0.25693164168470434,
                                   10: 0.30907419686316473},
                           100: {   2: 0.28801999365118036,
                                    5: 0.27907984126493013,
                                    10: 0.31124381362186326},
                           500: {   2: 0.31355414355458205,
                                    5: 0.3266307007840131,
                                    10: 0.35063415636628714}},
               'glove': {   0: {2: [], 5: [], 10: []},
                            50: {   2: 0.3485984142974646,
                                    5: 0.3459197563308106,
                                    10: 0.35361080296621866},
                            100: {   2: 0.3808066278368199,
                                     5: 0.38912861715681557,
                                     10: 0.40909

In [13]:
import json
with open('extrinsic.json', 'w', encoding='utf-8') as f:
    json.dump(scores_d, f, sort_keys=False, indent=4, ensure_ascii=False)