# Наумкин Владимир, С01-119.

## Задание 2. Анализ модели LSTM.

### Подключим библиотеки

In [1]:
from tqdm.notebook import tqdm
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter
from prettytable import PrettyTable
from nerus import load_nerus

### Уберём предупреждения

In [2]:
import warnings
warnings.filterwarnings("ignore")

### Зададим устройство исполнения кода (вычисления провожу на своём ПК)

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

### Функции для работы с датасетом

In [4]:
def get_sent_tags(docs, size=10000):
    list_of_sent = []
    list_of_tags = []
    for doc in tqdm(docs):
        for sent in doc.sents:
            list_of_sent_toks = []
            list_of_sent_tags = []
            for tok in sent.tokens:
                list_of_sent_toks.append(tok.text)
                list_of_sent_tags.append(tok.pos)
        list_of_sent.append(list_of_sent_toks)
        list_of_tags.append(list_of_sent_tags)
        if len(list_of_sent) > size:
            break
    return list_of_sent, list_of_tags

In [5]:
def pos_dict(list_of_tags, test_size=100):
    pos2idx = {'<PAD>' : 0}
    idx2pos = ['<PAD>']
    for tags in list_of_tags[:-test_size]:
        for word in tags:
            if word not in pos2idx:
                pos2idx[word] = len(idx2pos)
                idx2pos.append(word)
    return pos2idx, idx2pos

In [6]:
def word_dict(list_of_tags, test_size=100):
    word2idx = {'<PAD>': 0, '<UNK>': 1}
    idx2word = ['<PAD>', '<UNK>']
    for sent in list_of_sent[:-test_size]:
        for word in sent:
            if word not in word2idx:
                word2idx[word] = len(idx2word)
                idx2word.append(word)
    return word2idx, idx2word

In [7]:
class NerusDataset(Dataset):
    def __init__(self, list_of_sent, list_of_tags, word2idx, pos2idx, train=True, test_size=100):
        self.X = []
        self.y = []
        if train:
            for sent in list_of_sent[:-test_size]:
                data = []
                for word in sent:
                    data.append(word2idx.get(word, 0))
                self.X.append(data)
            for tags in list_of_tags[:-test_size]:
                data = []
                for word in tags:
                    data.append(pos2idx.get(word, 0))
                self.y.append(data)
        else:
            for sent in list_of_sent[-test_size:]:
                data = []
                for word in sent:
                    data.append(word2idx.get(word, 0))
                self.X.append(data)
            for tags in list_of_tags[-test_size:]:
                data = []
                for word in tags:
                    data.append(pos2idx.get(word, 0))
                self.y.append(data)
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return torch.Tensor(self.X[idx]), torch.Tensor(self.y[idx])

In [8]:
def collate_fn(data):
    X, Y = [], []
    for x, y in data:
        X.append(x)
        Y.append(y)
    x_batch = torch.zeros((len(X), max(list(map(len, X)))), dtype=torch.long)
    y_batch = torch.zeros((len(Y), max(list(map(len, Y)))), dtype=torch.long)
    for i, sent in enumerate(X):
        x_batch[i, :len(sent)] = sent
    for i, sent in enumerate(Y):
        y_batch[i, :len(sent)] = sent
    return x_batch, y_batch

### Функция проверки качества модели

In [9]:
def check_model(batch_size, dataset, model, loss_function, idx2word, idx2pos):
    model.eval()
    batch_generator = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, collate_fn=collate_fn)
    test_acc = 0
    test_loss = 0
    count = 0
    for it, (x_batch, y_batch) in enumerate(batch_generator):
        x_batch = x_batch.to(model.device)
        y_batch = y_batch.to(model.device)
        mask = (y_batch != 0)
        count += mask.sum()
        with torch.no_grad():
            output = model(x_batch)
        test_loss += loss_function(output.transpose(1,2), y_batch).cpu().item()*len(x_batch)
        test_acc += (torch.argmax(output, dim=-1).cpu() == y_batch)[mask].sum().item()
    test_loss /= len(dataset)
    test_acc /= count
    print(f'loss: {test_loss}, acc: {test_acc}')
    dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    x, y = next(iter(dataloader))
    x = x.to(device)
    y = y.to(device)
    with torch.no_grad():
        outputs = model(x)
    one_x = x[0].cpu().numpy()
    one_y = y[0].cpu().numpy()
    one_output = outputs[0].argmax(dim=-1).cpu().numpy()
    words = [idx2word[idx] for idx in one_x]
    true_tags = [idx2pos[idx] for idx in one_y]
    pred_tags = [idx2pos[idx] for idx in one_output]
    table = PrettyTable(["Word", "True tag", "Predicted tag"])
    table.align["Word"], table.align["True tag"], table.align["Predicted tag"] = "l", "l", "l"
    for word, true_tag, pred_tag in zip(words, true_tags, pred_tags):
        if word != idx2word[word2idx['<PAD>']]:
            table.add_row([word, true_tag, pred_tag])
    print(table)
    return test_loss, test_acc

### Код для обучения модели

In [10]:
def train_on_batch(model, x_batch, y_batch, optimizer, loss_function):
    model.train()
    model.zero_grad()
    output = model(x_batch.to(device))
    loss = loss_function(output.transpose(1, 2), y_batch.to(device))
    loss.backward()
    optimizer.step()
    return loss.cpu().item()

In [11]:
def train_epoch(train_generator, model, loss_function, optimizer, callback = None):
    epoch_loss = 0
    total = 0
    for it, (batch_of_x, batch_of_y) in enumerate(train_generator):
        batch_loss = train_on_batch(model, batch_of_x.to(device), batch_of_y.to(device), optimizer, loss_function)
        train_generator.set_postfix({'train batch loss': batch_loss})
        if callback is not None:
            callback(model, batch_loss)
        epoch_loss += batch_loss*len(batch_of_x)
        total += len(batch_of_x)
    return epoch_loss/total

In [12]:
def trainer(count_of_epoch, 
            batch_size, 
            dataset,
            model, 
            loss_function,
            optimizer,
            lr = 0.001,
            callback = None):
    optima = optimizer(model.parameters(), lr=lr)
    iterations = tqdm(range(count_of_epoch), desc='epoch')
    iterations.set_postfix({'train epoch loss': np.nan})
    for it in iterations:
        batch_generator = tqdm(
            torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn), 
            leave=False, total=len(dataset)//batch_size+(len(dataset)%batch_size> 0))
        epoch_loss = train_epoch(
                    train_generator=batch_generator, 
                    model=model, 
                    loss_function=loss_function, 
                    optimizer=optima, 
                    callback=callback)
        iterations.set_postfix({'train epoch loss': epoch_loss})

### Отслеживание обучения

In [13]:
class callback():
    def __init__(self, writer, dataset, loss_function, delimeter = 300, batch_size=64):
        self.step = 0
        self.writer = writer
        self.delimeter = delimeter
        self.loss_function = loss_function
        self.batch_size = batch_size
        self.dataset = dataset
    def forward(self, model, loss):
        self.step += 1
        self.writer.add_scalar('LOSS/train', loss, self.step)
        if self.step % self.delimeter == 0:
            model.eval()
            batch_generator = torch.utils.data.DataLoader(dataset=self.dataset,
                                                          batch_size=self.batch_size,
                                                          collate_fn=collate_fn)
            test_acc = 0
            test_loss = 0
            count = 0
            for it, (x_batch, y_batch) in enumerate(batch_generator):
                x_batch = x_batch.to(model.device)
                y_batch = y_batch.to(model.device)
                mask = (y_batch != 0)
                count += mask.sum()
                with torch.no_grad():
                    output = model(x_batch)
                test_loss += self.loss_function(output.transpose(1,2), y_batch).cpu().item()*len(x_batch)
                test_acc += (torch.argmax(output, dim=-1).cpu() == y_batch)[mask].sum().item()
            test_loss /= len(self.dataset)
            test_acc /= count
            print(f"\t step={self.step}, train_loss={loss}, val_loss={test_loss}, val_acc={test_acc}")
            self.writer.add_scalar('LOSS/test', test_loss, self.step)
            self.writer.add_scalar('ACC/test', test_acc, self.step)
    def __call__(self, model, loss):
        return self.forward(model, loss)

## Модель LSTM

In [14]:
class LSTM(torch.nn.Module):
    @property
    def device(self):
        return next(self.parameters()).device
    def __init__(self,
                 vocab_dim,
                 output_dim = 18,
                 emb_dim = 10, 
                 hidden_dim = 10,
                 num_layers = 3,
                 dropout = 0,
                 batch_norm = False,
                 bidirectional = False):
        super(LSTM, self).__init__()
        self.embedding = torch.nn.Embedding(vocab_dim, emb_dim, padding_idx=0)
        self.encoder = torch.nn.LSTM(emb_dim, hidden_dim, num_layers, bidirectional = bidirectional, batch_first=True)
        if batch_norm:
            self.batch_norm = torch.nn.BatchNorm1d(hidden_dim)
        else:
            self.batch_norm = None
        self.dropout = torch.nn.Dropout(p=dropout)
        self.linear = torch.nn.Linear(hidden_dim, output_dim)
    def forward(self, input):
        out = self.embedding(input)
        out, _ = self.encoder(out)
        if self.batch_norm is not None:
            out = self.batch_norm(out.transpose(1, 2)).transpose(1, 2)
        out = self.dropout(out)
        out = self.linear(out)
        return out

### Загрузка датасета (предварительно скачаем сам архив)

In [18]:
docs = load_nerus("nerus_lenta.conllu.gz")

In [19]:
list_of_sent, list_of_tags = get_sent_tags(docs, 10000)

0it [00:00, ?it/s]

In [20]:
pos2idx, idx2pos = pos_dict(list_of_tags)

In [21]:
word2idx, idx2word = word_dict(list_of_sent)

In [22]:
train_data = NerusDataset(list_of_sent, list_of_tags, word2idx, pos2idx, train=True)
test_data = NerusDataset(list_of_sent, list_of_tags, word2idx, pos2idx, train=False)

### Обучение модели

In [23]:
loss_function = torch.nn.CrossEntropyLoss(ignore_index=0)
optimizer = torch.optim.Adam

Перебираемые параметры для анализа зависимости модели от них:

In [30]:
dim_layers_params = [10, 15, 20]
num_layers_params = [3, 5, 7]
dropout_params = [0, 0.25, 0.5]
batch_norm_params = [False, True]
len_dict_params = [10000, 25000, 50000]

Разные размеры слоя:

In [29]:
for dim in dim_layers_params:
    print(f'dim = {dim}')
    model = LSTM(vocab_dim=len(word2idx), emb_dim=dim, hidden_dim=dim, output_dim=len(pos2idx))
    model.to(device)
    writer = SummaryWriter(log_dir=f'tensorboard2/dim_{dim}')
    call = callback(writer, test_data, loss_function)
    check_model(64, test_data, model, loss_function, idx2word, idx2pos)
    trainer(count_of_epoch=10, 
            batch_size=64, 
            dataset=train_data,
            model=model, 
            loss_function=loss_function,
            optimizer = optimizer,
            callback=call)
    check_model(64, test_data, model, loss_function, idx2word, idx2pos)

dim = 10
loss: 2.9790517902374267, acc: 0.022016221657395363
+----------+----------+---------------+
| Word     | True tag | Predicted tag |
+----------+----------+---------------+
| За       | ADP      | ADV           |
| семь     | NUM      | ADV           |
| месяцев  | NOUN     | ADV           |
| с        | ADP      | ADV           |
| начала   | NOUN     | ADV           |
| акции    | NOUN     | ADV           |
| в        | ADP      | ADV           |
| ней      | PRON     | ADV           |
| приняли  | VERB     | ADV           |
| участие  | NOUN     | ADV           |
| человек  | NOUN     | ADV           |
| в        | ADP      | ADV           |
| 52       | NUM      | ADV           |
| городах  | NOUN     | ADV           |
| России   | PROPN    | ADV           |
| ,        | PUNCT    | ADV           |
| они      | PRON     | ADV           |
| посетили | VERB     | ADV           |
| ,        | PUNCT    | ADV           |
| и        | CCONJ    | ADV           |
| детей    | NOUN  

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=2.2013754844665527, val_loss=2.248526210784912, val_acc=0.2665121555328369


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=2.19882869720459, val_loss=2.2273713970184326, val_acc=0.26940903067588806


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=1.9736965894699097, val_loss=1.9875718402862548, val_acc=0.26940903067588806


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=1.4881247282028198, val_loss=1.5253090810775758, val_acc=0.47392815351486206


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=1.144326090812683, val_loss=1.260096125602722, val_acc=0.6031286120414734
loss: 1.2126078701019287, acc: 0.6512166857719421
+-----------+----------+---------------+
| Word      | True tag | Predicted tag |
+-----------+----------+---------------+
| В         | ADP      | ADP           |
| 2018      | ADJ      | ADJ           |
| году      | NOUN     | NOUN          |
| Forbes    | PROPN    | VERB          |
| оценил    | VERB     | VERB          |
| состояние | NOUN     | NOUN          |
| в         | ADP      | ADP           |
| 2,2       | NUM      | ADJ           |
| миллиарда | NOUN     | NOUN          |
| долларов  | NOUN     | NOUN          |
| .         | PUNCT    | PUNCT         |
+-----------+----------+---------------+
dim = 15
loss: 2.9377783489227296, acc: 0.06546929478645325
+--------------+----------+---------------+
| Word         | True tag | Predicted tag |
+--------------+----------+---------------+
| На           | ADP      | ADP           |
|

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=2.126230478286743, val_loss=2.2228517150878906, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=2.0409188270568848, val_loss=2.107631492614746, val_acc=0.3342989385128021


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=1.5555475950241089, val_loss=1.5580001163482666, val_acc=0.5098493695259094


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=1.1254302263259888, val_loss=1.1919739055633545, val_acc=0.6181923151016235


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=0.9431555867195129, val_loss=0.9833147573471069, val_acc=0.6906141042709351
loss: 0.9498584222793579, acc: 0.6998841166496277
+-------------+----------+---------------+
| Word        | True tag | Predicted tag |
+-------------+----------+---------------+
| После       | ADP      | ADP           |
| этого       | PRON     | PRON          |
| в           | ADP      | ADP           |
| Госдуме     | PROPN    | VERB          |
| заявили     | VERB     | VERB          |
| ,           | PUNCT    | PUNCT         |
| что         | SCONJ    | SCONJ         |
| возможность | NOUN     | NOUN          |
| оплаты      | NOUN     | NOUN          |
| расходов    | NOUN     | VERB          |
| .           | PUNCT    | PUNCT         |
+-------------+----------+---------------+
dim = 20
loss: 2.904795389175415, acc: 0.010428736917674541
+-------------+----------+---------------+
| Word        | True tag | Predicted tag |
+-------------+----------+---------------+
| После       | 

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=1.9229785203933716, val_loss=1.982489700317383, val_acc=0.41193509101867676


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=1.53652024269104, val_loss=1.5905735683441162, val_acc=0.47566625475883484


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=1.1539238691329956, val_loss=1.1943800163269043, val_acc=0.6263035535812378


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=0.9579474329948425, val_loss=0.9585205030441284, val_acc=0.7161065936088562


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=0.6939226984977722, val_loss=0.7853385519981384, val_acc=0.7757821083068848
loss: 0.7838173341751099, acc: 0.7850521206855774
+----------------+----------+---------------+
| Word           | True tag | Predicted tag |
+----------------+----------+---------------+
| Российский     | ADJ      | ADJ           |
| министр        | NOUN     | NOUN          |
| финансов       | NOUN     | NOUN          |
| и              | CCONJ    | CCONJ         |
| вице-премьер   | NOUN     | NOUN          |
| Антон          | PROPN    | PROPN         |
| Силуанов       | PROPN    | PROPN         |
| пообещал       | VERB     | VERB          |
| обратиться     | VERB     | VERB          |
| в              | ADP      | ADP           |
| Верховный      | ADJ      | ADJ           |
| суд            | NOUN     | NOUN          |
| Великобритании | PROPN    | PROPN         |
| .              | PUNCT    | PUNCT         |
+----------------+----------+---------------+


Чем больше размер слоя, тем лучше.

Разное число слоёв:

In [31]:
for num_layers in num_layers_params:
    print(f'num_layers = {num_layers}')
    model = LSTM(vocab_dim=len(word2idx), num_layers=num_layers)
    model.to(device)
    writer = SummaryWriter(log_dir=f'tensorboard2/num_layers_{num_layers}')
    call = callback(writer, test_data, loss_function)
    check_model(64, test_data, model, loss_function, idx2word, idx2pos)
    trainer(count_of_epoch=10, 
            batch_size=64, 
            dataset=train_data,
            model=model, 
            loss_function=loss_function,
            optimizer = optimizer,
            callback=call)
    check_model(64, test_data, model, loss_function, idx2word, idx2pos)

num_layers = 3
loss: 2.852248439788818, acc: 0.05446118116378784
+-------------+----------+---------------+
| Word        | True tag | Predicted tag |
+-------------+----------+---------------+
| Игра        | NOUN     | ADJ           |
| начнется    | VERB     | ADJ           |
| в           | ADP      | ADJ           |
| 22          | NUM      | ADJ           |
| :           | PUNCT    | PART          |
| 00          | NUM      | PART          |
| по          | ADP      | PART          |
| московскому | ADJ      | PART          |
| времени     | NOUN     | PART          |
| .           | PUNCT    | PART          |
+-------------+----------+---------------+


epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=2.2201104164123535, val_loss=2.2465128803253176, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=2.057480573654175, val_loss=2.093374423980713, val_acc=0.4090382158756256


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=1.8026366233825684, val_loss=1.788724045753479, val_acc=0.42931631207466125


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=1.5593254566192627, val_loss=1.6113996601104736, val_acc=0.42989569902420044


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=1.3253298997879028, val_loss=1.3785790634155273, val_acc=0.57184237241745
loss: 1.3360270595550536, acc: 0.5782155394554138
+--------+----------+---------------+
| Word   | True tag | Predicted tag |
+--------+----------+---------------+
| Первое | ADJ      | NOUN          |
| место  | NOUN     | NOUN          |
| в      | ADP      | ADP           |
| 450    | NUM      | NOUN          |
| тысяч  | NOUN     | NOUN          |
| рублей | NOUN     | NOUN          |
| .      | PUNCT    | PUNCT         |
+--------+----------+---------------+
num_layers = 5
loss: 2.9009469413757323, acc: 0.10196986794471741
+-------------+----------+---------------+
| Word        | True tag | Predicted tag |
+-------------+----------+---------------+
| В           | ADP      | VERB          |
| частности   | NOUN     | VERB          |
| ,           | PUNCT    | VERB          |
| в           | ADP      | VERB          |
| 2018        | ADJ      | VERB          |
| году        | NOUN    

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=2.219787120819092, val_loss=2.2424987888336183, val_acc=0.26940903067588806


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=2.2401657104492188, val_loss=2.218400115966797, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=2.1815345287323, val_loss=2.2044293308258056, val_acc=0.27056777477264404


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=2.1702799797058105, val_loss=2.1949933338165284, val_acc=0.2711471617221832


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=1.9356715679168701, val_loss=1.9356672525405885, val_acc=0.4258400797843933
loss: 1.8596412706375123, acc: 0.43568944931030273
+--------------+----------+---------------+
| Word         | True tag | Predicted tag |
+--------------+----------+---------------+
| В            | ADP      | ADP           |
| мае          | NOUN     | NOUN          |
| The          | X        | NOUN          |
| National     | X        | PUNCT         |
| Interest     | X        | NOUN          |
| отмечал      | VERB     | NOUN          |
| ,            | PUNCT    | PUNCT         |
| что          | SCONJ    | PUNCT         |
| системы      | NOUN     | NOUN          |
| поиска       | NOUN     | NOUN          |
| и            | CCONJ    | PUNCT         |
| ,            | PUNCT    | PUNCT         |
| в            | ADP      | NOUN          |
| области      | NOUN     | NOUN          |
| излучения    | NOUN     | NOUN          |
| ,            | PUNCT    | PUNCT         |
| Китаем     

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=2.203721761703491, val_loss=2.2339455699920654, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=2.150705099105835, val_loss=2.2101818561553954, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=2.175626754760742, val_loss=2.1964842224121095, val_acc=0.2885283827781677


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=2.1368587017059326, val_loss=2.1873823261260985, val_acc=0.2885283827781677


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=2.181196689605713, val_loss=2.1840294456481932, val_acc=0.2885283827781677
loss: 2.182241611480713, acc: 0.2885283827781677
+---------------+----------+---------------+
| Word          | True tag | Predicted tag |
+---------------+----------+---------------+
| Ранее         | ADV      | ADP           |
| в             | ADP      | NOUN          |
| воскресенье   | NOUN     | NOUN          |
| ,             | PUNCT    | NOUN          |
| 30            | ADJ      | NOUN          |
| сентября      | NOUN     | NOUN          |
| ,             | PUNCT    | NOUN          |
| объявил       | VERB     | NOUN          |
| ,             | PUNCT    | NOUN          |
| что           | SCONJ    | NOUN          |
| у             | ADP      | NOUN          |
| ВМС           | PROPN    | NOUN          |
| США           | PROPN    | NOUN          |
| «             | PUNCT    | NOUN          |
| есть          | VERB     | NOUN          |
| возможность   | NOUN     | NOUN         

Увеличивать число слоёв не надо.

Зависимость от dropout:

In [32]:
for dropout in dropout_params:
    print(f'dropout = {dropout}')
    model = LSTM(vocab_dim=len(word2idx), dropout=dropout)
    model.to(device)
    writer = SummaryWriter(log_dir=f'tensorboard2/dropout_{dropout}')
    call = callback(writer, test_data, loss_function)
    check_model(64, test_data, model, loss_function, idx2word, idx2pos)
    trainer(count_of_epoch=10, 
            batch_size=64, 
            dataset=train_data,
            model=model, 
            loss_function=loss_function,
            optimizer = optimizer,
            callback=call)
    check_model(64, test_data, model, loss_function, idx2word, idx2pos)

dropout = 0
loss: 2.945477523803711, acc: 0.03244495764374733
+------------+----------+---------------+
| Word       | True tag | Predicted tag |
+------------+----------+---------------+
| В          | ADP      | PRON          |
| нем        | PRON     | PRON          |
| они        | PRON     | PRON          |
| опровергли | VERB     | PRON          |
| информацию | NOUN     | PRON          |
| о          | ADP      | PRON          |
| связи      | NOUN     | PRON          |
| с          | ADP      | PRON          |
| российской | ADJ      | PRON          |
| разведкой  | NOUN     | PRON          |
| и          | CCONJ    | PRON          |
| утверждали | VERB     | PRON          |
| ,          | PUNCT    | PRON          |
| что        | SCONJ    | PRON          |
| с          | ADP      | PRON          |
| визитом    | NOUN     | PRON          |
| и          | CCONJ    | PRON          |
| собирались | VERB     | PRON          |
| посмотреть | VERB     | PRON          |
| на         |

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=2.2123637199401855, val_loss=2.2403217792510985, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=2.1260454654693604, val_loss=2.212393674850464, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=2.1005024909973145, val_loss=2.14947811126709, val_acc=0.2960602343082428


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=1.8126873970031738, val_loss=1.821110439300537, val_acc=0.45249128341674805


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=1.5282050371170044, val_loss=1.5877122211456298, val_acc=0.5428736805915833
loss: 1.5314189672470093, acc: 0.5544611811637878
+--------------+----------+---------------+
| Word         | True tag | Predicted tag |
+--------------+----------+---------------+
| В            | ADP      | ADP           |
| нем          | PRON     | NOUN          |
| представлены | VERB     | NOUN          |
| сезона       | NOUN     | NOUN          |
| :            | PUNCT    | PUNCT         |
| для          | ADP      | ADP           |
| мужчин       | NOUN     | NOUN          |
| и            | CCONJ    | PUNCT         |
| женщин       | NOUN     | NOUN          |
| и            | CCONJ    | PUNCT         |
| кроссовки    | NOUN     | NOUN          |
| брендов      | NOUN     | NOUN          |
| Balenciaga   | PROPN    | NOUN          |
| ,            | PUNCT    | PUNCT         |
| ,            | PUNCT    | PUNCT         |
| Off-White    | X        | NOUN          |
| ,           

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=2.2276530265808105, val_loss=2.2512574672698973, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=2.0926265716552734, val_loss=2.1488070678710938, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=1.8841774463653564, val_loss=1.8341285467147828, val_acc=0.27636152505874634


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=1.5292072296142578, val_loss=1.5010148334503173, val_acc=0.4889918863773346


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=1.3980790376663208, val_loss=1.2945236253738404, val_acc=0.5283893346786499
loss: 1.2694461011886597, acc: 0.5336036682128906
+---------+----------+---------------+
| Word    | True tag | Predicted tag |
+---------+----------+---------------+
| В       | ADP      | PUNCT         |
| ответ   | NOUN     | NOUN          |
| он      | PRON     | CCONJ         |
| :       | PUNCT    | PUNCT         |
| «       | PUNCT    | PUNCT         |
| быть    | AUX      | CCONJ         |
| не      | PART     | PRON          |
| стыдно  | ADV      | VERB          |
| ,       | PUNCT    | PUNCT         |
| стыдно  | ADV      | VERB          |
| быть    | AUX      | PRON          |
| дешевым | ADJ      | VERB          |
| »       | PUNCT    | PUNCT         |
| .       | PUNCT    | PUNCT         |
+---------+----------+---------------+
dropout = 0.5
loss: 2.818583927154541, acc: 0.26998841762542725
+------------+----------+---------------+
| Word       | True tag | Predicted tag |


epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=2.271939277648926, val_loss=2.2395279693603514, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=1.9974396228790283, val_loss=1.9217410564422608, val_acc=0.421205073595047


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=1.767446756362915, val_loss=1.7335862159729003, val_acc=0.4304750859737396


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=1.635204553604126, val_loss=1.6136111688613892, val_acc=0.42989569902420044


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=1.5963246822357178, val_loss=1.5101778984069825, val_acc=0.444959431886673
loss: 1.4869454336166381, acc: 0.44553881883621216
+----------+----------+---------------+
| Word     | True tag | Predicted tag |
+----------+----------+---------------+
| Поставки | NOUN     | NOUN          |
| ЗРК      | PROPN    | NOUN          |
| начались | VERB     | NOUN          |
| в        | ADP      | NOUN          |
| конце    | NOUN     | NOUN          |
| сентября | NOUN     | NOUN          |
| .        | PUNCT    | PUNCT         |
+----------+----------+---------------+


Добавление dropout 0.25 заметно уменьшило loss, но также немного упала acc, нужен дополнительный анализ.

Добавление BatchNorm:

In [33]:
for batch_norm in batch_norm_params:
    print(f'batchnorm = {batch_norm}')
    model = LSTM(vocab_dim=len(word2idx), batch_norm=batch_norm)
    model.to(device)
    writer = SummaryWriter(log_dir=f'tensorboard2/batch_norm_{batch_norm}')
    call = callback(writer, test_data, loss_function)
    check_model(64, test_data, model, loss_function, idx2word, idx2pos)
    trainer(count_of_epoch=10, 
            batch_size=64, 
            dataset=train_data,
            model=model, 
            loss_function=loss_function,
            optimizer = optimizer,
            callback=call)
    check_model(64, test_data, model, loss_function, idx2word, idx2pos)

batchnorm = False
loss: 2.8802488899230956, acc: 0.09327925741672516
+-------------+----------+---------------+
| Word        | True tag | Predicted tag |
+-------------+----------+---------------+
| Он          | PRON     | PROPN         |
| подчеркивал | VERB     | PROPN         |
| ,           | PUNCT    | PROPN         |
| что         | SCONJ    | PROPN         |
| весь        | DET      | PROPN         |
| план        | NOUN     | PROPN         |
| обойдется   | VERB     | PROPN         |
| бюджету     | NOUN     | PROPN         |
| в           | ADP      | ADP           |
| 25          | NUM      | ADP           |
| триллионов  | NOUN     | ADP           |
| рублей      | NOUN     | PROPN         |
| .           | PUNCT    | ADP           |
+-------------+----------+---------------+


epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=2.1776533126831055, val_loss=2.244501762390137, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=2.1664609909057617, val_loss=2.2196861553192138, val_acc=0.26998841762542725


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=1.9611331224441528, val_loss=2.0332080554962157, val_acc=0.40556198358535767


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=1.6694642305374146, val_loss=1.631487078666687, val_acc=0.5521436929702759


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=1.3432894945144653, val_loss=1.3564095163345338, val_acc=0.567207396030426
loss: 1.3256085062026977, acc: 0.5741599202156067
+----------+----------+---------------+
| Word     | True tag | Predicted tag |
+----------+----------+---------------+
| Поставки | NOUN     | NOUN          |
| ЗРК      | PROPN    | NOUN          |
| начались | VERB     | NOUN          |
| в        | ADP      | ADP           |
| конце    | NOUN     | NOUN          |
| сентября | NOUN     | NOUN          |
| .        | PUNCT    | PUNCT         |
+----------+----------+---------------+
batchnorm = True
loss: 2.987369613647461, acc: 0.03244495764374733
+--------------+----------+---------------+
| Word         | True tag | Predicted tag |
+--------------+----------+---------------+
| Ему          | PRON     | PRON          |
| предъявили   | VERB     | PRON          |
| обвинение    | NOUN     | PRON          |
| в            | ADP      | PRON          |
| хранении     | NOUN     | PRON    

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=1.5837328433990479, val_loss=1.5840169477462769, val_acc=0.5301274657249451


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=1.1960124969482422, val_loss=1.2515284252166747, val_acc=0.5950173735618591


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=1.044409990310669, val_loss=1.0368707275390625, val_acc=0.6500579118728638


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=0.8375889658927917, val_loss=0.8608740854263306, val_acc=0.7097334861755371


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=0.6483244895935059, val_loss=0.7195430016517639, val_acc=0.764194667339325
loss: 0.7100648617744446, acc: 0.7711471319198608
+-------------+----------+---------------+
| Word        | True tag | Predicted tag |
+-------------+----------+---------------+
| Украинская  | ADJ      | ADJ           |
| компания    | NOUN     | NOUN          |
| добивается  | VERB     | VERB          |
| ареста      | NOUN     | VERB          |
| европейских | ADJ      | VERB          |
| активов     | NOUN     | NOUN          |
| в           | ADP      | ADP           |
| целях       | NOUN     | NOUN          |
| .           | PUNCT    | PUNCT         |
+-------------+----------+---------------+


Очевидно, что добавление BatchNorm значительно улучшило модель.

Разные размеры словаря:

In [34]:
for len_dict in len_dict_params:
    print(f'len_dict = {len_dict}')
    list_of_sent, list_of_tags = get_sent_tags(docs, len_dict)
    pos2idx, idx2pos = pos_dict(list_of_tags)
    word2idx, idx2word = word_dict(list_of_sent)
    train_data = NerusDataset(list_of_sent, list_of_tags, word2idx, pos2idx, train=True)
    test_data = NerusDataset(list_of_sent, list_of_tags, word2idx, pos2idx, train=False)
    model = LSTM(vocab_dim=len(word2idx))
    model.to(device)
    writer = SummaryWriter(log_dir=f'tensorboard2/len_dict_{len_dict}')
    call = callback(writer, test_data, loss_function)
    check_model(64, test_data, model, loss_function, idx2word, idx2pos)
    trainer(count_of_epoch=10, 
            batch_size=64, 
            dataset=train_data,
            model=model, 
            loss_function=loss_function,
            optimizer = optimizer,
            callback=call)
    check_model(64, test_data, model, loss_function, idx2word, idx2pos)

len_dict = 10000


0it [00:00, ?it/s]

loss: 2.895909643173218, acc: 0.06179473549127579
+------------+----------+---------------+
| Word       | True tag | Predicted tag |
+------------+----------+---------------+
| В          | ADP      | PROPN         |
| ноябре     | NOUN     | PROPN         |
| 2017       | ADJ      | PROPN         |
| года       | NOUN     | PROPN         |
| героев     | NOUN     | PROPN         |
| фото       | NOUN     | PROPN         |
| обнаружили | VERB     | PROPN         |
| и          | PART     | PROPN         |
| на         | ADP      | PROPN         |
| других     | ADJ      | PROPN         |
| снимках    | NOUN     | PROPN         |
| из         | ADP      | PROPN         |
| фотобанка  | NOUN     | PROPN         |
| ,          | PUNCT    | PROPN         |
| составив   | VERB     | PROPN         |
| из         | ADP      | PROPN         |
| них        | PRON     | PROPN         |
| «          | PUNCT    | PROPN         |
| и          | CCONJ    | PROPN         |
| »          | PUNCT    | 

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=300, train_loss=2.2059757709503174, val_loss=2.1390775871276855, val_acc=0.31058570742607117


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=600, train_loss=2.161630153656006, val_loss=2.104987907409668, val_acc=0.31058570742607117


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=900, train_loss=1.9814646244049072, val_loss=1.8798828172683715, val_acc=0.43578723073005676


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1200, train_loss=1.7002915143966675, val_loss=1.6306676816940309, val_acc=0.4551316797733307


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]

	 step=1500, train_loss=1.5539114475250244, val_loss=1.4484354782104492, val_acc=0.48522302508354187
loss: 1.413751630783081, acc: 0.5024181008338928
+--------------+----------+---------------+
| Word         | True tag | Predicted tag |
+--------------+----------+---------------+
| якобы        | ADV      | PUNCT         |
| для          | ADP      | NOUN          |
| осмотра      | NOUN     | NOUN          |
| недвижимости | NOUN     | NOUN          |
| ,            | PUNCT    | PUNCT         |
| приставы     | NOUN     | NOUN          |
| арестовали   | VERB     | NOUN          |
| имущество    | NOUN     | NOUN          |
| мужчины      | NOUN     | NOUN          |
| .            | PUNCT    | PUNCT         |
+--------------+----------+---------------+
len_dict = 25000


0it [00:00, ?it/s]

loss: 2.9200546550750732, acc: 0.012364760041236877
+---------------+----------+---------------+
| Word          | True tag | Predicted tag |
+---------------+----------+---------------+
| Керимов       | PROPN    | CCONJ         |
| останется     | VERB     | SCONJ         |
| во            | ADP      | SCONJ         |
| Франции       | PROPN    | SCONJ         |
| до            | ADP      | SCONJ         |
| конца         | NOUN     | SCONJ         |
| расследования | NOUN     | SCONJ         |
| ,             | PUNCT    | SCONJ         |
| ему           | PRON     | SCONJ         |
| необходимо    | ADJ      | SCONJ         |
| несколько     | NUM      | SCONJ         |
| раз           | NOUN     | SCONJ         |
| в             | ADP      | SCONJ         |
| неделю        | NOUN     | SCONJ         |
| приходить     | VERB     | SCONJ         |
| в             | ADP      | SCONJ         |
| полицию       | NOUN     | SCONJ         |
| и             | CCONJ    | SCONJ         |
| с

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/390 [00:00<?, ?it/s]

	 step=300, train_loss=2.205266237258911, val_loss=2.2072782039642336, val_acc=0.2797527015209198


  0%|          | 0/390 [00:00<?, ?it/s]

	 step=600, train_loss=2.0017404556274414, val_loss=2.0153594493865965, val_acc=0.4131890535354614


  0%|          | 0/390 [00:00<?, ?it/s]

	 step=900, train_loss=1.7597544193267822, val_loss=1.7773166704177856, val_acc=0.4322513937950134


  0%|          | 0/390 [00:00<?, ?it/s]

	 step=1200, train_loss=1.7158104181289673, val_loss=1.6910619926452637, val_acc=0.433281809091568
	 step=1500, train_loss=1.5986260175704956, val_loss=1.6188884782791138, val_acc=0.4451313614845276


  0%|          | 0/390 [00:00<?, ?it/s]

	 step=1800, train_loss=1.3853150606155396, val_loss=1.4687671041488648, val_acc=0.4832560420036316


  0%|          | 0/390 [00:00<?, ?it/s]

	 step=2100, train_loss=1.2064175605773926, val_loss=1.228657946586609, val_acc=0.6084492206573486


  0%|          | 0/390 [00:00<?, ?it/s]

	 step=2400, train_loss=1.0198686122894287, val_loss=1.0842780733108521, val_acc=0.645028293132782
	 step=2700, train_loss=0.9949629902839661, val_loss=0.9655955195426941, val_acc=0.6960329413414001


  0%|          | 0/390 [00:00<?, ?it/s]

	 step=3000, train_loss=0.7577994465827942, val_loss=0.8733797526359558, val_acc=0.743946373462677


  0%|          | 0/390 [00:00<?, ?it/s]

	 step=3300, train_loss=0.7671045660972595, val_loss=0.8103506684303283, val_acc=0.7789798974990845


  0%|          | 0/390 [00:00<?, ?it/s]

	 step=3600, train_loss=0.6736979484558105, val_loss=0.7596487665176391, val_acc=0.7934054136276245
	 step=3900, train_loss=0.6595547795295715, val_loss=0.7044067120552063, val_acc=0.81658935546875
loss: 0.7044067120552063, acc: 0.81658935546875
+--------------+----------+---------------+
| Word         | True tag | Predicted tag |
+--------------+----------+---------------+
| Они          | PRON     | PRON          |
| окружили     | VERB     | VERB          |
| микроавтобус | NOUN     | NOUN          |
| ,            | PUNCT    | PUNCT         |
| в            | ADP      | ADP           |
| котором      | PRON     | PRON          |
| находился    | VERB     | VERB          |
| политик      | NOUN     | NOUN          |
| ,            | PUNCT    | PUNCT         |
| и            | CCONJ    | CCONJ         |
| взломали     | VERB     | VERB          |
| дверь        | NOUN     | NOUN          |
| машины       | NOUN     | NOUN          |
| ,            | PUNCT    | PUNCT         |
| посл

0it [00:00, ?it/s]

loss: 2.8644146633148195, acc: 0.012960082851350307
+---------------+----------+---------------+
| Word          | True tag | Predicted tag |
+---------------+----------+---------------+
| Аналитики     | NOUN     | SCONJ         |
| Российского   | ADJ      | SCONJ         |
| национального | ADJ      | SCONJ         |
| коммерческого | ADJ      | SCONJ         |
| банка         | NOUN     | SCONJ         |
| (             | PUNCT    | SCONJ         |
| РНКБ          | PROPN    | SCONJ         |
| )             | PUNCT    | SCONJ         |
| полагают      | VERB     | SCONJ         |
| ,             | PUNCT    | SCONJ         |
| что           | SCONJ    | SCONJ         |
| в             | ADP      | SCONJ         |
| ближайшие     | ADJ      | SCONJ         |
| пять          | NUM      | SCONJ         |
| лет           | NOUN     | SCONJ         |
| более         | ADV      | SCONJ         |
| 150           | NUM      | SCONJ         |
| тысяч         | NOUN     | SCONJ         |
| ж

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/780 [00:00<?, ?it/s]

	 step=300, train_loss=2.183155059814453, val_loss=2.1786864757537843, val_acc=0.27216172218322754
	 step=600, train_loss=2.1767117977142334, val_loss=2.1505710601806642, val_acc=0.27216172218322754


  0%|          | 0/780 [00:00<?, ?it/s]

	 step=900, train_loss=2.0083038806915283, val_loss=2.025839018821716, val_acc=0.28201138973236084
	 step=1200, train_loss=1.629385232925415, val_loss=1.58397611618042, val_acc=0.4815966784954071
	 step=1500, train_loss=1.351584553718567, val_loss=1.284998745918274, val_acc=0.5624675750732422


  0%|          | 0/780 [00:00<?, ?it/s]

	 step=1800, train_loss=1.093247890472412, val_loss=1.0852915096282958, val_acc=0.6448937058448792
	 step=2100, train_loss=0.9666929841041565, val_loss=0.9442495131492614, val_acc=0.7065836787223816


  0%|          | 0/780 [00:00<?, ?it/s]

	 step=2400, train_loss=0.8726097941398621, val_loss=0.8348084139823914, val_acc=0.766200065612793
	 step=2700, train_loss=0.7366381287574768, val_loss=0.7355703711509705, val_acc=0.8035251498222351
	 step=3000, train_loss=0.7304397821426392, val_loss=0.6573810315132141, val_acc=0.8211508393287659


  0%|          | 0/780 [00:00<?, ?it/s]

	 step=3300, train_loss=0.6169717907905579, val_loss=0.5969770193099976, val_acc=0.8320373296737671
	 step=3600, train_loss=0.6138284206390381, val_loss=0.5502880692481995, val_acc=0.8382581472396851
	 step=3900, train_loss=0.514554500579834, val_loss=0.5130831575393677, val_acc=0.8460341691970825


  0%|          | 0/780 [00:00<?, ?it/s]

	 step=4200, train_loss=0.4656292796134949, val_loss=0.4799578404426575, val_acc=0.8584758639335632
	 step=4500, train_loss=0.5110749006271362, val_loss=0.4557966780662537, val_acc=0.8636599183082581


  0%|          | 0/780 [00:00<?, ?it/s]

	 step=4800, train_loss=0.4479440450668335, val_loss=0.4349083018302917, val_acc=0.8683255314826965
	 step=5100, train_loss=0.4324362277984619, val_loss=0.41828259110450744, val_acc=0.8786935806274414
	 step=5400, train_loss=0.3968941867351532, val_loss=0.39643168926239014, val_acc=0.8875064849853516


  0%|          | 0/780 [00:00<?, ?it/s]

	 step=5700, train_loss=0.4182208180427551, val_loss=0.37907384753227236, val_acc=0.89217209815979
	 step=6000, train_loss=0.3642757534980774, val_loss=0.36859025597572326, val_acc=0.89217209815979


  0%|          | 0/780 [00:00<?, ?it/s]

	 step=6300, train_loss=0.33775076270103455, val_loss=0.3574409854412079, val_acc=0.8958008885383606
	 step=6600, train_loss=0.32299473881721497, val_loss=0.3458834254741669, val_acc=0.8994297385215759
	 step=6900, train_loss=0.31413009762763977, val_loss=0.3321727848052978, val_acc=0.9004665613174438


  0%|          | 0/780 [00:00<?, ?it/s]

	 step=7200, train_loss=0.32618218660354614, val_loss=0.3247833800315857, val_acc=0.9035769701004028
	 step=7500, train_loss=0.25204774737358093, val_loss=0.3175006139278412, val_acc=0.9051321744918823
	 step=7800, train_loss=0.2642120122909546, val_loss=0.30874947071075437, val_acc=0.9051321744918823
loss: 0.30874947071075437, acc: 0.9051321744918823
+--------------+----------+---------------+
| Word         | True tag | Predicted tag |
+--------------+----------+---------------+
| Когда        | SCONJ    | NUM           |
| он           | PRON     | PRON          |
| у            | ADP      | ADP           |
| вас          | PRON     | ADV           |
| в            | ADP      | ADP           |
| следственном | ADJ      | ADJ           |
| изоляторе    | NOUN     | NOUN          |
| ,            | PUNCT    | PUNCT         |
| тогда        | ADV      | ADV           |
| он           | PRON     | PRON          |
| будет        | AUX      | AUX           |
| давать       | VERB     | VE

Логично, что при большем количестве данных модель лучше, но дольше обучается.

## Выводы

Явный положительный эффект дало увеличение размера слоя, добавление BatchNorm и увеличение размера словаря.

Число слоёв, скорее всего, надо было увеличивать пропорционально росту размеров каждого слоя. А также пробовать комбинировать с другими улучшениями. Ведь в эксперименте был зафиксирован размер слоя 10, что, возможно, было слишком малым для числа слоёв 5 и 7. Либо маленького размера обучающей выборки не хватило для обучения более сложной модели.

При добавлении dropout, видимо, надо было дольше обучать модель (увеличить размер словаря или даже число эпох).

### Дополнительные выводы после просмотра tensorboard

BatchNorm заметно ускоряет обучение; увеличение размера слоя однозначно улучшает модель; dropout 0.5 слишком большой, 0.25 уменьшает loss, но почти не меняет acc, если зафиксировать размер выборки и число эпох обучения; увеличение размера словаря безоговорочно важно для качественного обучения модели; увеличение количества слоёв сильно усложняет модель, что ведёт к необходимости более долгого обучения (ближе к концу обучения модель с 5 слоями резко начала улучшаться, хотя до этого была похожа на модель с 7 слоями, а сначала даже модель с 3 слоями по графику loss на train не отличалась от 5 и 7 слоёв).