In [1]:
# import argparse

# parser = argparse.ArgumentParser(description='Change encoding from BIO to BIOES')
# parser.add_argument('input', type=str, default='./data/',help='The path to the original file with BIO encoding')
# parser.add_argument('output', type=str, default='./out_data/', help='The name of your BIOES encoded file')
# args = parser.parse_args()

# input_file = args.input
# output_file = args.output


def read_file(input_file):
    with open(input_file, 'r', encoding="utf8") as f:
        return f.read().split('\n')[:-1]


def write_line(new_label, prev_label, line_content, output_file):
    new_iob = new_label + prev_label
    line_content[-1] = new_iob
    current_line = ' '.join(line_content)
    output_file.write(current_line + '\n')


def not_same_tag(tag1, tag2):
    return tag1.split("-")[-1] != tag2.split("-")[-1]


def convert(input_file, output_path):
    output_file = open(output_path, 'w', encoding="utf8")

    for i in range(len(input_file)):

        try:
            current_line = input_file[i]

            if '-DOCSTART-' in current_line:
                output_file.write(current_line + "\n")
            elif len(current_line.strip()) == 0:
                output_file.write(current_line.strip() + "\n")
                output_file.flush()
            else:
                output_file.flush()
                if i == 280:
                    print(current_line)
                prev_iob = ""
                next_iob = ""
                prev_line = None
                next_line = None

                try:
                    prev_line = input_file[i - 1]
                    next_line = input_file[i + 1]

                    if len(prev_line.strip()) > 0:
                        prev_line_content = prev_line.split()
                        prev_iob = prev_line_content[-1]

                    if len(next_line.strip()) > 0:
                        next_line_content = next_line.split()
                        next_iob = next_line_content[-1]

                except IndexError:
                    pass

                current_line_content = current_line.strip().split()
                current_iob = current_line_content[-1]

                # Outside entities
                if current_iob == 'O':
                    output_file.write(current_line + "\n")

                # Unit length entities
                elif current_iob.startswith("B-") and \
                        (next_iob == 'O' or len(next_line.strip()) == 0 or next_iob.startswith("B-")):
                    write_line('S-', current_iob[2:], current_line_content, output_file)

                # First element of chunk
                elif current_iob.startswith("B-") and \
                        (not not_same_tag(current_iob, next_iob) and next_iob.startswith("I-")):
                    write_line('B-', current_iob[2:], current_line_content, output_file)

                # Last element of chunk
                elif current_iob.startswith("I-") and \
                        (next_iob == 'O' or len(next_line.strip()) == 0 or next_iob.startswith("B-")):
                    write_line('E-', current_iob[2:], current_line_content, output_file)

                # Inside a chunk
                elif current_iob.startswith("I-") and \
                        next_iob.startswith("I-"):
                    write_line('I-', current_iob[2:], current_line_content, output_file)

        except IndexError:
            pass

In [2]:
import torch
from torch import nn


class CRF(nn.Module):
    def __init__(self, label_size):
        '''label_size = real size + 2, included START and END '''
        super(CRF, self).__init__()

        self.label_size = label_size
        self.start = self.label_size - 2
        self.end = self.label_size - 1
        transition = torch.randn(self.label_size, self.label_size)
        self.transition = nn.Parameter(transition)
        self.initialize()

    def initialize(self):
        nn.init.uniform_(self.transition.data, -0.1, 0.1)
        self.transition.data[:, self.end] = -1000.0
        self.transition.data[self.start, :] = -1000.0

    def pad_logits(self, logits):
        batch_size, seq_len, label_num = logits.size()
        pads = logits.new_full((batch_size, seq_len, 2), -1000.0,
                               requires_grad=False)
        logits = torch.cat([logits, pads], dim=2)
        return logits

    def calc_binary_score(self, labels, predict_mask):
        '''
        Gold transition score
        :param labels: [batch_size, seq_len] LongTensor
        :param predict_mask: [batch_size, seq_len] LongTensor
        :return: [batch_size] FloatTensor
        '''
        batch_size, seq_len = labels.size()

        labels_ext = labels.new_empty((batch_size, seq_len + 2))
        labels_ext[:, 0] = self.start
        labels_ext[:, 1:-1] = labels
        labels_ext[:, -1] = self.end
        pad = predict_mask.new_ones([batch_size, 1], requires_grad=False)
        pad_stop = labels.new_full([batch_size, 1], self.end, requires_grad=False)
        mask = torch.cat([pad, predict_mask, pad], dim=-1).long()
        labels = (1 - mask) * pad_stop + mask * labels_ext

        trn = self.transition
        trn_exp = trn.unsqueeze(0).expand(batch_size, *trn.size())
        lbl_r = labels[:, 1:]
        lbl_rexp = lbl_r.unsqueeze(-1).expand(*lbl_r.size(), trn.size(0))
        trn_row = torch.gather(trn_exp, 1, lbl_rexp)

        lbl_lexp = labels[:, :-1].unsqueeze(-1)
        trn_scr = torch.gather(trn_row, 2, lbl_lexp)
        trn_scr = trn_scr.squeeze(-1)

        mask = torch.cat([pad, predict_mask], dim=-1).float()
        trn_scr = trn_scr * mask
        score = trn_scr

        return score

    def calc_unary_score(self, logits, labels, predict_mask):
        '''
        Gold logits score
        :param logits: [batch_size, seq_len, n_labels] FloatTensor
        :param labels: [batch_size, seq_len] LongTensor
        :param predict_mask: [batch_size, seq_len] LongTensor
        :return: [batch_size] FloatTensor
        '''
        labels_exp = labels.unsqueeze(-1)
        scores = torch.gather(logits, 2, labels_exp).squeeze(-1)
        scores = scores * predict_mask.float()
        return scores

    def calc_gold_score(self, logits, labels, predict_mask):
        '''
        Total score of gold sequence.
        :param logits: [batch_size, seq_len, n_labels] FloatTensor
        :param labels: [batch_size, seq_len] LongTensor
        :param predict_mask: [batch_size, seq_len] LongTensor
        :return: [batch_size] FloatTensor
        '''
        unary_score = self.calc_unary_score(logits, labels, predict_mask).sum(
            1).squeeze(-1)
        # print(unary_score)
        binary_score = self.calc_binary_score(labels, predict_mask).sum(1).squeeze(-1)
        # print(binary_score)
        return unary_score + binary_score

    def calc_norm_score(self, logits, predict_mask):
        '''
        Total score of all sequences.
        :param logits: [batch_size, seq_len, n_labels] FloatTensor
        :param predict_mask: [batch_size, seq_len] LongTensor
        :return: [batch_size] FloatTensor
        '''
        batch_size, seq_len, feat_dim = logits.size()

        alpha = logits.new_full((batch_size, self.label_size), -100.0)
        alpha[:, self.start] = 0

        predict_mask_ = predict_mask.clone()  # (batch_size, max_seq)

        logits_t = logits.transpose(1, 0)  # (max_seq, batch_size, num_labels + 2)
        predict_mask_ = predict_mask_.transpose(1, 0)  # (max_seq, batch_size)
        for word_mask_, logit in zip(predict_mask_, logits_t):
            logit_exp = logit.unsqueeze(-1).expand(batch_size,
                                                   *self.transition.size())
            alpha_exp = alpha.unsqueeze(1).expand(batch_size,
                                                  *self.transition.size())
            trans_exp = self.transition.unsqueeze(0).expand_as(alpha_exp)
            mat = logit_exp + alpha_exp + trans_exp
            alpha_nxt = log_sum_exp(mat, 2).squeeze(-1)

            mask = word_mask_.float().unsqueeze(-1).expand_as(alpha)  # (batch_size, num_labels+2)
            alpha = mask * alpha_nxt + (1 - mask) * alpha

        alpha = alpha + self.transition[self.end].unsqueeze(0).expand_as(alpha)
        norm = log_sum_exp(alpha, 1).squeeze(-1)

        return norm

    def viterbi_decode(self, logits, predict_mask):
        """
        :param logits: [batch_size, seq_len, n_labels] FloatTensor
        :param predict_mask: [batch_size, seq_len] LongTensor
        :return scores: [batch_size] FloatTensor
        :return paths: [batch_size, seq_len] LongTensor
        """
        batch_size, seq_len, n_labels = logits.size()
        vit = logits.new_full((batch_size, self.label_size), -100.0)
        vit[:, self.start] = 0
        predict_mask_ = predict_mask.clone()  # (batch_size, max_seq)
        predict_mask_ = predict_mask_.transpose(1, 0)  # (max_seq, batch_size)
        logits_t = logits.transpose(1, 0)
        pointers = []
        for ix, logit in enumerate(logits_t):
            vit_exp = vit.unsqueeze(1).expand(batch_size, n_labels, n_labels)
            trn_exp = self.transition.unsqueeze(0).expand_as(vit_exp)
            vit_trn_sum = vit_exp + trn_exp
            vt_max, vt_argmax = vit_trn_sum.max(2)

            vt_max = vt_max.squeeze(-1)
            vit_nxt = vt_max + logit
            pointers.append(vt_argmax.squeeze(-1).unsqueeze(0))

            mask = predict_mask_[ix].float().unsqueeze(-1).expand_as(vit_nxt)
            vit = mask * vit_nxt + (1 - mask) * vit

            mask = (predict_mask_[ix:].sum(0) == 1).float().unsqueeze(-1).expand_as(vit_nxt)
            vit += mask * self.transition[self.end].unsqueeze(
                0).expand_as(vit_nxt)

        pointers = torch.cat(pointers)
        scores, idx = vit.max(1)
        paths = [idx.unsqueeze(1)]
        for argmax in reversed(pointers):
            idx_exp = idx.unsqueeze(-1)
            idx = torch.gather(argmax, 1, idx_exp)
            idx = idx.squeeze(-1)

            paths.insert(0, idx.unsqueeze(1))

        paths = torch.cat(paths[1:], 1)
        scores = scores.squeeze(-1)

        return scores, paths


def log_sum_exp(tensor, dim=0):
    """LogSumExp operation."""
    m, _ = torch.max(tensor, dim)
    m_exp = m.unsqueeze(-1).expand_as(tensor)
    return m + torch.log(torch.sum(torch.exp(tensor - m_exp), dim))


def test():
    torch.manual_seed(2)
    logits = torch.tensor([[[1.2, 2.1], [2.8, 2.1], [2.2, -2.1]], [[4.1, 2.2], [2.8, 2.1], [2.2, -2.1]]])  # 2, 3, 2
    predict_mask = torch.tensor([[1, 1, 0], [1, 0, 0]])  # 2, 3
    labels = torch.tensor([[1, 0, 0], [0, 1, 1]])  # 2, 3

    crf = CRF(4)
    logits = crf.pad_logits(logits)
    norm_score = crf.calc_norm_score(logits, predict_mask)
    print(norm_score)
    gold_score = crf.calc_gold_score(logits, labels, predict_mask)
    print(gold_score)
    loglik = gold_score - norm_score
    print(loglik)
    print(crf.viterbi_decode(logits, predict_mask))

# test()

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class CharCNN(nn.Module):
    def __init__(self, num_filters, kernel_sizes, padding):
        super(CharCNN, self).__init__()
        self.conv = nn.Conv2d(1, num_filters, kernel_sizes, padding=padding)

    def forward(self, x):
        '''
        :param x: (batch * seq_len, 1, max_word_len, char_embed_size)
        :return: (batch * seq_len, num_filters)
        '''
        x = self.conv(x).squeeze(-1)  # (batch * seq_len, num_filters, max_word_len)
        x_max = F.max_pool1d(x, x.size(2)).squeeze(-1)  # (batch * seq_len, num_filters)
        return x_max

class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=128, dropout_rate=0.1, layer_num=1):
        super(BiLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.layer_num = layer_num
        if layer_num == 1:
            self.bilstm = nn.LSTM(input_size, hidden_size // 2, layer_num, batch_first=True, bidirectional=True)

        else:
            self.bilstm = nn.LSTM(input_size, hidden_size // 2, layer_num, batch_first=True, dropout=dropout_rate,
                                  bidirectional=True)
        self.init_weights()

    def init_weights(self):
        for name, p in self.bilstm._parameters.items():
            if p.dim() > 1:
                bias = math.sqrt(6 / (p.size(0) / 4 + p.size(1)))
                nn.init.uniform_(p, -bias, bias)
            else:
                p.data.zero_()
                # This is the range of indices for our forget gates for each LSTM cell
                p.data[self.hidden_size // 2: self.hidden_size] = 1

    def forward(self, x, lens):
        '''
        :param x: (batch, seq_len, input_size)
        :param lens: (batch, )
        :return: (batch, seq_len, hidden_size)
        '''
        ordered_lens, index = lens.sort(descending=True)
        ordered_x = x[index]
        packed_x = nn.utils.rnn.pack_padded_sequence(ordered_x, ordered_lens, batch_first=True)
        packed_output, _ = self.bilstm(packed_x)
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        recover_index = index.argsort()
        output = output[recover_index]
        return output


class SoftmaxDecoder(nn.Module):
    def __init__(self, label_size, input_dim):
        super(SoftmaxDecoder, self).__init__()
        self.input_dim = input_dim
        self.label_size = label_size
        self.linear = torch.nn.Linear(input_dim, label_size)
        self.init_weights()

    def init_weights(self):
        bias = math.sqrt(6 / (self.linear.weight.size(0) + self.linear.weight.size(1)))
        nn.init.uniform_(self.linear.weight, -bias, bias)

    def forward_model(self, inputs):
        batch_size, seq_len, input_dim = inputs.size()
        output = inputs.contiguous().view(-1, self.input_dim)
        output = self.linear(output)
        output = output.view(batch_size, seq_len, self.label_size)
        return output

    def forward(self, inputs, lens, label_ids=None):
        logits = self.forward_model(inputs)
        p = torch.nn.functional.softmax(logits, -1)  # (batch_size, max_seq_len, num_labels)
        predict_mask = (torch.arange(inputs.size(1)).expand(len(lens), inputs.size(1))).to(lens.device) < lens.unsqueeze(1)
        if label_ids is not None:
            # cross entropy loss
            p = torch.nn.functional.softmax(logits, -1)  # (batch_size, max_seq_len, num_labels)
            one_hot_labels = torch.eye(self.label_size)[label_ids].type_as(p)
            losses = -torch.log(torch.sum(one_hot_labels * p, -1))  # (batch_size, max_seq_len)
            masked_losses = torch.masked_select(losses, predict_mask)  # (batch_sum_real_len)
            return masked_losses.sum()
        else:
            return torch.argmax(logits, -1), p

class CRFDecoder(nn.Module):
    def __init__(self, label_size, input_dim):
        super(CRFDecoder, self).__init__()
        self.input_dim = input_dim
        self.linear = nn.Linear(in_features=input_dim,
                                out_features=label_size)
        self.crf = CRF(label_size + 2)
        self.label_size = label_size

        self.init_weights()

    def init_weights(self):
        bias = math.sqrt(6 / (self.linear.weight.size(0) + self.linear.weight.size(1)))
        nn.init.uniform_(self.linear.weight, -bias, bias)

    def forward_model(self, inputs):
        batch_size, seq_len, input_dim = inputs.size()
        output = inputs.contiguous().view(-1, self.input_dim)
        output = self.linear(output)
        output = output.view(batch_size, seq_len, self.label_size)
        return output

    def forward(self, inputs, lens, labels=None):
        '''
        :param inputs:(batch_size, max_seq_len, input_dim)
        :param predict_mask:(batch_size, max_seq_len)
        :param labels:(batch_size, max_seq_len)
        :return: if labels is None, return preds(batch_size, max_seq_len) and p(batch_size, max_seq_len, num_labels);
                 else return loss (scalar).
        '''
        logits = self.forward_model(inputs)  # (batch_size, max_seq_len, num_labels)
        p = torch.nn.functional.softmax(logits, -1)  # (batch_size, max_seq_len, num_labels)
        logits = self.crf.pad_logits(logits)
        predict_mask = (torch.arange(inputs.size(1)).expand(len(lens), inputs.size(1))).to(lens.device) < lens.unsqueeze(1)
        if labels is None:
            _, preds = self.crf.viterbi_decode(logits, predict_mask)
            return preds, p
        return self.neg_log_likehood(logits, predict_mask, labels)

    def neg_log_likehood(self, logits, predict_mask, labels):
        norm_score = self.crf.calc_norm_score(logits, predict_mask)
        gold_score = self.crf.calc_gold_score(logits, labels, predict_mask)
        loglik = gold_score - norm_score
        return -loglik.sum()


class NER_Model(nn.Module):
    def __init__(self, word_embed, char_embed,
                 num_labels, hidden_size, dropout_rate=(0.33, 0.5, (0.33, 0.5)),
                 lstm_layer_num=1, kernel_step=3, char_out_size=100, use_char=False,
                 freeze=False, use_crf=True):
        super(NER_Model, self).__init__()
        self.word_embed = nn.Embedding.from_pretrained(word_embed, freeze)
        self.word_embed_size = word_embed.size(-1)
        self.use_char = use_char
        if use_char:
            self.char_embed = nn.Embedding.from_pretrained(char_embed, freeze)
            self.char_embed_size = char_embed.size(-1)
            self.charcnn = CharCNN(char_out_size, (kernel_step, self.char_embed_size), (2, 0))
            self.bilstm = BiLSTM(char_out_size + self.word_embed_size, hidden_size, dropout_rate[2][1], lstm_layer_num)
        else:
            self.bilstm = BiLSTM(self.word_embed_size, hidden_size, dropout_rate[2][1], lstm_layer_num)

        self.embed_dropout = nn.Dropout(dropout_rate[0])
        self.out_dropout = nn.Dropout(dropout_rate[1])
        self.rnn_in_dropout = nn.Dropout(dropout_rate[2][0])

        if use_crf:
            self.decoder = CRFDecoder(num_labels, hidden_size)
        else:
            self.decoder = SoftmaxDecoder(num_labels, hidden_size)


    def forward(self, word_ids, char_ids, lens, label_ids=None):
        '''

        :param word_ids: (batch_size, max_seq_len)
        :param char_ids: (batch_size, max_seq_len, max_word_len)
        :param predict_mask: (batch_size, max_seq_len)
        :param label_ids: (batch_size, max_seq_len, max_word_len)
        :return: if labels is None, return preds(batch_size, max_seq_len) and p(batch_size, max_seq_len, num_labels);
                 else return loss (scalar).
        '''
        word_embed = self.word_embed(word_ids)
        if self.char_embed:
            # reshape char_embed and apply to CNN
            char_embed = self.char_embed(char_ids).reshape(-1, char_ids.size(-1), self.char_embed_size).unsqueeze(1)
            char_embed = self.embed_dropout(
                char_embed)  # a dropout layer applied before character embeddings are input to CNN.
            char_embed = self.charcnn(char_embed)
            char_embed = char_embed.reshape(char_ids.size(0), char_ids.size(1), -1)
            embed = torch.cat([word_embed, char_embed], -1)
        else:
            embed = word_embed
        x = self.rnn_in_dropout(embed)
        hidden = self.bilstm(x, lens)  # (batch_size, max_seq_len, hidden_size)
        hidden = self.out_dropout(hidden)
        return self.decoder(hidden, lens, label_ids)


In [4]:
from torchtext import data
from torchtext.data import Iterator, BucketIterator
import os
import re
import math
import torch
import numpy as np


def read_data(input_file):
    """Reads a BIO data."""
    with open(input_file) as f:
        lines = []
        words = []
        labels = []
        for line in f:
            contends = line.strip()
            # if contends.startswith("-DOCSTART-"):
            #     continue
            if len(contends) == 0:
                if len(words) == 0:
                    continue
                lines.append([words, [list(word) for word in words], labels])
                words = []
                labels = []
                continue
            tokens = line.strip().split(' ')
            assert (len(tokens) == 4)
            word = tokens[0]
            label = tokens[-1]
            words.append(word)
            labels.append(label)
        return lines


class ConllDataset(data.Dataset):

    def __init__(self, word_field, char_field, label_field, datafile, **kwargs):
        fields = [("word", word_field), ("char", char_field), ("label", label_field)]
        datas = read_data(datafile)
        examples = []
        for word, char, label in datas:
            examples.append(data.Example.fromlist([word, char, label], fields))
        super(ConllDataset, self).__init__(examples, fields, **kwargs)


def unk_init(x):
    dim = x.size(-1)
    bias = math.sqrt(3.0 / dim)
    x.uniform_(-bias, bias)
    return x


def get_char_detail(train, other, embed_vocab=None):
    char2type = {}  # type, 1:ootv, 2:ooev, 3:oobv, 4:iv
    ootv = 0
    ootv_set = set()
    ooev = 0
    oobv = 0
    iv = 0
    fuzzy_iv = 0
    for sent in other:
        for w in sent:
            for c in w:
                if c not in char2type:
                    if c not in train.stoi:
                        if embed_vocab and (c in embed_vocab.stoi or c.lower() in embed_vocab.stoi):
                            ootv += 1
                            ootv_set.add(c)
                            char2type[c] = 1
                        else:
                            oobv += 1
                            char2type[c] = 3
                    else:
                        if embed_vocab and (c in embed_vocab.stoi or c.lower() in embed_vocab.stoi):
                            fuzzy_iv += 1 if c.lower() in embed_vocab.stoi else 0
                            iv += 1
                            char2type[c] = 4
                        else:
                            ooev += 1
                            char2type[c] = 2
    print("IV {}(fuzzy {})\nOOTV {}\nOOEV {}\nOOBV {}\n".format(iv, fuzzy_iv, ootv, ooev, oobv))
    return char2type, ootv_set


def get_word_detail(train, other, embed_vocab=None):
    '''
    OOTV words are the ones do not appear in training set but in embedding vocabulary
    OOEV words are the ones do not appear in embedding vocabulary but in training set
    OOBV words are the ones do not appears in both the training and embedding vocabulary
    IV words the ones appears in both the training and embedding vocabulary
    '''
    word2type = {}  # type, 1:ootv, 2:ooev, 3:oobv, 4:iv
    ootv = 0
    ootv_set = set()
    ooev = 0
    oobv = 0
    iv = 0
    fuzzy_iv = 0
    for sent in other:
        for w in sent:
            if w not in word2type:
                if w not in train.stoi:
                    if embed_vocab and (w in embed_vocab.stoi or w.lower() in embed_vocab.stoi):
                        ootv += 1
                        ootv_set.add(w)
                        word2type[w] = 1
                    else:
                        oobv += 1
                        word2type[w] = 3
                else:
                    if embed_vocab and (w in embed_vocab.stoi or w.lower() in embed_vocab.stoi):
                        fuzzy_iv += 1 if w not in embed_vocab.stoi else 0
                        iv += 1
                        word2type[w] = 4
                    else:
                        ooev += 1
                        word2type[w] = 2
    print("IV {}(fuzzy {})\nOOTV {}\nOOEV {}\nOOBV {}\n".format(iv, fuzzy_iv, ootv, ooev, oobv))
    return word2type, ootv_set


def get_entity_detail(vocab, data, tag2id, embed_vocab=None):
    entity2type = {}  # type, 1:ootv, 2:ooev, 3:oobv, 4:iv
    ootv = 0  # every word of the entity have embedding, but at least one word not in training set
    oobv = 0  # an entity is considered as OOBV if there exists at least one word not in training set and at least one word not in embedding vocabulary
    ooev = 0  # every word of the entity is in training set, but at least one word not have embedding
    iv = 0
    for ex in data.examples:
        ens = get_chunks(ex.label, tag2id, id_format=False)
        for e in ens:
            if e not in entity2type:
                entity_words = [ex.word[ix] for ix in range(e[1], e[2])]
                entity_word = ' '.join(entity_words)
                not_in_vocab = len(list(filter(lambda w: w not in vocab.stoi, entity_words)))
                if embed_vocab:
                    not_in_embed = len(list(
                        filter(lambda w: w not in embed_vocab.stoi and w.lower() not in embed_vocab.stoi,
                               entity_words)))
                if not_in_vocab > 0:
                    if embed_vocab and not_in_embed == 0:
                        ootv += 1
                        entity2type[entity_word] = 1
                    else:
                        oobv += 1
                        entity2type[entity_word] = 3
                else:
                    if embed_vocab and not_in_embed == 0:
                        iv += 1
                        entity2type[entity_word] = 4
                    else:
                        ooev += 1
                        entity2type[entity_word] = 2

    print("IV {}\nOOTV {}\nOOEV {}\nOOBV {}\n".format(iv, ootv, ooev, oobv))
    return entity2type


def extend(vocab, v, sort=False):
    words = sorted(v) if sort else v
    for w in words:
        if w not in vocab.stoi:
            vocab.itos.append(w)
            vocab.stoi[w] = len(vocab.itos) - 1


def get_entities(vocab, data, tag2id):
    entities = {}
    unk = 0
    conflict = 0
    for ex in data.examples:
        ens = get_chunks(ex.label, tag2id, id_format=False)
        for e in ens:
            entity_words = [ex.word[ix] if ex.word[ix] in vocab.stoi else vocab.UNK for ix in range(e[1], e[2])]
            entities.setdefault(' '.join(entity_words), set())
            entities[' '.join(entity_words)].add(e[0])
            if vocab.UNK in entity_words:
                unk += 1
            if len(entities[' '.join(entity_words)]) == 2:
                conflict += 1
    print("entities contains `UNK` %d\nconflict entities %d\nall entities: %d\n" % (unk, conflict, len(entities)))
    return entities


def load_iters(word_embed_size, word_vectors, char_embedding_size, char_vectors, batch_size=32, device="cpu",
               data_path='data', word2lower=True):
    zero_char_in_word = lambda ex: [re.sub('\d', '0', w) for w in ex]
    zero_char = lambda w: [re.sub('\d', '0', c) for c in w]

    WORD_TEXT = data.Field(lower=word2lower, batch_first=True, include_lengths=True,
                           preprocessing=zero_char_in_word)
    CHAR_NESTING = data.Field(tokenize=list, preprocessing=zero_char)  # process a word in char list
    CHAR_TEXT = data.NestedField(CHAR_NESTING)
    LABEL = data.Field(unk_token=None, pad_token="O", batch_first=True)

    train_data = ConllDataset(WORD_TEXT, CHAR_TEXT, LABEL, os.path.join(data_path, "train.txt"))
    dev_data = ConllDataset(WORD_TEXT, CHAR_TEXT, LABEL, os.path.join(data_path, "dev.txt"))
    test_data = ConllDataset(WORD_TEXT, CHAR_TEXT, LABEL, os.path.join(data_path, "test.txt"))

    print("train sentence num / total word num: %d/%d" % (
        len(train_data.examples), np.array([len(_.word) for _ in train_data.examples]).sum()))
    print("dev sentence num / total word num: %d/%d" % (
        len(dev_data.examples), np.array([len(_.word) for _ in dev_data.examples]).sum()))
    print("test sentence num / total word num: %d/%d" % (
        len(test_data.examples), np.array([len(_.word) for _ in test_data.examples]).sum()))

    LABEL.build_vocab(train_data.label)
    WORD_TEXT.build_vocab(train_data.word, max_size=50000, min_freq=1)
    CHAR_TEXT.build_vocab(train_data.char, max_size=50000, min_freq=1)

    # ------------------- word oov analysis-----------------------
    print('*' * 50 + ' unique words details of dev set ' + '*' * 50)
    dev_word2type, dev_ootv_set = get_word_detail(WORD_TEXT.vocab, dev_data.word, word_vectors)
    print('#' * 110)
    print('*' * 50 + ' unique words details of test set ' + '*' * 50)
    test_word2type, test_ootv_set = get_word_detail(WORD_TEXT.vocab, test_data.word, word_vectors)
    print('#' * 110)
    WORD_TEXT.vocab.dev_word2type = dev_word2type
    WORD_TEXT.vocab.test_word2type = test_word2type

    # ------------------- entity oov analysis-----------------------
    print('*' * 50 + ' get train entities ' + '*' * 50)
    train_entities = get_entities(WORD_TEXT.vocab, train_data, LABEL.vocab.stoi)
    print('#' * 110)
    print('*' * 50 + ' get dev entities ' + '*' * 50)
    dev_entity2type = get_entity_detail(WORD_TEXT.vocab, dev_data, LABEL.vocab.stoi, word_vectors)
    print('#' * 110)
    print('*' * 50 + ' get test entities ' + '*' * 50)
    test_entity2type = get_entity_detail(WORD_TEXT.vocab, test_data, LABEL.vocab.stoi, word_vectors)
    print('#' * 110)
    WORD_TEXT.vocab.dev_entity2type = dev_entity2type
    WORD_TEXT.vocab.test_entity2type = test_entity2type

    # ------------------- extend word vocab with ootv words -----------------------
    print('*' * 50 + 'extending ootv words to vocab' + '*' * 50)
    ootv = list(dev_ootv_set.union(test_ootv_set))
    extend(WORD_TEXT.vocab, ootv)
    print('extended %d words' % len(ootv))
    print('#' * 110)

    # ------------------- generate word embedding -----------------------
    vectors_to_use = unk_init(torch.zeros((len(WORD_TEXT.vocab), word_embed_size)))
    if word_vectors is not None:
        vectors_to_use = get_vectors(vectors_to_use, WORD_TEXT.vocab, word_vectors)
    WORD_TEXT.vocab.vectors = vectors_to_use

    # ------------------- char oov analysis-----------------------
    print('*' * 50 + ' unique chars details of dev set ' + '*' * 50)
    dev_char2type, dev_ootv_set = get_char_detail(CHAR_TEXT.vocab, dev_data.char, char_vectors)
    print('#' * 110)
    print('*' * 50 + ' unique chars details of test set ' + '*' * 50)
    test_char2type, test_ootv_set = get_char_detail(CHAR_TEXT.vocab, test_data.char, char_vectors)
    print('#' * 110)
    CHAR_TEXT.vocab.dev_char2type = dev_char2type
    CHAR_TEXT.vocab.test_char2type = test_char2type

    # ------------------- extend char vocab with ootv chars -----------------------
    print('*' * 50 + 'extending ootv chars to vocab' + '*' * 50)
    ootv = list(dev_ootv_set.union(test_ootv_set))
    extend(CHAR_TEXT.vocab, ootv)
    print('extended %d chars' % len(ootv))
    print('#' * 110)

    # ------------------- generate char embedding -----------------------
    vectors_to_use = unk_init(torch.zeros((len(CHAR_TEXT.vocab), char_embedding_size)))
    if char_vectors is not None:
        vectors_to_use = get_vectors(vectors_to_use, CHAR_TEXT.vocab, char_vectors)
    CHAR_TEXT.vocab.vectors = vectors_to_use

    print("word vocab size: ", len(WORD_TEXT.vocab))
    print("char vocab size: ", len(CHAR_TEXT.vocab))
    print("label vocab size: ", len(LABEL.vocab))

    train_iter = BucketIterator(train_data, batch_size=batch_size, device=device, sort_key=lambda x: len(x.word),
                                sort_within_batch=True, repeat=False, shuffle=True)
    dev_iter = Iterator(dev_data, batch_size=batch_size, device=device, sort=False, sort_within_batch=False,
                        repeat=False, shuffle=False)
    test_iter = Iterator(test_data, batch_size=batch_size, device=device, sort=False, sort_within_batch=False,
                         repeat=False, shuffle=False)
    return train_iter, dev_iter, test_iter, WORD_TEXT, CHAR_TEXT, LABEL


def get_chunk_type(tok, idx_to_tag):
    """
    The function takes in a chunk ("B-PER") and then splits it into the tag (PER) and its class (B)
    as defined in BIOES

    Args:
        tok: id of token, ex 4
        idx_to_tag: dictionary {4: "B-PER", ...}

    Returns:
        tuple: "B", "PER"

    """

    tag_name = idx_to_tag[tok]
    tag_class = tag_name.split('-')[0]
    tag_type = tag_name.split('-')[-1]
    return tag_class, tag_type


def get_chunks(seq, tags, bioes=True, id_format=True):
    """
    Given a sequence of tags, group entities and their position
    """
    if not id_format:
        seq = [tags[_] for _ in seq]

    # We assume by default the tags lie outside a named entity
    default = tags["O"]

    idx_to_tag = {idx: tag for tag, idx in tags.items()}

    chunks = []

    chunk_class, chunk_type, chunk_start = None, None, None
    for i, tok in enumerate(seq):
        if tok == default and (chunk_class in (["E", "S"] if bioes else ["B", "I"])):
            # Add a chunk.
            chunk = (chunk_type, chunk_start, i)
            chunks.append(chunk)
            chunk_class, chunk_type, chunk_start = "O", None, None

        if tok != default:
            tok_chunk_class, tok_chunk_type = get_chunk_type(tok, idx_to_tag)
            if chunk_type is None:
                # Initialize chunk for each entity
                chunk_class, chunk_type, chunk_start = tok_chunk_class, tok_chunk_type, i
            else:
                if bioes:
                    if chunk_class in ["E", "S"]:
                        chunk = (chunk_type, chunk_start, i)
                        chunks.append(chunk)
                        if tok_chunk_class in ["B", "S"]:
                            chunk_class, chunk_type, chunk_start = tok_chunk_class, tok_chunk_type, i
                        else:
                            chunk_class, chunk_type, chunk_start = None, None, None
                    elif tok_chunk_type == chunk_type and chunk_class in ["B", "I"]:
                        chunk_class = tok_chunk_class
                    else:
                        chunk_class, chunk_type = None, None
                else:  # BIO schema
                    if tok_chunk_class == "B":
                        chunk = (chunk_type, chunk_start, i)
                        chunks.append(chunk)
                        chunk_class, chunk_type, chunk_start = tok_chunk_class, tok_chunk_type, i
                    else:
                        chunk_class, chunk_type = None, None

    if chunk_type is not None:
        chunk = (chunk_type, chunk_start, len(seq))
        chunks.append(chunk)

    return chunks


def get_vectors(embed, vocab, pretrain_embed_vocab):
    oov = 0
    for i, word in enumerate(vocab.itos):
        index = pretrain_embed_vocab.stoi.get(word, None)  # digit or None
        if index is None:
            if word.lower() in pretrain_embed_vocab.stoi:
                index = pretrain_embed_vocab.stoi[word.lower()]
        if index:
            embed[i] = pretrain_embed_vocab.vectors[index]
        else:
            oov += 1
    print('train vocab oov %d \ntrain vocab + dev ootv + test ootv: %d' % (oov, len(vocab.stoi)))
    return embed


def test_get_chunks():
    print(get_chunks([4, 2, 1, 2, 3, 3],
                     {'O': 0, "B-PER": 1, "I-PER": 2, "E-PER": 3, "S-PER": 4}))
    print(get_chunks(["S-PER", "I-PER", "B-PER", "I-PER", "E-PER", "E-PER"],
                     {'O': 0, "B-PER": 1, "I-PER": 2, "E-PER": 3, "S-PER": 4}, id_format=False))

In [5]:
# -*- coding:utf8 -*-
import torch
import torch.optim as optim
from tqdm import tqdm
from torchtext.vocab import Vectors
import codecs

torch.manual_seed(1)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

DATA_PATH = "out_data"
PREDICT_OUT_FILE = "res.txt"
BEST_MODEL = "best_model.ckpt"
BATCH_SIZE = 32
LOWER_CASE = False
EPOCHS = 20

# embedding
WORD_VECTORS = None
# WORD_VECTORS = Vectors('glove.6B.100d.txt', '../../embeddings/glove.6B')
WORD_EMBEDDING_SIZE = 100
CHAR_VECTORS = None
CHAR_EMBEDDING_SIZE = 30  # the input char embedding to CNN
FREEZE_EMBEDDING = False

# SGD parameters
LEARNING_RATE = 0.015
DECAY_RATE = 0.05
MOMENTUM = 0.9
CLIP = 5
PATIENCE = 5

# network parameters
HIDDEN_SIZE = 400  # every LSTM's(forward and backward) hidden size is half of HIDDEN_SIZE
LSTM_LAYER_NUM = 1
DROPOUT_RATE = (0.5, 0.5, (0.5, 0.5))  # after embed layer, other case, (input to rnn, between rnn layers)
USE_CHAR = True  # use char level information
N_FILTERS = 30  # the output char embedding from CNN
KERNEL_STEP = 3  # n-gram size of CNN
USE_CRF = True


def train(train_iter, dev_iter, optimizer):
    best_dev_f1 = -1
    patience_counter = 0
    for epoch in range(1, EPOCHS + 1):
        model.train()
        total_loss = 0
        train_iter.init_epoch()
        for i, batch in enumerate(tqdm(train_iter)):
            words, lens = batch.word
            labels = batch.label
            if i < 2:
                tqdm.write(' '.join([WORD.vocab.itos[i] for i in words[0]]))
                tqdm.write(' '.join([LABEL.vocab.itos[i] for i in labels[0]]))
            model.zero_grad()
            loss = model(words, batch.char, lens, labels)
            total_loss += loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP)
            optimizer.step()
        tqdm.write("Epoch: %d, Train Loss: %d" % (epoch, total_loss))

        lr = LEARNING_RATE / (1 + DECAY_RATE * epoch)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        dev_f1 = eval(dev_iter, "Dev", epoch)
        if dev_f1 < best_dev_f1:
            patience_counter += 1
            tqdm.write("No improvement, patience: %d/%d" % (patience_counter, PATIENCE))
        else:
            best_dev_f1 = dev_f1
            patience_counter = 0
            torch.save(model.state_dict(), BEST_MODEL)
            tqdm.write("New best model, saved to best_model.ckpt, patience: 0/%d" % PATIENCE)
        if patience_counter >= PATIENCE:
            tqdm.write("Early stopping: patience limit reached, stopping...")
            break


def eval(data_iter, name, epoch=None, best_model=None):
    if best_model:
        model.load_state_dict(torch.load(best_model))
    model.eval()
    with torch.no_grad():
        total_loss = 0
        res = {'ootv': [0, 0, 0], 'ooev': [0, 0, 0], 'oobv': [0, 0, 0], 'iv': [0, 0, 0],
               'total': [0, 0, 0]}  # e.g. 'iv':[correct_preds, total_preds, total_correct]
        for i, batch in enumerate(data_iter):
            words, lens = batch.word
            labels = batch.label
            predicted_seq, _ = model(words, batch.char, lens)  # predicted_seq : (batch_size, seq_len)
            loss = model(words, batch.char, lens, labels)
            total_loss += loss.item()

            orig_text = [e.word for e in data_iter.dataset.examples[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]]
            for text, ground_truth_id, predicted_id, len_ in zip(orig_text, labels.cpu().numpy(),
                                                                 predicted_seq.cpu().numpy(),
                                                                 lens.cpu().numpy()):
                lab_chunks = set(get_chunks(ground_truth_id[:len_], LABEL.vocab.stoi))
                lab_pred_chunks = set(get_chunks(predicted_id[:len_], LABEL.vocab.stoi))

                for chunk in list(lab_chunks):
                    entity_word = ' '.join([text[ix] for ix in range(chunk[1], chunk[2])])
                    # type, 1:ootv, 2:ooev, 3:oobv, 4:iv
                    entity_type = WORD.vocab.dev_entity2type[entity_word] if name == "Dev" else \
                    WORD.vocab.test_entity2type[entity_word]
                    if entity_type == 1:
                        if chunk in lab_pred_chunks:
                            res['ootv'][0] += 1
                        res['ootv'][2] += 1
                    elif entity_type == 2:
                        if chunk in lab_pred_chunks:
                            res['ooev'][0] += 1
                        res['ooev'][2] += 1
                    elif entity_type == 3:
                        if chunk in lab_pred_chunks:
                            res['oobv'][0] += 1
                        res['oobv'][2] += 1
                    else:
                        if chunk in lab_pred_chunks:
                            res['iv'][0] += 1
                        res['iv'][2] += 1
                    if chunk in lab_pred_chunks:
                        res['total'][0] += 1
                    res['total'][2] += 1
                for chunk in list(lab_pred_chunks):
                    entity_word = ' '.join([text[ix] for ix in range(chunk[1], chunk[2])])
                    # type, 1:ootv, 2:ooev, 3:oobv, 4:iv
                    entity_type = WORD.vocab.dev_entity2type.get(entity_word, None) if name == "Dev" else \
                        WORD.vocab.test_entity2type.get(entity_word, None)
                    if entity_type == 1:
                        res['ootv'][1] += 1
                    elif entity_type == 2:
                        res['ooev'][1] += 1
                    elif entity_type == 3:
                        res['oobv'][1] += 1
                    elif entity_type == 4:
                        res['iv'][1] += 1
                    res['total'][1] += 1

        # Calculating the F1-Score
        for k, v in res.items():
            p = v[0] / v[1] if v[1] != 0 else 0
            r = v[0] / v[2] if v[2] != 0 else 0
            micro_F1 = 2 * p * r / (p + r) if (p + r) != 0 else 0
            if epoch is not None:
                tqdm.write(
                    "Epoch: %d, %s, %s Entity Micro F1: %.3f, Loss %.3f" % (epoch, name, k, micro_F1, total_loss))
            else:
                tqdm.write(
                    "%s, %s Entity Micro F1: %.3f, Loss %.3f" % (name, k, micro_F1, total_loss))
    return micro_F1


def predict(data_iter, out_file):
    model.eval()
    with torch.no_grad():
        gold_seqs = []
        predicted_seqs = []
        word_seqs = []
        for i, batch in enumerate(data_iter):
            words, lens = batch.word
            predicted_seq, _ = model(words, batch.char, lens)  # predicted_seq : (batch_size, seq_len)
            gold_seqs.extend(batch.label.tolist())
            predicted_seqs.extend(predicted_seq.tolist())
            word_seqs.extend(words.tolist())
        write_predicted_labels(out_file, data_iter.dataset.examples, word_seqs, LABEL.vocab.itos, gold_seqs,
                               predicted_seqs)


def write_predicted_labels(output_file, orig_text, word_ids, id2label, gold_seq, predicted_seq):
    with codecs.open(output_file, 'w', encoding='utf-8') as writer:
        for text, wids, predict, gold in zip(orig_text, word_ids, predicted_seq, gold_seq):
            ix = 0
            for w_id, p_id, g_id in zip(wids, predict, gold):
                if w_id == pad_idx: break
                output_line = ' '.join([text.word[ix], id2label[g_id], id2label[p_id]])
                writer.write(output_line + '\n')
                ix += 1
            writer.write('\n')


In [6]:
train_data = read_file('./data/train.txt')
convert(train_data, './out_data/train.txt')
dev_data = read_file('./data/dev.txt')
convert(dev_data, './out_data/dev.txt')
test_data = read_file('./data/test.txt')
convert(test_data, './out_data/test.txt')

France NNP I-NP S-LOC
critics NNS I-NP O
managed VBD B-VP O


In [7]:
train_iter, dev_iter, test_iter, WORD, CHAR, LABEL = load_iters(WORD_EMBEDDING_SIZE, WORD_VECTORS,
                                                                    CHAR_EMBEDDING_SIZE, CHAR_VECTORS,
                                                                    BATCH_SIZE, DEVICE, DATA_PATH, LOWER_CASE)

model = NER_Model(WORD.vocab.vectors, CHAR.vocab.vectors, len(LABEL.vocab.stoi), HIDDEN_SIZE, DROPOUT_RATE,
                  LSTM_LAYER_NUM,
                  KERNEL_STEP, N_FILTERS, USE_CHAR, FREEZE_EMBEDDING, USE_CRF).to(DEVICE)
print(model)
pad_idx = WORD.vocab.stoi[WORD.pad_token]

optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
train(train_iter, dev_iter, optimizer)
eval(test_iter, "Test", best_model=BEST_MODEL)
predict(test_iter, PREDICT_OUT_FILE)

train sentence num / total word num: 14737/172400
dev sentence num / total word num: 3412/43461
test sentence num / total word num: 3646/38944
************************************************** unique words details of dev set **************************************************
IV 0(fuzzy 0)
OOTV 0
OOEV 4334
OOBV 1565

##############################################################################################################
************************************************** unique words details of test set **************************************************
IV 0(fuzzy 0)
OOTV 0
OOEV 3874
OOBV 1513

##############################################################################################################
************************************************** get train entities **************************************************
entities contains `UNK` 0
conflict entities 2
all entities: 1042

###############################################################################################

                                       

-DOCSTART-


                                       

O


                                               

The firm has signed an agreement with the Polish ( ) to manage the issue and plans them to be listed on the bourse 's bond market .


                                               

O O O O O O O O B-ORG O O O O O O O O O O O O O O O O O O O


100%|██████████| 461/461 [00:25<00:00, 18.31it/s]


Epoch: 1, Train Loss: 17524
Epoch: 1, Dev, ootv Entity Micro F1: 0.000, Loss 2662.724
Epoch: 1, Dev, ooev Entity Micro F1: 0.000, Loss 2662.724
Epoch: 1, Dev, oobv Entity Micro F1: 0.000, Loss 2662.724
Epoch: 1, Dev, iv Entity Micro F1: 0.000, Loss 2662.724
Epoch: 1, Dev, total Entity Micro F1: 0.000, Loss 2662.724
New best model, saved to best_model.ckpt, patience: 0/5


                                       

00 00 00 00 , 00 00 00 00


                                       

O O O O O O O O O


                                       

Our not signing a new treaty does not mean we are going in for any new kind of weapons , particularly nuclear . "


                                       

O O O O O O O O O O O O O O O O O O O O O O O O


100%|██████████| 461/461 [00:24<00:00, 18.75it/s]


Epoch: 2, Train Loss: 10294
Epoch: 2, Dev, ootv Entity Micro F1: 0.000, Loss 3001.220
Epoch: 2, Dev, ooev Entity Micro F1: 0.179, Loss 3001.220
Epoch: 2, Dev, oobv Entity Micro F1: 0.012, Loss 3001.220
Epoch: 2, Dev, iv Entity Micro F1: 0.000, Loss 3001.220
Epoch: 2, Dev, total Entity Micro F1: 0.088, Loss 3001.220
New best model, saved to best_model.ckpt, patience: 0/5


                                       

Diplomats fear that the crisis could cast doubt over the entire election process , which already appears set to confirm 's ethnic partition rather than its reintegration as the peace agreement had planned .


                                       

O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O


                                               

division three - v .


                                               

O O O O O


100%|██████████| 461/461 [00:24<00:00, 18.67it/s]


Epoch: 3, Train Loss: 6184
Epoch: 3, Dev, ootv Entity Micro F1: 0.000, Loss 1639.482
Epoch: 3, Dev, ooev Entity Micro F1: 0.494, Loss 1639.482
Epoch: 3, Dev, oobv Entity Micro F1: 0.135, Loss 1639.482
Epoch: 3, Dev, iv Entity Micro F1: 0.000, Loss 1639.482
Epoch: 3, Dev, total Entity Micro F1: 0.304, Loss 1639.482
New best model, saved to best_model.ckpt, patience: 0/5


                                       

police were seeking an escaped gunman on Tuesday after an incident in which he fired at a 00-year-old schoolgirl , Radio said .


                                       

O O O O O O O O O O O O O O O O O O O O B-ORG O O


                                               

The spot riyal against the dollar and riyal interbank deposit rates were mainly steady this week in quiet summer trade , dealers in the kingdom said .


                                               

O O O O O O O O O O O O O O O O O O O O O O O O O O O


100%|██████████| 461/461 [00:24<00:00, 18.65it/s]


Epoch: 4, Train Loss: 4434
Epoch: 4, Dev, ootv Entity Micro F1: 0.000, Loss 1141.729
Epoch: 4, Dev, ooev Entity Micro F1: 0.599, Loss 1141.729
Epoch: 4, Dev, oobv Entity Micro F1: 0.340, Loss 1141.729
Epoch: 4, Dev, iv Entity Micro F1: 0.000, Loss 1141.729
Epoch: 4, Dev, total Entity Micro F1: 0.447, Loss 1141.729
New best model, saved to best_model.ckpt, patience: 0/5


                                       

Leading results from round


                                       

O O O O


                                       

" As we have said publicly before , if there is any effort to do so ( hold local elections ) these elections will not be valid , " he said . "


                                       

O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O


100%|██████████| 461/461 [00:24<00:00, 18.74it/s]


Epoch: 5, Train Loss: 3496
Epoch: 5, Dev, ootv Entity Micro F1: 0.000, Loss 837.771
Epoch: 5, Dev, ooev Entity Micro F1: 0.723, Loss 837.771
Epoch: 5, Dev, oobv Entity Micro F1: 0.524, Loss 837.771
Epoch: 5, Dev, iv Entity Micro F1: 0.000, Loss 837.771
Epoch: 5, Dev, total Entity Micro F1: 0.578, Loss 837.771
New best model, saved to best_model.ckpt, patience: 0/5


                                       

-DOCSTART-


                                       

O


                                       

, 0000-00-00


                                       

O O


100%|██████████| 461/461 [00:24<00:00, 18.58it/s]


Epoch: 6, Train Loss: 2868
Epoch: 6, Dev, ootv Entity Micro F1: 0.000, Loss 800.112
Epoch: 6, Dev, ooev Entity Micro F1: 0.753, Loss 800.112
Epoch: 6, Dev, oobv Entity Micro F1: 0.586, Loss 800.112
Epoch: 6, Dev, iv Entity Micro F1: 0.000, Loss 800.112
Epoch: 6, Dev, total Entity Micro F1: 0.613, Loss 800.112
New best model, saved to best_model.ckpt, patience: 0/5


                                       

goals for , goals against , points ) .


                                       

O O O O O O O O O


                                       

0. ( ) 00.00


                                       

O O O O


100%|██████████| 461/461 [00:24<00:00, 15.33it/s]


Epoch: 7, Train Loss: 2439
Epoch: 7, Dev, ootv Entity Micro F1: 0.000, Loss 877.575
Epoch: 7, Dev, ooev Entity Micro F1: 0.746, Loss 877.575
Epoch: 7, Dev, oobv Entity Micro F1: 0.523, Loss 877.575
Epoch: 7, Dev, iv Entity Micro F1: 0.000, Loss 877.575
Epoch: 7, Dev, total Entity Micro F1: 0.602, Loss 877.575
No improvement, patience: 1/5


                                       

0000-00-00


                                       

O


                                       

0 0


                                       

O O


100%|██████████| 461/461 [00:25<00:00, 18.24it/s]


Epoch: 8, Train Loss: 2020
Epoch: 8, Dev, ootv Entity Micro F1: 0.000, Loss 774.694
Epoch: 8, Dev, ooev Entity Micro F1: 0.798, Loss 774.694
Epoch: 8, Dev, oobv Entity Micro F1: 0.548, Loss 774.694
Epoch: 8, Dev, iv Entity Micro F1: 0.000, Loss 774.694
Epoch: 8, Dev, total Entity Micro F1: 0.625, Loss 774.694
New best model, saved to best_model.ckpt, patience: 0/5


                                       

, who has been coaching the provincial team , takes over from who took to 00th place in the in .


                                       

O O O O O O O O O O O O O O O O O O O O O


                                               

Close of play scores on the first


                                               

O O O O O O O


100%|██████████| 461/461 [00:24<00:00, 22.74it/s]


Epoch: 9, Train Loss: 1793
Epoch: 9, Dev, ootv Entity Micro F1: 0.000, Loss 810.717
Epoch: 9, Dev, ooev Entity Micro F1: 0.795, Loss 810.717
Epoch: 9, Dev, oobv Entity Micro F1: 0.568, Loss 810.717
Epoch: 9, Dev, iv Entity Micro F1: 0.000, Loss 810.717
Epoch: 9, Dev, total Entity Micro F1: 0.639, Loss 810.717
New best model, saved to best_model.ckpt, patience: 0/5


                                       

00 00 0 0 000 000 00


                                       

O O O O O O O


                                       

The world 's largest dredger said in March that it was uncertain whether it could hold 0000 full-year profit steady at the previous year 's 00.0 million guilders , but added long-term prospects were good .


                                       

O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O


100%|██████████| 461/461 [00:24<00:00, 19.85it/s]


Epoch: 10, Train Loss: 1538
Epoch: 10, Dev, ootv Entity Micro F1: 0.000, Loss 785.073
Epoch: 10, Dev, ooev Entity Micro F1: 0.814, Loss 785.073
Epoch: 10, Dev, oobv Entity Micro F1: 0.589, Loss 785.073
Epoch: 10, Dev, iv Entity Micro F1: 0.000, Loss 785.073
Epoch: 10, Dev, total Entity Micro F1: 0.655, Loss 785.073
New best model, saved to best_model.ckpt, patience: 0/5


                                       

( ) beat ( ) 0-0 0-0 0-0


                                       

O O O O O O O O


                                       

opened their title defence with a 0-0 win over on Wednesday .


                                       

O O O O O O O O O O O O


100%|██████████| 461/461 [00:24<00:00, 18.61it/s]


Epoch: 11, Train Loss: 1443
Epoch: 11, Dev, ootv Entity Micro F1: 0.000, Loss 831.241
Epoch: 11, Dev, ooev Entity Micro F1: 0.802, Loss 831.241
Epoch: 11, Dev, oobv Entity Micro F1: 0.578, Loss 831.241
Epoch: 11, Dev, iv Entity Micro F1: 0.000, Loss 831.241
Epoch: 11, Dev, total Entity Micro F1: 0.639, Loss 831.241
No improvement, patience: 1/5


                                       

0000-00-00


                                       

O


                                       

is the 's steel-producing arm , slated for a December sale worth an estimated $ 0.0 billion .


                                       

O O O O O O O O O O O O O O O O O O


100%|██████████| 461/461 [00:24<00:00, 18.54it/s]


Epoch: 12, Train Loss: 1312
Epoch: 12, Dev, ootv Entity Micro F1: 0.000, Loss 783.935
Epoch: 12, Dev, ooev Entity Micro F1: 0.798, Loss 783.935
Epoch: 12, Dev, oobv Entity Micro F1: 0.605, Loss 783.935
Epoch: 12, Dev, iv Entity Micro F1: 0.000, Loss 783.935
Epoch: 12, Dev, total Entity Micro F1: 0.638, Loss 783.935
No improvement, patience: 2/5


                                       

- 0 through 0


                                       

O O O O


                                       

( ) vs. ( )


                                       

O O O O O


100%|██████████| 461/461 [00:24<00:00, 18.31it/s]


Epoch: 13, Train Loss: 1098
Epoch: 13, Dev, ootv Entity Micro F1: 0.000, Loss 845.494
Epoch: 13, Dev, ooev Entity Micro F1: 0.802, Loss 845.494
Epoch: 13, Dev, oobv Entity Micro F1: 0.597, Loss 845.494
Epoch: 13, Dev, iv Entity Micro F1: 0.000, Loss 845.494
Epoch: 13, Dev, total Entity Micro F1: 0.651, Loss 845.494
No improvement, patience: 3/5


                                       

" I was n't happy when I saw the draw .


                                       

O O O O O O O O O O O


                                       

goals for , goals against , points ) .


                                       

O O O O O O O O O


 82%|████████▏ | 378/461 [00:20<00:04, 18.20it/s]

KeyboardInterrupt: 