In [42]:
#imports
from datasets import load_dataset
from thai2transformers.metrics import seqeval_classification_metrics
from sklearn.metrics import f1_score
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

In [49]:
#parameters
class Args:
    """Notebook-wide settings, read by the later cells through the ``args`` instance."""
    # Dataset identifier handed to `load_dataset`; later cells special-case 'thainer' and 'lst20'.
    dataset_name_or_path = 'thainer'
    # Column holding each example's list of tokens (the model input).
    feature_col = 'tokens'
    # Column holding the per-token tag ids; the thainer cells handle 'ner_tags' and 'pos_tags'.
    label_col = 'ner_tags'
    # NOTE(review): not referenced by any cell visible here - confirm it is still needed.
    metric_for_best_model = 'f1_macro'
    # Seed used for the train/validation/test split and for the lst20 subsample.
    seed = 2020
    # Local corpus directory; only passed to `load_dataset` when the dataset is 'lst20'.
    data_dir = '~/Downloads/LST20_Corpus'

args = Args()

In [50]:
# Only the 'lst20' corpus is loaded with an explicit local data_dir; every other
# dataset is loaded by name alone.
dataset = (
    load_dataset(args.dataset_name_or_path, data_dir=args.data_dir)
    if args.dataset_name_or_path == 'lst20'
    else load_dataset(args.dataset_name_or_path)
)
dataset

Downloading and preparing dataset thainer/thainer (download: 5.20 MiB, generated: 7.74 MiB, post-processed: Unknown size, total: 12.95 MiB) to /Users/admin/.cache/huggingface/datasets/thainer/thainer/1.3.0/e0a86672e5ad057c1093708597cdda3671a76e9b053d210a32205406726cca92...


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

Dataset thainer downloaded and prepared to /Users/admin/.cache/huggingface/datasets/thainer/thainer/1.3.0/e0a86672e5ad057c1093708597cdda3671a76e9b053d210a32205406726cca92. Subsequent calls will reuse this data.


DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'ner_tags'],
        num_rows: 6348
    })
})

In [51]:
def _split_train_valid_test(splits, seed):
    """Carve the 'train' split of ``splits`` into 80/10/10 train/validation/test.

    The first shuffle keeps 80% of the rows for training; the held-out 20% is
    shuffled again and halved into validation and test. Both shuffles use
    ``seed``. ``splits`` is updated in place and returned.
    """
    train_rest = splits['train'].train_test_split(test_size=0.2, shuffle=True, seed=seed)
    valid_test = train_rest['test'].train_test_split(test_size=0.5, shuffle=True, seed=seed)
    splits['train'] = train_rest['train']
    splits['validation'] = valid_test['train']
    splits['test'] = valid_test['test']
    return splits


is_thainer = args.dataset_name_or_path == 'thainer'
merge_ner_ids = is_thainer and args.label_col == 'ner_tags'

if merge_ner_ids:
    # Tag ids 13 and 26 are folded into id 27.
    # NOTE(review): presumably 13/26 are placeholder tags and 27 is the outside
    # tag - confirm against the thainer label list.
    dataset = dataset.map(lambda examples: {'ner_tags': [i if i not in [13,26] else 27 for i in examples[args.label_col]]})

# thainer is loaded with a single 'train' split, so validation/test are carved
# out here. The guard keeps the cell idempotent: re-running it must not split
# the already reduced train split a second time.
already_split = {'validation', 'test'}.issubset(dataset.keys())
if is_thainer and args.label_col in ('ner_tags', 'pos_tags') and not already_split:
    dataset = _split_train_valid_test(dataset, args.seed)

tag_labels = dataset['train'].features[args.label_col].feature.names
if merge_ner_ids:
    # Drop the names of the two ids that were merged away above.
    tag_labels = [name for tag_id, name in enumerate(tag_labels) if tag_id not in (13, 26)]
dataset

HBox(children=(FloatProgress(value=0.0, max=6348.0), HTML(value='')))




DatasetDict({
    train: Dataset({
        features: ['id', 'ner_tags', 'pos_tags', 'tokens'],
        num_rows: 5078
    })
    validation: Dataset({
        features: ['id', 'ner_tags', 'pos_tags', 'tokens'],
        num_rows: 635
    })
    test: Dataset({
        features: ['id', 'ner_tags', 'pos_tags', 'tokens'],
        num_rows: 635
    })
})

In [52]:
if args.dataset_name_or_path == 'thainer':
    from transformers import AutoTokenizer
    mbert_tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')

    def pre_tokenize(token, space_token):
        """Return ``token`` with every space replaced by ``space_token``."""
        return token.replace(' ', space_token)

    def is_not_too_long(example,
                        max_length=510):
        """Tell whether ``example`` stays under ``max_length`` mBERT tokens.

        Each entry of the feature column is pre-tokenized (spaces become
        '<_>'), run through ``mbert_tokenizer.tokenize`` and the resulting
        token counts are added up.

        NOTE(review): 510 presumably leaves room for two special tokens in a
        512-token model input - confirm.
        """
        # Only the total count matters, so add up lengths instead of
        # concatenating the per-token lists with sum(..., []), which is quadratic.
        n_subwords = sum(
            len(mbert_tokenizer.tokenize(pre_tokenize(token, space_token='<_>')))
            for token in example[args.feature_col])
        return n_subwords < max_length

    # Only the test split is filtered; train and validation are left untouched.
    dataset['test'] = dataset['test'].filter(is_not_too_long)
dataset['test']

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Dataset({
    features: ['id', 'ner_tags', 'pos_tags', 'tokens'],
    num_rows: 621
})

In [53]:
%%time
#get sentence forms
def generate_sents(dataset, idx):
    """Return example ``idx`` of ``dataset`` as a list of ``(feature, label)`` pairs.

    The feature and label columns are the ones named by ``args.feature_col``
    and ``args.label_col``; pair ``i`` combines their ``i``-th entries.
    """
    # Fetch the row once; indexing the dataset separately for each column
    # looks the same example up twice.
    row = dataset[idx]
    features = row[args.feature_col]
    labels = row[args.label_col]
    return [(feature, labels[pos]) for pos, feature in enumerate(features)]

# One list of (feature, label) sentences per split, built the same way for each.
train_sents, valid_sents, test_sents = (
    [generate_sents(dataset[split], row) for row in range(len(dataset[split]))]
    for split in ('train', 'validation', 'test')
)
len(train_sents), len(valid_sents), len(test_sents)

CPU times: user 1.17 s, sys: 30.8 ms, total: 1.2 s
Wall time: 1.23 s


(5078, 635, 621)

In [54]:
#generate x,y
def extract_features(doc, window=3, max_n_gram=3):
    """Build word n-gram context features for every token of ``doc``.

    ``doc`` is a list of token strings. It is padded with ``window`` 'xxpad'
    tokens on each side, and for each real token a feature list is produced
    that starts with 'bias' and then holds one entry per n-gram (sizes 1 up to
    ``max_n_gram``) lying inside the ``window``-token context on both sides.
    Each entry reads ``word_{n}_{start}_{end}={tokens joined by '|'}``, where
    start/end are offsets relative to the current token.

    Returns a list with one feature list per token; ``doc`` is not modified.
    """
    padding = ['xxpad'] * window
    padded = padding + doc + padding
    # An n-gram cannot be longer than the whole context (2*window + 1 tokens).
    largest_size = min(max_n_gram, 2 * window + 1)

    doc_features = []
    for center in range(window, len(padded) - window):
        word_features = ['bias']
        for size in range(1, largest_size + 1):
            # Offsets are relative to the current token; the last n-gram of a
            # given size ends exactly at the right edge of the window.
            for offset in range(-window, window + 2 - size):
                start = center + offset
                ngram = "|".join(padded[start:start + size])
                word_features.append(f'word_{size}_{offset}_{offset + size}={ngram}')
        doc_features.append(word_features)
    return doc_features

def generate_xy(all_tuples, window=2, max_n_gram=2):
    """Turn ``(word, label)`` sentences into CRF features and string targets.

    Args:
        all_tuples: list of sentences, each a list of ``(word, label)`` pairs.
        window: context size forwarded to ``extract_features`` (default 2, the
            value this function previously hard-coded).
        max_n_gram: largest n-gram size forwarded to ``extract_features``
            (default 2, likewise previously hard-coded).

    Returns:
        ``(x, y)`` where ``x[i]`` holds the per-word feature lists of sentence
        ``i`` and ``y[i]`` its labels converted with ``str``.
    """
    #target
    y = [[str(label) for (_, label) in sent] for sent in all_tuples]
    #features
    x_pre = [[word for (word, _) in sent] for sent in all_tuples]
    x = [extract_features(words, window=window, max_n_gram=max_n_gram) for words in tqdm(x_pre)]
    return x, y


x_train, y_train = generate_xy(train_sents)
if args.dataset_name_or_path=='lst20':
    import random
    # Train on a fixed-seed subsample of at most 10000 sentences. One set of
    # indices is drawn and applied to both lists, so features and labels stay
    # paired by construction rather than by reseeding the RNG before two
    # separate draws. For the same seed this selects the same sentences.
    random.seed(args.seed)
    n_keep = min(10000, len(x_train))  # do not fail on a smaller training set
    keep = random.sample(range(len(x_train)), n_keep)
    x_train_small = [x_train[k] for k in keep]
    y_train_small = [y_train[k] for k in keep]
else:
    x_train_small = x_train
    y_train_small = y_train
x_valid, y_valid = generate_xy(valid_sents)
x_test, y_test = generate_xy(test_sents)

HBox(children=(FloatProgress(value=0.0, max=5078.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=635.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=621.0), HTML(value='')))




In [55]:
import pycrfsuite
from sklearn.metrics import classification_report

def train_crf(model_name, c1, c2, x_train, y_train, max_iterations=500, verbose=True):
    """Train a CRF with python-crfsuite and write it to disk.

    Args:
        model_name: prefix of the output file name.
        c1, c2: values passed to CRFsuite as its 'c1' and 'c2' training
            parameters (the L1 and L2 regularization coefficients).
        x_train: sequences of per-item feature lists.
        y_train: sequences of string labels, parallel to ``x_train``.
        max_iterations: value passed as CRFsuite's 'max_iterations'.
        verbose: forwarded to ``pycrfsuite.Trainer``; pass False to silence
            the per-iteration training log.

    Returns:
        The path of the written model, ``f'{model_name}_{c1}_{c2}.model'``.
    """
    # Train model
    trainer = pycrfsuite.Trainer(verbose=verbose)

    # zip() has no length, so give tqdm the total whenever the input is sized.
    n_sequences = len(x_train) if hasattr(x_train, '__len__') else None
    for xseq, yseq in tqdm(zip(x_train, y_train), total=n_sequences):
        trainer.append(xseq, yseq)

    trainer.set_params({
        'c1': c1,
        'c2': c2,
        'max_iterations': max_iterations,
        'feature.possible_transitions': True,
        'feature.minfreq': 3.0,
    })

    model_path = f'{model_name}_{c1}_{c2}.model'
    trainer.train(model_path)
    return model_path

class Pred:
    """Plain holder exposing ``label_ids`` and ``predictions`` as attributes."""

    def __init__(self,label_ids,predictions):
        # Stored as given; no copying or validation.
        self.label_ids, self.predictions = label_ids, predictions
        
def evaluate_crf(model_path, features, labels, tag_labels):
    """Tag ``features`` with a saved CRF model and score against ``labels``.

    Args:
        model_path: path of a model file written by python-crfsuite.
        features: sequences of per-item feature lists (must support ``len``).
        labels: gold label sequences; each label is a stringified tag id.
        tag_labels: tag names, ordered to line up with the expected tag ids.

    Returns:
        Whatever ``seqeval_classification_metrics`` returns for a ``Pred``
        holding the gold and predicted tag-name sequences.
    """
    tagger = pycrfsuite.Tagger()
    tagger.open(model_path)
    try:
        y_pred = [tagger.tag(xseq) for xseq in tqdm(features, total=len(features))]
    finally:
        # Release the model file even if tagging fails; it was never closed before.
        tagger.close()

    if args.dataset_name_or_path == 'thainer' and args.label_col=='ner_tags':
        # For thainer NER the ids 13 and 26 never occur as labels, so the
        # surviving ids are 0-12, 14-25 and 27.
        idx = [str(i) for i in range(28) if i not in (13, 26)]
    else:
        idx = [str(i) for i in range(len(tag_labels))]
    # Map each stringified tag id to its name, position by position.
    idx2tag = {tag_id: tag_labels[pos] for pos, tag_id in enumerate(idx)}

    label_ids = [[idx2tag[i] for i in s] for s in labels]
    predictions = [[idx2tag[i] for i in s] for s in y_pred]
    pred = Pred(label_ids, predictions)
    return seqeval_classification_metrics(pred)

In [122]:
# Grid search over the CRF's c1/c2 settings: every combination is trained on
# the (possibly subsampled) training set and scored on the validation set.
penalty_grid = [0.,0.5,1.]
hyperparams = []
for c1 in tqdm(penalty_grid):
    for c2 in tqdm(penalty_grid):
        # Same file name that train_crf writes for this combination.
        model_path = f'{args.dataset_name_or_path}_{c1}_{c2}.model'
        train_crf(args.dataset_name_or_path, c1, c2, x_train_small, y_train_small)
        res = evaluate_crf(model_path, x_valid, y_valid, tag_labels)
        # Record which combination produced these metrics.
        res['c1'] = c1
        res['c2'] = c2
        print(res['classification_report'])
        hyperparams.append(res)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


Feature generation
type: CRF1d
feature.minfreq: 3.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 129129
Seconds required: 4.896

L-BFGS optimization
c1: 0.000000
c2: 0.000000
num_memories: 6
max_iterations: 500
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 579570.646983
Feature norm: 5.000000
Error norm: 123054.508418
Active features: 129129
Line search trials: 2
Line search step: 0.000011
Seconds required for this iteration: 3.591

***** Iteration #2 *****
Loss: 395921.452896
Feature norm: 3.511936
Error norm: 92701.019940
Active features: 129129
Line search trials: 3
Line search step: 1.541237
Seconds required for this iteration: 3.957

***** Iteration #3 *****
Loss: 364498.566376
Feature norm: 3.196999
Error norm: 52473.324824
Active features: 129129
Line search trials: 2
Line search step: 0.077037
Seconds req

***** Iteration #44 *****
Loss: 67519.490771
Feature norm: 68.150625
Error norm: 5892.477282
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.061

***** Iteration #45 *****
Loss: 66658.531029
Feature norm: 70.753459
Error norm: 7479.034673
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.028

***** Iteration #46 *****
Loss: 66228.997756
Feature norm: 70.444923
Error norm: 3926.702859
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.077

***** Iteration #47 *****
Loss: 65651.324525
Feature norm: 71.037748
Error norm: 2610.999465
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.047

***** Iteration #48 *****
Loss: 64967.958477
Feature norm: 72.736415
Error norm: 4194.450912
Active features: 129129
Line search trials: 1
Line search step: 1.0

***** Iteration #84 *****
Loss: 44650.249926
Feature norm: 126.025337
Error norm: 1620.965297
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.114

***** Iteration #85 *****
Loss: 44291.844201
Feature norm: 127.126328
Error norm: 3292.597919
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.088

***** Iteration #86 *****
Loss: 43573.867887
Feature norm: 128.088032
Error norm: 1573.621267
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.074

***** Iteration #87 *****
Loss: 43120.702492
Feature norm: 128.622932
Error norm: 1663.027574
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.107

***** Iteration #88 *****
Loss: 42594.095164
Feature norm: 130.151361
Error norm: 5402.022837
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #124 *****
Loss: 29986.109492
Feature norm: 171.056139
Error norm: 1029.633940
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.023

***** Iteration #125 *****
Loss: 29535.829973
Feature norm: 173.683294
Error norm: 1829.151198
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.032

***** Iteration #126 *****
Loss: 29241.047501
Feature norm: 176.239940
Error norm: 1333.483454
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.038

***** Iteration #127 *****
Loss: 29037.260263
Feature norm: 175.682779
Error norm: 685.448287
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.058

***** Iteration #128 *****
Loss: 28876.450028
Feature norm: 175.702001
Error norm: 1123.920839
Active features: 129129
Line search trials: 1
Line search 

***** Iteration #167 *****
Loss: 23428.055339
Feature norm: 215.441955
Error norm: 1644.893700
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.124

***** Iteration #168 *****
Loss: 23331.863512
Feature norm: 218.852349
Error norm: 1134.818874
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.068

***** Iteration #169 *****
Loss: 23242.931216
Feature norm: 218.559569
Error norm: 644.568023
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.088

***** Iteration #170 *****
Loss: 23159.450478
Feature norm: 219.194917
Error norm: 668.350270
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.059

***** Iteration #171 *****
Loss: 23065.141289
Feature norm: 220.861415
Error norm: 1144.255433
Active features: 129129
Line search trials: 1
Line search s

***** Iteration #207 *****
Loss: 18984.137724
Feature norm: 288.021219
Error norm: 2669.461749
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.053

***** Iteration #208 *****
Loss: 18778.259838
Feature norm: 289.855538
Error norm: 668.291021
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.175

***** Iteration #209 *****
Loss: 18722.007264
Feature norm: 289.219354
Error norm: 529.038858
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.117

***** Iteration #210 *****
Loss: 18649.264088
Feature norm: 291.266402
Error norm: 1244.184413
Active features: 129129
Line search trials: 2
Line search step: 0.491091
Seconds required for this iteration: 2.179

***** Iteration #211 *****
Loss: 18562.291034
Feature norm: 293.324291
Error norm: 640.028205
Active features: 129129
Line search trials: 1
Line search st

***** Iteration #246 *****
Loss: 15534.429999
Feature norm: 367.918547
Error norm: 321.065481
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.050

***** Iteration #247 *****
Loss: 15426.005234
Feature norm: 370.125909
Error norm: 398.221677
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.046

***** Iteration #248 *****
Loss: 15372.863780
Feature norm: 371.950668
Error norm: 484.710206
Active features: 129129
Line search trials: 2
Line search step: 0.449021
Seconds required for this iteration: 2.117

***** Iteration #249 *****
Loss: 15313.456840
Feature norm: 373.358540
Error norm: 658.461628
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.024

***** Iteration #250 *****
Loss: 15268.746356
Feature norm: 374.235867
Error norm: 331.085926
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #287 *****
Loss: 13822.863437
Feature norm: 412.749270
Error norm: 699.174294
Active features: 129129
Line search trials: 2
Line search step: 0.138235
Seconds required for this iteration: 2.202

***** Iteration #288 *****
Loss: 13778.687670
Feature norm: 413.502168
Error norm: 412.512373
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.072

***** Iteration #289 *****
Loss: 13747.026977
Feature norm: 414.530763
Error norm: 292.807699
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.082

***** Iteration #290 *****
Loss: 13725.306545
Feature norm: 415.376569
Error norm: 273.747439
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.063

***** Iteration #291 *****
Loss: 13706.498891
Feature norm: 416.153956
Error norm: 551.416630
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #331 *****
Loss: 12708.955418
Feature norm: 444.331298
Error norm: 236.322280
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.023

***** Iteration #332 *****
Loss: 12690.217792
Feature norm: 445.162142
Error norm: 300.145821
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.035

***** Iteration #333 *****
Loss: 12662.966191
Feature norm: 446.463913
Error norm: 702.387110
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.074

***** Iteration #334 *****
Loss: 12622.489677
Feature norm: 448.410984
Error norm: 408.469306
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.039

***** Iteration #335 *****
Loss: 12590.945153
Feature norm: 449.416478
Error norm: 227.637156
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #375 *****
Loss: 11789.551510
Feature norm: 479.719242
Error norm: 394.243324
Active features: 129129
Line search trials: 2
Line search step: 0.410964
Seconds required for this iteration: 2.164

***** Iteration #376 *****
Loss: 11757.339066
Feature norm: 481.285017
Error norm: 195.595587
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.094

***** Iteration #377 *****
Loss: 11738.308155
Feature norm: 482.465992
Error norm: 285.818519
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.081

***** Iteration #378 *****
Loss: 11728.618926
Feature norm: 482.925849
Error norm: 349.127947
Active features: 129129
Line search trials: 2
Line search step: 0.438495
Seconds required for this iteration: 2.229

***** Iteration #379 *****
Loss: 11719.407783
Feature norm: 483.096098
Error norm: 274.263520
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #420 *****
Loss: 10943.459717
Feature norm: 531.075532
Error norm: 235.149848
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.045

***** Iteration #421 *****
Loss: 10904.763080
Feature norm: 532.240531
Error norm: 347.908777
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.032

***** Iteration #422 *****
Loss: 10886.890776
Feature norm: 535.301459
Error norm: 360.610682
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.050

***** Iteration #423 *****
Loss: 10868.961268
Feature norm: 535.367511
Error norm: 165.787520
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.042

***** Iteration #424 *****
Loss: 10858.520785
Feature norm: 535.709531
Error norm: 149.358302
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #461 *****
Loss: 10383.389910
Feature norm: 562.368099
Error norm: 219.810132
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.064

***** Iteration #462 *****
Loss: 10370.446945
Feature norm: 563.409629
Error norm: 174.207467
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.020

***** Iteration #463 *****
Loss: 10365.183953
Feature norm: 564.511359
Error norm: 322.729237
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.038

***** Iteration #464 *****
Loss: 10353.074645
Feature norm: 564.244728
Error norm: 138.181447
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.063

***** Iteration #465 *****
Loss: 10346.797229
Feature norm: 564.323094
Error norm: 221.908862
Active features: 129129
Line search trials: 1
Line search step

Storing the model
Number of active features: 129129 (129129)
Number of active attributes: 111484 (588372)
Number of active labels: 31 (31)
Writing labels
Writing attributes
Writing feature references for transitions
Writing feature references for attributes
Seconds required: 0.989



HBox(children=(FloatProgress(value=0.0, max=6094.0), HTML(value='')))






              precision    recall  f1-score   support

        _BRN     0.2381    0.1111    0.1515        45
        _DES     0.8490    0.8055    0.8267      1815
        _DTM     0.6923    0.6699    0.6809      1948
        _LOC     0.6993    0.6408    0.6688      4254
        _MEA     0.6040    0.5305    0.5649      3097
        _NUM     0.5580    0.5511    0.5545      1292
        _ORG     0.5865    0.5684    0.5773      4544
        _PER     0.8019    0.7785    0.7900      4118
        _TRM     0.4143    0.3580    0.3841       162
        _TTL     0.9552    0.9529    0.9541      2016

   micro avg     0.7076    0.6707    0.6887     23291
   macro avg     0.6399    0.5967    0.6153     23291
weighted avg     0.7053    0.6707    0.6873     23291



HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


Feature generation
type: CRF1d
feature.minfreq: 3.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 129129
Seconds required: 4.410

L-BFGS optimization
c1: 0.000000
c2: 0.500000
num_memories: 6
max_iterations: 500
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 579583.146983
Feature norm: 5.000000
Error norm: 123059.164305
Active features: 129129
Line search trials: 2
Line search step: 0.000011
Seconds required for this iteration: 3.313

***** Iteration #2 *****
Loss: 395926.022412
Feature norm: 3.511906
Error norm: 92702.293755
Active features: 129129
Line search trials: 3
Line search step: 1.541209
Seconds required for this iteration: 3.126

***** Iteration #3 *****
Loss: 364504.519368
Feature norm: 3.196980
Error norm: 52474.525453
Active features: 129129
Line search trials: 2
Line search step: 0.077040
Seconds req

***** Iteration #41 *****
Loss: 79689.576526
Feature norm: 52.058097
Error norm: 9014.981015
Active features: 129129
Line search trials: 2
Line search step: 0.153507
Seconds required for this iteration: 2.088

***** Iteration #42 *****
Loss: 78791.089886
Feature norm: 52.386841
Error norm: 4778.577416
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.060

***** Iteration #43 *****
Loss: 78132.952553
Feature norm: 52.664397
Error norm: 3473.150521
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 0.998

***** Iteration #44 *****
Loss: 77505.724585
Feature norm: 52.895794
Error norm: 4710.891523
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.029

***** Iteration #45 *****
Loss: 76188.855684
Feature norm: 53.635539
Error norm: 6898.540627
Active features: 129129
Line search trials: 1
Line search step: 1.0

***** Iteration #83 *****
Loss: 48719.769424
Feature norm: 101.530733
Error norm: 1452.621227
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.042

***** Iteration #84 *****
Loss: 48423.492137
Feature norm: 102.607610
Error norm: 1364.699743
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.000

***** Iteration #85 *****
Loss: 48091.097094
Feature norm: 103.972249
Error norm: 1304.583348
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.042

***** Iteration #86 *****
Loss: 47988.514414
Feature norm: 105.069134
Error norm: 3366.948023
Active features: 129129
Line search trials: 2
Line search step: 0.226090
Seconds required for this iteration: 2.094

***** Iteration #87 *****
Loss: 47758.651159
Feature norm: 106.116541
Error norm: 2350.650442
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #130 *****
Loss: 40668.811754
Feature norm: 133.886223
Error norm: 930.043029
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.039

***** Iteration #131 *****
Loss: 40610.787732
Feature norm: 134.011234
Error norm: 1079.919038
Active features: 129129
Line search trials: 2
Line search step: 0.168540
Seconds required for this iteration: 2.073

***** Iteration #132 *****
Loss: 40524.465939
Feature norm: 134.164515
Error norm: 700.779070
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.065

***** Iteration #133 *****
Loss: 40422.089720
Feature norm: 134.061625
Error norm: 648.393520
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.038

***** Iteration #134 *****
Loss: 40263.606564
Feature norm: 133.740770
Error norm: 740.970695
Active features: 129129
Line search trials: 1
Line search ste

***** Iteration #171 *****
Loss: 38126.518728
Feature norm: 134.210064
Error norm: 412.721679
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.050

***** Iteration #172 *****
Loss: 38088.672204
Feature norm: 134.272380
Error norm: 421.705185
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.048

***** Iteration #173 *****
Loss: 38035.311032
Feature norm: 134.361601
Error norm: 443.807059
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.052

***** Iteration #174 *****
Loss: 38012.637864
Feature norm: 134.441226
Error norm: 756.695915
Active features: 129129
Line search trials: 2
Line search step: 0.295959
Seconds required for this iteration: 2.142

***** Iteration #175 *****
Loss: 37981.113530
Feature norm: 134.463474
Error norm: 453.926617
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #210 *****
Loss: 37083.811211
Feature norm: 138.101839
Error norm: 205.238058
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.037

***** Iteration #211 *****
Loss: 37076.032604
Feature norm: 138.140179
Error norm: 691.722945
Active features: 129129
Line search trials: 2
Line search step: 0.492215
Seconds required for this iteration: 2.074

***** Iteration #212 *****
Loss: 37063.746107
Feature norm: 138.172471
Error norm: 344.012097
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.009

***** Iteration #213 *****
Loss: 37053.900610
Feature norm: 138.225337
Error norm: 313.848429
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.057

***** Iteration #214 *****
Loss: 37044.579546
Feature norm: 138.286772
Error norm: 328.921117
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #252 *****
Loss: 36773.734271
Feature norm: 140.818933
Error norm: 371.234948
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.020

***** Iteration #253 *****
Loss: 36770.198830
Feature norm: 140.888872
Error norm: 194.102477
Active features: 129129
Line search trials: 2
Line search step: 0.515542
Seconds required for this iteration: 2.075

***** Iteration #254 *****
Loss: 36766.557925
Feature norm: 140.971916
Error norm: 174.751243
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.038

***** Iteration #255 *****
Loss: 36759.246995
Feature norm: 141.174639
Error norm: 210.326294
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.094

***** Iteration #256 *****
Loss: 36750.098946
Feature norm: 141.358791
Error norm: 237.019164
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #293 *****
Loss: 36607.556832
Feature norm: 144.808789
Error norm: 121.662000
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.039

***** Iteration #294 *****
Loss: 36605.724512
Feature norm: 144.777338
Error norm: 133.613332
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.021

***** Iteration #295 *****
Loss: 36603.755798
Feature norm: 144.856840
Error norm: 374.327577
Active features: 129129
Line search trials: 2
Line search step: 0.432871
Seconds required for this iteration: 2.062

***** Iteration #296 *****
Loss: 36599.557434
Feature norm: 144.874465
Error norm: 234.100482
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.102

***** Iteration #297 *****
Loss: 36596.654814
Feature norm: 144.966930
Error norm: 96.063356
Active features: 129129
Line search trials: 1
Line search step:

***** Iteration #334 *****
Loss: 36540.187566
Feature norm: 146.135055
Error norm: 60.768288
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.034

***** Iteration #335 *****
Loss: 36539.952818
Feature norm: 146.136143
Error norm: 63.994409
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.043

***** Iteration #336 *****
Loss: 36539.567156
Feature norm: 146.144729
Error norm: 89.370482
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.048

***** Iteration #337 *****
Loss: 36538.917543
Feature norm: 146.160093
Error norm: 63.875311
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.108

***** Iteration #338 *****
Loss: 36538.485124
Feature norm: 146.189772
Error norm: 126.152358
Active features: 129129
Line search trials: 2
Line search step: 0.

***** Iteration #373 *****
Loss: 36523.313794
Feature norm: 146.693089
Error norm: 53.747712
Active features: 129129
Line search trials: 2
Line search step: 0.459406
Seconds required for this iteration: 2.127

***** Iteration #374 *****
Loss: 36523.139019
Feature norm: 146.713637
Error norm: 37.290492
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.069

***** Iteration #375 *****
Loss: 36522.940812
Feature norm: 146.730373
Error norm: 34.853627
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.079

***** Iteration #376 *****
Loss: 36522.620469
Feature norm: 146.750320
Error norm: 33.369057
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.063

***** Iteration #377 *****
Loss: 36522.058826
Feature norm: 146.781645
Error norm: 44.016537
Active features: 129129
Line search trials: 1
Line search step: 1.0

***** Iteration #413 *****
Loss: 36516.049748
Feature norm: 147.189836
Error norm: 109.929363
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.058

***** Iteration #414 *****
Loss: 36515.906646
Feature norm: 147.187582
Error norm: 27.504957
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.054

***** Iteration #415 *****
Loss: 36515.864489
Feature norm: 147.195565
Error norm: 22.206594
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.058

***** Iteration #416 *****
Loss: 36515.830198
Feature norm: 147.206173
Error norm: 25.778452
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.034

***** Iteration #417 *****
Loss: 36515.708018
Feature norm: 147.225838
Error norm: 25.312940
Active features: 129129
Line search trials: 1
Line search step: 1.

***** Iteration #454 *****
Loss: 36512.451836
Feature norm: 147.512584
Error norm: 18.613588
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.090

***** Iteration #455 *****
Loss: 36512.416471
Feature norm: 147.515388
Error norm: 25.481721
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.060

***** Iteration #456 *****
Loss: 36512.359673
Feature norm: 147.521138
Error norm: 25.908962
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.065

***** Iteration #457 *****
Loss: 36512.286856
Feature norm: 147.531820
Error norm: 36.754491
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.087

***** Iteration #458 *****
Loss: 36512.235201
Feature norm: 147.532642
Error norm: 24.771422
Active features: 129129
Line search trials: 2
Line search step: 0.4

HBox(children=(FloatProgress(value=0.0, max=6094.0), HTML(value='')))


              precision    recall  f1-score   support

        _BRN     0.2000    0.0444    0.0727        45
        _DES     0.9180    0.7152    0.8040      1815
        _DTM     0.7243    0.6905    0.7070      1948
        _LOC     0.7819    0.6305    0.6981      4254
        _MEA     0.6514    0.5641    0.6046      3097
        _NUM     0.6340    0.5604    0.5949      1292
        _ORG     0.6916    0.5299    0.6000      4544
        _PER     0.8111    0.7319    0.7695      4118
        _TRM     0.4286    0.1481    0.2202       162
        _TTL     0.9802    0.9350    0.9571      2016

   micro avg     0.7675    0.6496    0.7036     23291
   macro avg     0.6821    0.5550    0.6028     23291
weighted avg     0.7632    0.6496    0.7003     23291



HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


Feature generation
type: CRF1d
feature.minfreq: 3.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 129129
Seconds required: 4.322

L-BFGS optimization
c1: 0.000000
c2: 1.000000
num_memories: 6
max_iterations: 500
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 579595.646983
Feature norm: 5.000000
Error norm: 123063.820219
Active features: 129129
Line search trials: 2
Line search step: 0.000011
Seconds required for this iteration: 3.155

***** Iteration #2 *****
Loss: 395930.591374
Feature norm: 3.511877
Error norm: 92703.567181
Active features: 129129
Line search trials: 3
Line search step: 1.541180
Seconds required for this iteration: 3.218

***** Iteration #3 *****
Loss: 364510.471914
Feature norm: 3.196960
Error norm: 52475.725486
Active features: 129129
Line search trials: 2
Line search step: 0.077043
Seconds req

***** Iteration #45 *****
Loss: 72313.429784
Feature norm: 66.150915
Error norm: 3000.031388
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.042

***** Iteration #46 *****
Loss: 71913.272108
Feature norm: 66.482118
Error norm: 4698.399810
Active features: 129129
Line search trials: 2
Line search step: 0.248879
Seconds required for this iteration: 2.126

***** Iteration #47 *****
Loss: 71247.058058
Feature norm: 66.344515
Error norm: 3324.097796
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.018

***** Iteration #48 *****
Loss: 70250.125970
Feature norm: 66.080732
Error norm: 2340.575345
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.094

***** Iteration #49 *****
Loss: 69113.572915
Feature norm: 66.226757
Error norm: 2422.919339
Active features: 129129
Line search trials: 1
Line search step: 1.0

***** Iteration #85 *****
Loss: 52117.392476
Feature norm: 90.747362
Error norm: 1065.702923
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.070

***** Iteration #86 *****
Loss: 51949.912304
Feature norm: 91.114880
Error norm: 1864.218779
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.057

***** Iteration #87 *****
Loss: 51760.621516
Feature norm: 91.863702
Error norm: 2643.448332
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.056

***** Iteration #88 *****
Loss: 51482.093532
Feature norm: 92.794823
Error norm: 2131.269849
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.082

***** Iteration #89 *****
Loss: 51301.657163
Feature norm: 95.000052
Error norm: 2744.581091
Active features: 129129
Line search trials: 1
Line search step: 1.0

***** Iteration #125 *****
Loss: 47140.606617
Feature norm: 110.081191
Error norm: 566.033053
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.079

***** Iteration #126 *****
Loss: 47102.688987
Feature norm: 110.056350
Error norm: 626.535300
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.080

***** Iteration #127 *****
Loss: 47007.619356
Feature norm: 110.177128
Error norm: 758.086060
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.069

***** Iteration #128 *****
Loss: 46954.462379
Feature norm: 110.098786
Error norm: 1168.816436
Active features: 129129
Line search trials: 2
Line search step: 0.368682
Seconds required for this iteration: 2.132

***** Iteration #129 *****
Loss: 46891.316928
Feature norm: 110.182525
Error norm: 650.408868
Active features: 129129
Line search trials: 1
Line search ste

***** Iteration #166 *****
Loss: 45459.099194
Feature norm: 111.885513
Error norm: 607.032007
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.082

***** Iteration #167 *****
Loss: 45430.808150
Feature norm: 111.902414
Error norm: 312.304453
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.070

***** Iteration #168 *****
Loss: 45404.841979
Feature norm: 111.940389
Error norm: 336.388469
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.102

***** Iteration #169 *****
Loss: 45386.780679
Feature norm: 111.972987
Error norm: 605.973004
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.068

***** Iteration #170 *****
Loss: 45372.235668
Feature norm: 112.003654
Error norm: 1178.425920
Active features: 129129
Line search trials: 1
Line search ste

***** Iteration #214 *****
Loss: 44949.473507
Feature norm: 112.884676
Error norm: 152.255083
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.098

***** Iteration #215 *****
Loss: 44946.124826
Feature norm: 112.906988
Error norm: 154.802517
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.076

***** Iteration #216 *****
Loss: 44940.789701
Feature norm: 112.942529
Error norm: 185.942374
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.081

***** Iteration #217 *****
Loss: 44934.557441
Feature norm: 113.016387
Error norm: 300.112154
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.039

***** Iteration #218 *****
Loss: 44927.974694
Feature norm: 113.043933
Error norm: 216.007064
Active features: 129129
Line search trials: 1
Line search step

***** Iteration #257 *****
Loss: 44861.599353
Feature norm: 113.655241
Error norm: 95.044734
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.073

***** Iteration #258 *****
Loss: 44860.671713
Feature norm: 113.652601
Error norm: 73.700840
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.117

***** Iteration #259 *****
Loss: 44859.917509
Feature norm: 113.662253
Error norm: 102.875523
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.095

***** Iteration #260 *****
Loss: 44858.334594
Feature norm: 113.688703
Error norm: 126.062679
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.175

***** Iteration #261 *****
Loss: 44856.447286
Feature norm: 113.732222
Error norm: 133.928317
Active features: 129129
Line search trials: 1
Line search step: 

***** Iteration #300 *****
Loss: 44834.470096
Feature norm: 114.485175
Error norm: 70.140309
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.032

***** Iteration #301 *****
Loss: 44834.281972
Feature norm: 114.487098
Error norm: 85.606276
Active features: 129129
Line search trials: 2
Line search step: 0.391700
Seconds required for this iteration: 2.062

***** Iteration #302 *****
Loss: 44834.139440
Feature norm: 114.486276
Error norm: 53.923186
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.029

***** Iteration #303 *****
Loss: 44833.904499
Feature norm: 114.489490
Error norm: 37.717596
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.038

***** Iteration #304 *****
Loss: 44833.699835
Feature norm: 114.495892
Error norm: 49.754203
Active features: 129129
Line search trials: 1
Line search step: 1.0

***** Iteration #341 *****
Loss: 44827.526356
Feature norm: 114.700737
Error norm: 24.825041
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.067

***** Iteration #342 *****
Loss: 44827.403295
Feature norm: 114.699966
Error norm: 33.199047
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.071

***** Iteration #343 *****
Loss: 44827.281742
Feature norm: 114.698942
Error norm: 34.133478
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.048

***** Iteration #344 *****
Loss: 44827.212053
Feature norm: 114.700445
Error norm: 14.141584
Active features: 129129
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.052

***** Iteration #345 *****
Loss: 44827.176069
Feature norm: 114.702121
Error norm: 16.581605
Active features: 129129
Line search trials: 1
Line search step: 1.0

HBox(children=(FloatProgress(value=0.0, max=6094.0), HTML(value='')))


              precision    recall  f1-score   support

        _BRN     0.1429    0.0222    0.0385        45
        _DES     0.9189    0.6738    0.7775      1815
        _DTM     0.7352    0.6843    0.7089      1948
        _LOC     0.7885    0.6091    0.6873      4254
        _MEA     0.6509    0.5570    0.6003      3097
        _NUM     0.6184    0.5619    0.5888      1292
        _ORG     0.7057    0.5040    0.5880      4544
        _PER     0.8311    0.7062    0.7636      4118
        _TRM     0.4048    0.1049    0.1667       162
        _TTL     0.9813    0.9087    0.9436      2016

   micro avg     0.7743    0.6288    0.6940     23291
   macro avg     0.6778    0.5332    0.5863     23291
weighted avg     0.7706    0.6288    0.6905     23291




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


Feature generation
type: CRF1d
feature.minfreq: 3.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 129129
Seconds required: 4.469

L-BFGS optimization
c1: 0.500000
c2: 0.000000
num_memories: 6
max_iterations: 500
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 923486.560457
Feature norm: 1.000000
Error norm: 433866.887487
Active features: 128518
Line search trials: 1
Line search step: 0.000002
Seconds required for this iteration: 2.036

***** Iteration #2 *****
Loss: 595252.826481
Feature norm: 5.141583
Error norm: 123496.001628
Active features: 128377
Line search trials: 4
Line search step: 0.125000
Seconds required for this iteration: 4.361

***** Iteration #3 *****
Loss: 484865.391843
Feature norm: 4.242539
Error norm: 120191.905239
Active features: 123351
Line search trials: 1
Line search step: 1.000000
Seconds r

***** Iteration #47 *****
Loss: 38162.843397
Feature norm: 191.488487
Error norm: 1294.844341
Active features: 43224
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.067

***** Iteration #48 *****
Loss: 37822.846283
Feature norm: 195.379263
Error norm: 1419.984328
Active features: 41702
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.069

***** Iteration #49 *****
Loss: 37509.052081
Feature norm: 198.311147
Error norm: 1054.504396
Active features: 41306
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.068

***** Iteration #50 *****
Loss: 37162.699505
Feature norm: 201.975116
Error norm: 1796.918430
Active features: 40532
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.075

***** Iteration #51 *****
Loss: 36902.855140
Feature norm: 204.526396
Error norm: 1441.604554
Active features: 39765
Line search trials: 1
Line search step: 1.0

***** Iteration #87 *****
Loss: 34378.174710
Feature norm: 235.605267
Error norm: 1434.798263
Active features: 30681
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.428

***** Iteration #88 *****
Loss: 34345.360807
Feature norm: 235.895680
Error norm: 700.479795
Active features: 30580
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.150

***** Iteration #89 *****
Loss: 34337.921459
Feature norm: 236.279844
Error norm: 1488.542623
Active features: 30465
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.116

***** Iteration #90 *****
Loss: 34302.180106
Feature norm: 236.559152
Error norm: 627.570781
Active features: 30427
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.138

***** Iteration #91 *****
Loss: 34295.142481
Feature norm: 236.902101
Error norm: 1430.124011
Active features: 30335
Line search trials: 1
Line search step: 1.000

***** Iteration #127 *****
Loss: 33754.969148
Feature norm: 244.033238
Error norm: 1057.279094
Active features: 28189
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.067

***** Iteration #128 *****
Loss: 33735.071346
Feature norm: 244.148641
Error norm: 483.530152
Active features: 28145
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.042

***** Iteration #129 *****
Loss: 33732.320325
Feature norm: 244.288969
Error norm: 1076.974762
Active features: 28073
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.128

***** Iteration #130 *****
Loss: 33711.530635
Feature norm: 244.396089
Error norm: 399.447888
Active features: 28023
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.061

***** Iteration #131 *****
Loss: 33711.283430
Feature norm: 244.541744
Error norm: 1153.708209
Active features: 27959
Line search trials: 1
Line search step: 

***** Iteration #168 *****
Loss: 33395.999002
Feature norm: 247.030766
Error norm: 371.051791
Active features: 26405
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.060

***** Iteration #169 *****
Loss: 33394.295268
Feature norm: 247.095878
Error norm: 711.852310
Active features: 26360
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.101

***** Iteration #170 *****
Loss: 33385.208124
Feature norm: 247.122894
Error norm: 350.611899
Active features: 26311
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.130

***** Iteration #171 *****
Loss: 33383.729924
Feature norm: 247.185023
Error norm: 706.430972
Active features: 26276
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.069

***** Iteration #172 *****
Loss: 33374.878144
Feature norm: 247.212891
Error norm: 321.650109
Active features: 26235
Line search trials: 1
Line search step: 1.0

***** Iteration #211 *****
Loss: 33210.554098
Feature norm: 248.553066
Error norm: 726.450824
Active features: 25141
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.058

***** Iteration #212 *****
Loss: 33202.249744
Feature norm: 248.581916
Error norm: 376.925641
Active features: 25121
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.080

***** Iteration #213 *****
Loss: 33201.213365
Feature norm: 248.642769
Error norm: 661.045325
Active features: 25105
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.098

***** Iteration #214 *****
Loss: 33193.779091
Feature norm: 248.665259
Error norm: 304.830541
Active features: 25087
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.080

***** Iteration #215 *****
Loss: 33191.407777
Feature norm: 248.711215
Error norm: 492.685306
Active features: 25084
Line search trials: 1
Line search step: 1.0

***** Iteration #252 *****
Loss: 33092.225148
Feature norm: 249.711849
Error norm: 222.113906
Active features: 24404
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.075

***** Iteration #253 *****
Loss: 33091.797861
Feature norm: 249.739990
Error norm: 429.714406
Active features: 24392
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.042

***** Iteration #254 *****
Loss: 33088.003936
Feature norm: 249.755238
Error norm: 228.827091
Active features: 24375
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.060

***** Iteration #255 *****
Loss: 33087.463297
Feature norm: 249.782457
Error norm: 424.795088
Active features: 24369
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.053

***** Iteration #256 *****
Loss: 33083.707872
Feature norm: 249.798152
Error norm: 224.420710
Active features: 24343
Line search trials: 1
Line search step: 1.0

***** Iteration #292 *****
Loss: 33012.521338
Feature norm: 250.522213
Error norm: 243.525716
Active features: 23759
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.088

***** Iteration #293 *****
Loss: 33011.373653
Feature norm: 250.545788
Error norm: 317.768073
Active features: 23741
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.134

***** Iteration #294 *****
Loss: 33009.400788
Feature norm: 250.562923
Error norm: 253.847957
Active features: 23726
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.119

***** Iteration #295 *****
Loss: 33008.292134
Feature norm: 250.590221
Error norm: 334.978646
Active features: 23704
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.116

***** Iteration #296 *****
Loss: 33006.099151
Feature norm: 250.611185
Error norm: 228.761896
Active features: 23694
Line search trials: 1
Line search step: 1.0

***** Iteration #335 *****
Loss: 32955.777231
Feature norm: 251.429828
Error norm: 279.719285
Active features: 23132
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.106

***** Iteration #336 *****
Loss: 32954.128076
Feature norm: 251.444163
Error norm: 182.210072
Active features: 23128
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.126

***** Iteration #337 *****
Loss: 32953.551313
Feature norm: 251.466345
Error norm: 262.443710
Active features: 23133
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.090

***** Iteration #338 *****
Loss: 32952.039027
Feature norm: 251.482708
Error norm: 182.952938
Active features: 23123
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.039

***** Iteration #339 *****
Loss: 32951.581662
Feature norm: 251.505994
Error norm: 276.494356
Active features: 23117
Line search trials: 1
Line search step: 1.0

***** Iteration #380 *****
Loss: 32913.255199
Feature norm: 252.299663
Error norm: 160.284761
Active features: 22692
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.039

***** Iteration #381 *****
Loss: 32912.920376
Feature norm: 252.321360
Error norm: 246.789515
Active features: 22678
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.068

***** Iteration #382 *****
Loss: 32911.673269
Feature norm: 252.333135
Error norm: 153.766458
Active features: 22672
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.063

***** Iteration #383 *****
Loss: 32911.348846
Feature norm: 252.352390
Error norm: 240.505303
Active features: 22681
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.067

***** Iteration #384 *****
Loss: 32910.135098
Feature norm: 252.360773
Error norm: 144.168352
Active features: 22666
Line search trials: 1
Line search step: 1.0

***** Iteration #420 *****
Loss: 32885.613982
Feature norm: 252.946829
Error norm: 144.642942
Active features: 22408
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.111

***** Iteration #421 *****
Loss: 32885.398082
Feature norm: 252.967786
Error norm: 213.909882
Active features: 22406
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.102

***** Iteration #422 *****
Loss: 32884.375513
Feature norm: 252.972640
Error norm: 141.640200
Active features: 22398
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.152

***** Iteration #423 *****
Loss: 32884.152590
Feature norm: 252.993219
Error norm: 209.911826
Active features: 22391
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.054

***** Iteration #424 *****
Loss: 32883.205024
Feature norm: 252.999057
Error norm: 144.118903
Active features: 22370
Line search trials: 1
Line search step: 1.0

***** Iteration #460 *****
Loss: 32864.982012
Feature norm: 253.500620
Error norm: 111.800950
Active features: 22071
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.066

***** Iteration #461 *****
Loss: 32864.820193
Feature norm: 253.516375
Error norm: 175.090385
Active features: 22064
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.092

***** Iteration #462 *****
Loss: 32864.144549
Feature norm: 253.519890
Error norm: 125.665698
Active features: 22057
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.068

***** Iteration #463 *****
Loss: 32863.926204
Feature norm: 253.536230
Error norm: 174.564914
Active features: 22057
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.034

***** Iteration #464 *****
Loss: 32863.235610
Feature norm: 253.543315
Error norm: 118.415462
Active features: 22053
Line search trials: 1
Line search step: 1.0

HBox(children=(FloatProgress(value=0.0, max=6094.0), HTML(value='')))


              precision    recall  f1-score   support

        _BRN     0.3158    0.1333    0.1875        45
        _DES     0.9033    0.8083    0.8532      1815
        _DTM     0.7377    0.6946    0.7155      1948
        _LOC     0.7701    0.6653    0.7138      4254
        _MEA     0.6453    0.5680    0.6042      3097
        _NUM     0.6282    0.5689    0.5971      1292
        _ORG     0.6776    0.5440    0.6035      4544
        _PER     0.8165    0.7756    0.7955      4118
        _TRM     0.4375    0.1728    0.2478       162
        _TTL     0.9686    0.9499    0.9592      2016

   micro avg     0.7632    0.6766    0.7173     23291
   macro avg     0.6901    0.5881    0.6277     23291
weighted avg     0.7574    0.6766    0.7137     23291



HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


Feature generation
type: CRF1d
feature.minfreq: 3.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 129129
Seconds required: 4.487

L-BFGS optimization
c1: 0.500000
c2: 0.500000
num_memories: 6
max_iterations: 500
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 923487.060457
Feature norm: 1.000000
Error norm: 433865.887809
Active features: 128518
Line search trials: 1
Line search step: 0.000002
Seconds required for this iteration: 2.189

***** Iteration #2 *****
Loss: 595229.561098
Feature norm: 5.141261
Error norm: 123500.379803
Active features: 128377
Line search trials: 4
Line search step: 0.125000
Seconds required for this iteration: 4.439

***** Iteration #3 *****
Loss: 484843.885297
Feature norm: 4.242254
Error norm: 120193.493113
Active features: 123352
Line search trials: 1
Line search step: 1.000000
Seconds r

***** Iteration #45 *****
Loss: 49645.338728
Feature norm: 114.135106
Error norm: 1106.345788
Active features: 48281
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.105

***** Iteration #46 *****
Loss: 49322.567079
Feature norm: 115.867906
Error norm: 1188.316723
Active features: 46632
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.058

***** Iteration #47 *****
Loss: 49064.498404
Feature norm: 116.774214
Error norm: 636.413665
Active features: 46084
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.110

***** Iteration #48 *****
Loss: 48819.834838
Feature norm: 117.999199
Error norm: 1323.779734
Active features: 45155
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.080

***** Iteration #49 *****
Loss: 48611.520284
Feature norm: 118.484853
Error norm: 1166.922196
Active features: 44885
Line search trials: 1
Line search step: 1.00

***** Iteration #86 *****
Loss: 46805.265815
Feature norm: 124.768435
Error norm: 1329.993295
Active features: 36730
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.085

***** Iteration #87 *****
Loss: 46785.521033
Feature norm: 124.800232
Error norm: 556.252092
Active features: 36676
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.098

***** Iteration #88 *****
Loss: 46781.643404
Feature norm: 124.883341
Error norm: 1256.493391
Active features: 36651
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.109

***** Iteration #89 *****
Loss: 46763.210709
Feature norm: 124.911740
Error norm: 532.552583
Active features: 36595
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.136

***** Iteration #90 *****
Loss: 46758.881990
Feature norm: 124.989156
Error norm: 1254.182283
Active features: 36580
Line search trials: 1
Line search step: 1.000

***** Iteration #133 *****
Loss: 46429.492126
Feature norm: 126.120586
Error norm: 266.166670
Active features: 35575
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.129

***** Iteration #134 *****
Loss: 46425.650496
Feature norm: 126.122679
Error norm: 558.025742
Active features: 35569
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.166

***** Iteration #135 *****
Loss: 46424.793876
Feature norm: 126.091204
Error norm: 1009.090308
Active features: 35496
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.103

***** Iteration #136 *****
Loss: 46412.926760
Feature norm: 126.117723
Error norm: 656.763494
Active features: 35496
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.061

***** Iteration #137 *****
Loss: 46407.128316
Feature norm: 126.099262
Error norm: 462.175775
Active features: 35481
Line search trials: 1
Line search step: 1.

***** Iteration #174 *****
Loss: 46298.481415
Feature norm: 126.134956
Error norm: 672.276375
Active features: 34910
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.148

***** Iteration #175 *****
Loss: 46293.713892
Feature norm: 126.130038
Error norm: 395.829888
Active features: 34902
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.087

***** Iteration #176 *****
Loss: 46291.029857
Feature norm: 126.136781
Error norm: 378.460784
Active features: 34900
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.093

***** Iteration #177 *****
Loss: 46290.068313
Feature norm: 126.131942
Error norm: 540.758288
Active features: 34880
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.073

***** Iteration #178 *****
Loss: 46286.657673
Feature norm: 126.143239
Error norm: 410.048716
Active features: 34869
Line search trials: 1
Line search step: 1.0

***** Iteration #214 *****
Loss: 46232.555786
Feature norm: 126.161955
Error norm: 371.783376
Active features: 34501
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.052

***** Iteration #215 *****
Loss: 46230.962365
Feature norm: 126.158395
Error norm: 298.505815
Active features: 34503
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.038

***** Iteration #216 *****
Loss: 46230.259789
Feature norm: 126.165677
Error norm: 380.663071
Active features: 34496
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.061

***** Iteration #217 *****
Loss: 46228.794976
Feature norm: 126.161029
Error norm: 333.384161
Active features: 34490
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.090

***** Iteration #218 *****
Loss: 46227.881488
Feature norm: 126.170168
Error norm: 370.987155
Active features: 34478
Line search trials: 1
Line search step: 1.0

***** Iteration #256 *****
Loss: 46195.790585
Feature norm: 126.221381
Error norm: 276.085536
Active features: 34169
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.045

***** Iteration #257 *****
Loss: 46195.147308
Feature norm: 126.217892
Error norm: 266.106426
Active features: 34169
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.063

***** Iteration #258 *****
Loss: 46194.543981
Feature norm: 126.223766
Error norm: 275.727900
Active features: 34163
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.045

***** Iteration #259 *****
Loss: 46193.917079
Feature norm: 126.219585
Error norm: 262.672317
Active features: 34153
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.065

***** Iteration #260 *****
Loss: 46193.326110
Feature norm: 126.225662
Error norm: 271.065965
Active features: 34139
Line search trials: 1
Line search step: 1.0

***** Iteration #300 *****
Loss: 46173.728997
Feature norm: 126.220905
Error norm: 224.643040
Active features: 33954
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.091

***** Iteration #301 *****
Loss: 46173.355841
Feature norm: 126.216076
Error norm: 239.201334
Active features: 33942
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.103

***** Iteration #302 *****
Loss: 46172.848995
Feature norm: 126.219261
Error norm: 212.400987
Active features: 33937
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.132

***** Iteration #303 *****
Loss: 46172.541209
Feature norm: 126.214567
Error norm: 242.904190
Active features: 33925
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.107

***** Iteration #304 *****
Loss: 46172.047997
Feature norm: 126.218251
Error norm: 213.111175
Active features: 33928
Line search trials: 1
Line search step: 1.0

***** Iteration #344 *****
Loss: 46158.214053
Feature norm: 126.198289
Error norm: 196.968905
Active features: 33782
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.051

***** Iteration #345 *****
Loss: 46157.986353
Feature norm: 126.194435
Error norm: 214.813196
Active features: 33780
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.077

***** Iteration #346 *****
Loss: 46157.641788
Feature norm: 126.197594
Error norm: 199.133239
Active features: 33777
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.051

***** Iteration #347 *****
Loss: 46157.384140
Feature norm: 126.193699
Error norm: 207.694810
Active features: 33779
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.115

***** Iteration #348 *****
Loss: 46157.061139
Feature norm: 126.196832
Error norm: 197.248027
Active features: 33775
Line search trials: 1
Line search step: 1.0

***** Iteration #385 *****
Loss: 46147.094515
Feature norm: 126.160337
Error norm: 163.456469
Active features: 33693
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.062

***** Iteration #386 *****
Loss: 46147.027460
Feature norm: 126.162886
Error norm: 227.008007
Active features: 33692
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.070

***** Iteration #387 *****
Loss: 46146.594112
Feature norm: 126.158884
Error norm: 153.920295
Active features: 33692
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.034

***** Iteration #388 *****
Loss: 46146.532387
Feature norm: 126.161001
Error norm: 222.166044
Active features: 33693
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.110

***** Iteration #389 *****
Loss: 46146.128830
Feature norm: 126.156891
Error norm: 157.580991
Active features: 33695
Line search trials: 1
Line search step: 1.0

***** Iteration #428 *****
Loss: 46136.595998
Feature norm: 126.126316
Error norm: 105.293017
Active features: 33655
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.096

***** Iteration #429 *****
Loss: 46136.406934
Feature norm: 126.122697
Error norm: 139.686095
Active features: 33653
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.156

***** Iteration #430 *****
Loss: 46136.106413
Feature norm: 126.123948
Error norm: 96.721046
Active features: 33656
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.088

***** Iteration #431 *****
Loss: 46135.929493
Feature norm: 126.120673
Error norm: 144.033159
Active features: 33655
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.116

***** Iteration #432 *****
Loss: 46135.640344
Feature norm: 126.122017
Error norm: 105.912659
Active features: 33654
Line search trials: 2
Line search step: 0.50

***** Iteration #468 *****
Loss: 46128.428450
Feature norm: 126.059775
Error norm: 110.983011
Active features: 33580
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.125

***** Iteration #469 *****
Loss: 46128.264343
Feature norm: 126.054669
Error norm: 117.864220
Active features: 33578
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.147

***** Iteration #470 *****
Loss: 46128.078851
Feature norm: 126.055764
Error norm: 107.373703
Active features: 33577
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.091

***** Iteration #471 *****
Loss: 46127.917695
Feature norm: 126.051122
Error norm: 111.401615
Active features: 33572
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.135

***** Iteration #472 *****
Loss: 46127.745159
Feature norm: 126.051928
Error norm: 101.773919
Active features: 33570
Line search trials: 2
Line search step: 0.5

HBox(children=(FloatProgress(value=0.0, max=6094.0), HTML(value='')))


              precision    recall  f1-score   support

        _BRN     0.1250    0.0222    0.0377        45
        _DES     0.9126    0.6843    0.7821      1815
        _DTM     0.7258    0.6889    0.7069      1948
        _LOC     0.7902    0.6232    0.6968      4254
        _MEA     0.6490    0.5563    0.5991      3097
        _NUM     0.6145    0.5627    0.5875      1292
        _ORG     0.7082    0.5196    0.5994      4544
        _PER     0.8218    0.7188    0.7668      4118
        _TRM     0.4091    0.1111    0.1748       162
        _TTL     0.9803    0.9360    0.9576      2016

   micro avg     0.7720    0.6402    0.7000     23291
   macro avg     0.6736    0.5423    0.5909     23291
weighted avg     0.7679    0.6402    0.6963     23291



HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


Feature generation
type: CRF1d
feature.minfreq: 3.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 129129
Seconds required: 4.712

L-BFGS optimization
c1: 0.500000
c2: 1.000000
num_memories: 6
max_iterations: 500
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 923487.560457
Feature norm: 1.000000
Error norm: 433864.888131
Active features: 128518
Line search trials: 1
Line search step: 0.000002
Seconds required for this iteration: 2.245

***** Iteration #2 *****
Loss: 595206.297529
Feature norm: 5.140940
Error norm: 123504.757228
Active features: 128377
Line search trials: 4
Line search step: 0.125000
Seconds required for this iteration: 4.259

***** Iteration #3 *****
Loss: 484822.381400
Feature norm: 4.241969
Error norm: 120195.080598
Active features: 123352
Line search trials: 1
Line search step: 1.000000
Seconds r

***** Iteration #39 *****
Loss: 58797.956926
Feature norm: 85.538805
Error norm: 1625.852304
Active features: 57725
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.072

***** Iteration #40 *****
Loss: 57741.180784
Feature norm: 88.893924
Error norm: 1541.377007
Active features: 55290
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.093

***** Iteration #41 *****
Loss: 57455.162976
Feature norm: 90.694860
Error norm: 6712.943778
Active features: 54252
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.164

***** Iteration #42 *****
Loss: 56798.244049
Feature norm: 92.046910
Error norm: 862.930141
Active features: 54013
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.095

***** Iteration #43 *****
Loss: 56404.667119
Feature norm: 93.313199
Error norm: 1565.449163
Active features: 53451
Line search trials: 1
Line search step: 1.000000


***** Iteration #79 *****
Loss: 53133.801973
Feature norm: 102.237249
Error norm: 1154.136962
Active features: 40580
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.093

***** Iteration #80 *****
Loss: 53122.318426
Feature norm: 102.252612
Error norm: 862.979804
Active features: 40557
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.111

***** Iteration #81 *****
Loss: 53115.295693
Feature norm: 102.350569
Error norm: 1231.862458
Active features: 40514
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.047

***** Iteration #82 *****
Loss: 53101.843830
Feature norm: 102.363106
Error norm: 720.145248
Active features: 40475
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.052

***** Iteration #83 *****
Loss: 53094.577098
Feature norm: 102.433106
Error norm: 980.832683
Active features: 40422
Line search trials: 1
Line search step: 1.0000

***** Iteration #123 *****
Loss: 52821.221120
Feature norm: 103.342271
Error norm: 790.721294
Active features: 39310
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.040

***** Iteration #124 *****
Loss: 52815.516241
Feature norm: 103.331475
Error norm: 614.708901
Active features: 39280
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.040

***** Iteration #125 *****
Loss: 52812.547259
Feature norm: 103.351948
Error norm: 800.655472
Active features: 39266
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.091

***** Iteration #126 *****
Loss: 52807.237669
Feature norm: 103.338037
Error norm: 638.932459
Active features: 39246
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.093

***** Iteration #127 *****
Loss: 52804.729089
Feature norm: 103.361628
Error norm: 800.528376
Active features: 39226
Line search trials: 1
Line search step: 1.0

***** Iteration #163 *****
Loss: 52690.935645
Feature norm: 103.347219
Error norm: 477.971192
Active features: 38706
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.040

***** Iteration #164 *****
Loss: 52689.074842
Feature norm: 103.333434
Error norm: 500.607225
Active features: 38691
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.045

***** Iteration #165 *****
Loss: 52686.925743
Feature norm: 103.339903
Error norm: 482.258654
Active features: 38681
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.061

***** Iteration #166 *****
Loss: 52684.729238
Feature norm: 103.326511
Error norm: 469.399965
Active features: 38656
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.083

***** Iteration #167 *****
Loss: 52682.233116
Feature norm: 103.330012
Error norm: 452.975633
Active features: 38643
Line search trials: 1
Line search step: 1.0

***** Iteration #203 *****
Loss: 52622.585976
Feature norm: 103.221726
Error norm: 420.032716
Active features: 38325
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.149

***** Iteration #204 *****
Loss: 52621.319141
Feature norm: 103.209899
Error norm: 413.858265
Active features: 38314
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.114

***** Iteration #205 *****
Loss: 52620.031001
Feature norm: 103.219046
Error norm: 405.454167
Active features: 38311
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.073

***** Iteration #206 *****
Loss: 52618.722883
Feature norm: 103.207683
Error norm: 379.754489
Active features: 38308
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.057

***** Iteration #207 *****
Loss: 52617.531196
Feature norm: 103.214498
Error norm: 393.996217
Active features: 38312
Line search trials: 1
Line search step: 1.0

***** Iteration #244 *****
Loss: 52582.232888
Feature norm: 103.180158
Error norm: 370.114693
Active features: 38065
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.067

***** Iteration #245 *****
Loss: 52581.258847
Feature norm: 103.185862
Error norm: 308.118683
Active features: 38047
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.085

***** Iteration #246 *****
Loss: 52580.654102
Feature norm: 103.178960
Error norm: 362.209903
Active features: 38033
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.060

***** Iteration #247 *****
Loss: 52579.775938
Feature norm: 103.184191
Error norm: 313.394295
Active features: 38026
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.093

***** Iteration #248 *****
Loss: 52579.197760
Feature norm: 103.177638
Error norm: 369.042296
Active features: 38024
Line search trials: 1
Line search step: 1.0

***** Iteration #286 *****
Loss: 52553.537291
Feature norm: 103.134324
Error norm: 349.085447
Active features: 37869
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.097

***** Iteration #287 *****
Loss: 52552.737870
Feature norm: 103.138834
Error norm: 277.491381
Active features: 37868
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.031

***** Iteration #288 *****
Loss: 52552.363380
Feature norm: 103.130723
Error norm: 350.017468
Active features: 37863
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.037

***** Iteration #289 *****
Loss: 52551.561664
Feature norm: 103.135353
Error norm: 278.428837
Active features: 37863
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.081

***** Iteration #290 *****
Loss: 52551.192255
Feature norm: 103.127160
Error norm: 346.279354
Active features: 37857
Line search trials: 1
Line search step: 1.0

***** Iteration #327 *****
Loss: 52531.355489
Feature norm: 103.049263
Error norm: 258.831565
Active features: 37793
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.090

***** Iteration #328 *****
Loss: 52531.086223
Feature norm: 103.040909
Error norm: 312.062533
Active features: 37791
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.087

***** Iteration #329 *****
Loss: 52530.420361
Feature norm: 103.043244
Error norm: 265.174756
Active features: 37786
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.071

***** Iteration #330 *****
Loss: 52530.108390
Feature norm: 103.035007
Error norm: 301.322756
Active features: 37780
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.075

***** Iteration #331 *****
Loss: 52529.479278
Feature norm: 103.037084
Error norm: 260.154996
Active features: 37783
Line search trials: 1
Line search step: 1.0

***** Iteration #368 *****
Loss: 52514.545867
Feature norm: 102.937548
Error norm: 229.897035
Active features: 37734
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.056

***** Iteration #369 *****
Loss: 52514.331864
Feature norm: 102.939174
Error norm: 274.926880
Active features: 37736
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.086

***** Iteration #370 *****
Loss: 52513.825865
Feature norm: 102.932952
Error norm: 228.437597
Active features: 37733
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.045

***** Iteration #371 *****
Loss: 52513.610319
Feature norm: 102.934717
Error norm: 275.117961
Active features: 37728
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.036

***** Iteration #372 *****
Loss: 52513.123029
Feature norm: 102.928311
Error norm: 231.663602
Active features: 37729
Line search trials: 1
Line search step: 1.0

***** Iteration #409 *****
Loss: 52500.087426
Feature norm: 102.859876
Error norm: 140.491447
Active features: 37686
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.228

***** Iteration #410 *****
Loss: 52499.843263
Feature norm: 102.855074
Error norm: 199.286083
Active features: 37688
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.186

***** Iteration #411 *****
Loss: 52499.338717
Feature norm: 102.856082
Error norm: 135.581796
Active features: 37690
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.170

***** Iteration #412 *****
Loss: 52499.089122
Feature norm: 102.851233
Error norm: 189.857093
Active features: 37686
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.216

***** Iteration #413 *****
Loss: 52498.605495
Feature norm: 102.851857
Error norm: 139.304595
Active features: 37679
Line search trials: 2
Line search step: 0.5

***** Iteration #450 *****
Loss: 52485.106603
Feature norm: 102.747251
Error norm: 180.119278
Active features: 37656
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.110

***** Iteration #451 *****
Loss: 52484.640163
Feature norm: 102.747537
Error norm: 131.483206
Active features: 37659
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.104

***** Iteration #452 *****
Loss: 52484.444652
Feature norm: 102.741305
Error norm: 194.840015
Active features: 37660
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.086

***** Iteration #453 *****
Loss: 52483.983722
Feature norm: 102.741958
Error norm: 142.440561
Active features: 37657
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.095

***** Iteration #454 *****
Loss: 52483.755872
Feature norm: 102.735784
Error norm: 183.359931
Active features: 37657
Line search trials: 2
Line search step: 0.5

***** Iteration #490 *****
Loss: 52474.405280
Feature norm: 102.656852
Error norm: 151.732942
Active features: 37626
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.139

***** Iteration #491 *****
Loss: 52474.095194
Feature norm: 102.657163
Error norm: 89.769754
Active features: 37628
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.149

***** Iteration #492 *****
Loss: 52473.963104
Feature norm: 102.652836
Error norm: 141.216414
Active features: 37628
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.115

***** Iteration #493 *****
Loss: 52473.698188
Feature norm: 102.652909
Error norm: 96.390814
Active features: 37627
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.070

***** Iteration #494 *****
Loss: 52473.569042
Feature norm: 102.648477
Error norm: 141.579991
Active features: 37625
Line search trials: 2
Line search step: 0.500

HBox(children=(FloatProgress(value=0.0, max=6094.0), HTML(value='')))


              precision    recall  f1-score   support

        _BRN     0.1667    0.0222    0.0392        45
        _DES     0.9135    0.6579    0.7649      1815
        _DTM     0.7342    0.6807    0.7064      1948
        _LOC     0.7872    0.5886    0.6736      4254
        _MEA     0.6506    0.5447    0.5930      3097
        _NUM     0.6204    0.5565    0.5867      1292
        _ORG     0.7160    0.4938    0.5845      4544
        _PER     0.8263    0.6943    0.7546      4118
        _TRM     0.4571    0.0988    0.1624       162
        _TTL     0.9774    0.9008    0.9375      2016

   micro avg     0.7749    0.6168    0.6869     23291
   macro avg     0.6849    0.5238    0.5803     23291
weighted avg     0.7712    0.6168    0.6829     23291




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


Feature generation
type: CRF1d
feature.minfreq: 3.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 129129
Seconds required: 4.509

L-BFGS optimization
c1: 1.000000
c2: 0.000000
num_memories: 6
max_iterations: 500
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 923490.885246
Feature norm: 1.000000
Error norm: 433862.622313
Active features: 127910
Line search trials: 1
Line search step: 0.000002
Seconds required for this iteration: 2.089

***** Iteration #2 *****
Loss: 595274.837896
Feature norm: 5.141577
Error norm: 123497.261034
Active features: 127787
Line search trials: 4
Line search step: 0.125000
Seconds required for this iteration: 4.195

***** Iteration #3 *****
Loss: 484885.293874
Feature norm: 4.242481
Error norm: 120191.476136
Active features: 121002
Line search trials: 1
Line search step: 1.000000
Seconds r

***** Iteration #39 *****
Loss: 55370.499655
Feature norm: 104.378270
Error norm: 2848.292512
Active features: 42440
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.043

***** Iteration #40 *****
Loss: 53907.425363
Feature norm: 109.691198
Error norm: 2462.252983
Active features: 41089
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.050

***** Iteration #41 *****
Loss: 52694.409898
Feature norm: 115.113016
Error norm: 1783.416960
Active features: 39845
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.055

***** Iteration #42 *****
Loss: 51539.900836
Feature norm: 120.772070
Error norm: 1354.969751
Active features: 37535
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.044

***** Iteration #43 *****
Loss: 50539.013817
Feature norm: 125.887200
Error norm: 2408.010027
Active features: 36322
Line search trials: 1
Line search step: 1.0

***** Iteration #83 *****
Loss: 43744.878688
Feature norm: 180.635947
Error norm: 1509.723723
Active features: 19880
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.077

***** Iteration #84 *****
Loss: 43717.536917
Feature norm: 180.811376
Error norm: 843.653240
Active features: 19813
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.051

***** Iteration #85 *****
Loss: 43709.558323
Feature norm: 181.045398
Error norm: 1556.166196
Active features: 19773
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.101

***** Iteration #86 *****
Loss: 43678.733663
Feature norm: 181.196027
Error norm: 662.644953
Active features: 19726
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.093

***** Iteration #87 *****
Loss: 43670.786818
Feature norm: 181.403464
Error norm: 1505.462849
Active features: 19689
Line search trials: 1
Line search step: 1.000

***** Iteration #127 *****
Loss: 43120.363090
Feature norm: 185.729934
Error norm: 973.320630
Active features: 18095
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.100

***** Iteration #128 *****
Loss: 43106.718849
Feature norm: 185.760794
Error norm: 598.022482
Active features: 18064
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.089

***** Iteration #129 *****
Loss: 43103.600317
Feature norm: 185.830435
Error norm: 1072.283332
Active features: 18043
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.067

***** Iteration #130 *****
Loss: 43087.333304
Feature norm: 185.860475
Error norm: 427.217614
Active features: 18019
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.068

***** Iteration #131 *****
Loss: 43084.520916
Feature norm: 185.920800
Error norm: 1004.539182
Active features: 18021
Line search trials: 1
Line search step: 1

***** Iteration #166 *****
Loss: 42825.319074
Feature norm: 186.756321
Error norm: 506.121281
Active features: 17200
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.054

***** Iteration #167 *****
Loss: 42821.135610
Feature norm: 186.778933
Error norm: 692.426716
Active features: 17156
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.084

***** Iteration #168 *****
Loss: 42814.863885
Feature norm: 186.778807
Error norm: 515.564779
Active features: 17132
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.052

***** Iteration #169 *****
Loss: 42811.938385
Feature norm: 186.800126
Error norm: 752.161650
Active features: 17122
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.021

***** Iteration #170 *****
Loss: 42804.046890
Feature norm: 186.793494
Error norm: 426.429680
Active features: 17112
Line search trials: 1
Line search step: 1.0

***** Iteration #206 *****
Loss: 42671.060145
Feature norm: 187.257276
Error norm: 420.598912
Active features: 16559
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.077

***** Iteration #207 *****
Loss: 42670.063911
Feature norm: 187.285845
Error norm: 687.514628
Active features: 16547
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.034

***** Iteration #208 *****
Loss: 42663.555831
Feature norm: 187.294238
Error norm: 414.398196
Active features: 16544
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.034

***** Iteration #209 *****
Loss: 42661.813458
Feature norm: 187.322306
Error norm: 595.401938
Active features: 16538
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.062

***** Iteration #210 *****
Loss: 42656.429014
Feature norm: 187.330533
Error norm: 305.289751
Active features: 16534
Line search trials: 1
Line search step: 1.0

***** Iteration #247 *****
Loss: 42562.588972
Feature norm: 188.049621
Error norm: 407.962393
Active features: 16193
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.068

***** Iteration #248 *****
Loss: 42560.421148
Feature norm: 188.061907
Error norm: 331.325711
Active features: 16184
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.030

***** Iteration #249 *****
Loss: 42559.253622
Feature norm: 188.090398
Error norm: 430.874767
Active features: 16171
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.061

***** Iteration #250 *****
Loss: 42556.603897
Feature norm: 188.104740
Error norm: 302.246595
Active features: 16163
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.107

***** Iteration #251 *****
Loss: 42555.656351
Feature norm: 188.131453
Error norm: 420.576003
Active features: 16144
Line search trials: 1
Line search step: 1.0

***** Iteration #289 *****
Loss: 42494.633308
Feature norm: 188.857846
Error norm: 396.218916
Active features: 15781
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.038

***** Iteration #290 *****
Loss: 42492.370914
Feature norm: 188.872187
Error norm: 229.941479
Active features: 15774
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.077

***** Iteration #291 *****
Loss: 42491.946068
Feature norm: 188.892135
Error norm: 394.618968
Active features: 15764
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.033

***** Iteration #292 *****
Loss: 42489.685464
Feature norm: 188.901840
Error norm: 201.778960
Active features: 15757
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.048

***** Iteration #293 *****
Loss: 42489.266518
Feature norm: 188.923034
Error norm: 376.594058
Active features: 15762
Line search trials: 1
Line search step: 1.0

***** Iteration #329 *****
Loss: 42444.648257
Feature norm: 189.488237
Error norm: 341.109578
Active features: 15510
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.083

***** Iteration #330 *****
Loss: 42442.676213
Feature norm: 189.497911
Error norm: 206.009930
Active features: 15515
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.085

***** Iteration #331 *****
Loss: 42441.882249
Feature norm: 189.516938
Error norm: 278.036248
Active features: 15519
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.094

***** Iteration #332 *****
Loss: 42440.935900
Feature norm: 189.526831
Error norm: 269.525867
Active features: 15512
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.099

***** Iteration #333 *****
Loss: 42439.882155
Feature norm: 189.546588
Error norm: 288.065680
Active features: 15503
Line search trials: 1
Line search step: 1.0

***** Iteration #378 *****
Loss: 42399.569369
Feature norm: 190.260463
Error norm: 193.618537
Active features: 15272
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.072

***** Iteration #379 *****
Loss: 42399.111834
Feature norm: 190.274347
Error norm: 247.082299
Active features: 15270
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.120

***** Iteration #380 *****
Loss: 42398.204241
Feature norm: 190.281016
Error norm: 184.243337
Active features: 15258
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.041

***** Iteration #381 *****
Loss: 42397.912258
Feature norm: 190.295015
Error norm: 263.849355
Active features: 15255
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.069

***** Iteration #382 *****
Loss: 42396.839763
Feature norm: 190.302172
Error norm: 173.579720
Active features: 15241
Line search trials: 1
Line search step: 1.0

***** Iteration #418 *****
Loss: 42374.719767
Feature norm: 190.648281
Error norm: 146.802017
Active features: 15013
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.059

***** Iteration #419 *****
Loss: 42374.604025
Feature norm: 190.662540
Error norm: 242.298266
Active features: 15006
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.055

***** Iteration #420 *****
Loss: 42373.694817
Feature norm: 190.660944
Error norm: 137.899155
Active features: 14998
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.081

***** Iteration #421 *****
Loss: 42373.178429
Feature norm: 190.667959
Error norm: 59.977543
Active features: 15001
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.096

***** Iteration #422 *****
Loss: 42372.836023
Feature norm: 190.675856
Error norm: 186.512085
Active features: 14995
Line search trials: 1
Line search step: 1.00

***** Iteration #459 *****
Loss: 42353.365687
Feature norm: 190.922870
Error norm: 220.513576
Active features: 14885
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.169

***** Iteration #460 *****
Loss: 42352.544125
Feature norm: 190.923375
Error norm: 151.794435
Active features: 14883
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.051

***** Iteration #461 *****
Loss: 42352.366087
Feature norm: 190.933653
Error norm: 214.895692
Active features: 14882
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.046

***** Iteration #462 *****
Loss: 42351.577736
Feature norm: 190.935059
Error norm: 139.129586
Active features: 14883
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.083

***** Iteration #463 *****
Loss: 42351.446652
Feature norm: 190.944692
Error norm: 212.479409
Active features: 14870
Line search trials: 1
Line search step: 1.0

HBox(children=(FloatProgress(value=0.0, max=6094.0), HTML(value='')))


              precision    recall  f1-score   support

        _BRN     0.2857    0.0889    0.1356        45
        _DES     0.9132    0.7884    0.8462      1815
        _DTM     0.7479    0.6946    0.7203      1948
        _LOC     0.7873    0.6596    0.7178      4254
        _MEA     0.6446    0.5570    0.5976      3097
        _NUM     0.6408    0.5619    0.5988      1292
        _ORG     0.6993    0.5282    0.6018      4544
        _PER     0.8215    0.7610    0.7901      4118
        _TRM     0.4912    0.1728    0.2557       162
        _TTL     0.9725    0.9489    0.9606      2016

   micro avg     0.7747    0.6664    0.7164     23291
   macro avg     0.7004    0.5761    0.6225     23291
weighted avg     0.7686    0.6664    0.7123     23291



HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


Feature generation
type: CRF1d
feature.minfreq: 3.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 129129
Seconds required: 4.512

L-BFGS optimization
c1: 1.000000
c2: 0.500000
num_memories: 6
max_iterations: 500
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 923491.385246
Feature norm: 1.000000
Error norm: 433861.622635
Active features: 127910
Line search trials: 1
Line search step: 0.000002
Seconds required for this iteration: 2.110

***** Iteration #2 *****
Loss: 595251.570957
Feature norm: 5.141255
Error norm: 123501.639328
Active features: 127787
Line search trials: 4
Line search step: 0.125000
Seconds required for this iteration: 4.344

***** Iteration #3 *****
Loss: 484863.799224
Feature norm: 4.242190
Error norm: 120193.033062
Active features: 121002
Line search trials: 1
Line search step: 1.000000
Seconds r

***** Iteration #40 *****
Loss: 63545.670931
Feature norm: 81.482748
Error norm: 2648.170695
Active features: 44865
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.138

***** Iteration #41 *****
Loss: 62578.886487
Feature norm: 83.244673
Error norm: 1634.439093
Active features: 44205
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.048

***** Iteration #42 *****
Loss: 60887.225511
Feature norm: 88.748980
Error norm: 4522.750379
Active features: 41254
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.070

***** Iteration #43 *****
Loss: 60058.395636
Feature norm: 91.061492
Error norm: 6176.674762
Active features: 40257
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.038

***** Iteration #44 *****
Loss: 59317.020891
Feature norm: 92.008786
Error norm: 1276.316037
Active features: 39727
Line search trials: 1
Line search step: 1.000000

***** Iteration #86 *****
Loss: 53034.092660
Feature norm: 113.073156
Error norm: 1180.093336
Active features: 23879
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.053

***** Iteration #87 *****
Loss: 53019.043236
Feature norm: 113.124335
Error norm: 817.547465
Active features: 23841
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.066

***** Iteration #88 *****
Loss: 53009.034783
Feature norm: 113.221436
Error norm: 1115.481171
Active features: 23805
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.073

***** Iteration #89 *****
Loss: 52995.728712
Feature norm: 113.261440
Error norm: 760.569229
Active features: 23744
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.086

***** Iteration #90 *****
Loss: 52985.670730
Feature norm: 113.355507
Error norm: 1096.087616
Active features: 23702
Line search trials: 1
Line search step: 1.000

***** Iteration #130 *****
Loss: 52667.064624
Feature norm: 114.827767
Error norm: 744.454694
Active features: 22757
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.108

***** Iteration #131 *****
Loss: 52658.945409
Feature norm: 114.818116
Error norm: 287.769059
Active features: 22752
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.134

***** Iteration #132 *****
Loss: 52655.075670
Feature norm: 114.826124
Error norm: 721.285829
Active features: 22742
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.105

***** Iteration #133 *****
Loss: 52647.693868
Feature norm: 114.809961
Error norm: 283.315760
Active features: 22715
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.075

***** Iteration #134 *****
Loss: 52642.857770
Feature norm: 114.817259
Error norm: 614.313897
Active features: 22701
Line search trials: 2
Line search step: 0.5

***** Iteration #170 *****
Loss: 52495.353479
Feature norm: 114.862534
Error norm: 654.836936
Active features: 22350
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.108

***** Iteration #171 *****
Loss: 52490.630990
Feature norm: 114.860262
Error norm: 379.114366
Active features: 22342
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.091

***** Iteration #172 *****
Loss: 52489.512560
Feature norm: 114.873507
Error norm: 623.422475
Active features: 22333
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.074

***** Iteration #173 *****
Loss: 52485.186138
Feature norm: 114.871109
Error norm: 380.512985
Active features: 22321
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.084

***** Iteration #174 *****
Loss: 52484.883314
Feature norm: 114.883602
Error norm: 696.606886
Active features: 22310
Line search trials: 1
Line search step: 1.0

***** Iteration #213 *****
Loss: 52404.979382
Feature norm: 115.086829
Error norm: 402.266622
Active features: 22030
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.060

***** Iteration #214 *****
Loss: 52403.363307
Feature norm: 115.098210
Error norm: 369.221095
Active features: 22020
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.088

***** Iteration #215 *****
Loss: 52402.213076
Feature norm: 115.098229
Error norm: 397.558888
Active features: 22006
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.091

***** Iteration #216 *****
Loss: 52400.603345
Feature norm: 115.110259
Error norm: 372.254644
Active features: 21996
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.069

***** Iteration #217 *****
Loss: 52399.335008
Feature norm: 115.109838
Error norm: 381.403941
Active features: 21982
Line search trials: 1
Line search step: 1.0

***** Iteration #256 *****
Loss: 52359.697475
Feature norm: 115.194091
Error norm: 291.105121
Active features: 21775
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.038

***** Iteration #257 *****
Loss: 52359.451320
Feature norm: 115.190423
Error norm: 400.682155
Active features: 21763
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.046

***** Iteration #258 *****
Loss: 52357.901275
Feature norm: 115.195846
Error norm: 273.034924
Active features: 21755
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.101

***** Iteration #259 *****
Loss: 52357.628725
Feature norm: 115.193198
Error norm: 374.158856
Active features: 21745
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.091

***** Iteration #260 *****
Loss: 52356.222497
Feature norm: 115.197744
Error norm: 256.927496
Active features: 21738
Line search trials: 1
Line search step: 1.0

***** Iteration #299 *****
Loss: 52333.245381
Feature norm: 115.204526
Error norm: 302.367490
Active features: 21572
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.071

***** Iteration #300 *****
Loss: 52332.557725
Feature norm: 115.208815
Error norm: 251.559328
Active features: 21571
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.087

***** Iteration #301 *****
Loss: 52332.250705
Feature norm: 115.203719
Error norm: 298.685095
Active features: 21568
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.103

***** Iteration #302 *****
Loss: 52331.584327
Feature norm: 115.207836
Error norm: 249.091353
Active features: 21570
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.105

***** Iteration #303 *****
Loss: 52331.268154
Feature norm: 115.203925
Error norm: 297.482955
Active features: 21565
Line search trials: 1
Line search step: 1.0

***** Iteration #339 *****
Loss: 52315.427271
Feature norm: 115.205883
Error norm: 232.114799
Active features: 21471
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.068

***** Iteration #340 *****
Loss: 52315.136750
Feature norm: 115.210368
Error norm: 256.661698
Active features: 21469
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.057

***** Iteration #341 *****
Loss: 52314.691153
Feature norm: 115.206298
Error norm: 232.483667
Active features: 21462
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.094

***** Iteration #342 *****
Loss: 52314.434650
Feature norm: 115.209834
Error norm: 261.744683
Active features: 21463
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.044

***** Iteration #343 *****
Loss: 52313.968766
Feature norm: 115.206126
Error norm: 234.401380
Active features: 21465
Line search trials: 1
Line search step: 1.0

***** Iteration #380 *****
Loss: 52301.612059
Feature norm: 115.214001
Error norm: 254.574860
Active features: 21396
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.087

***** Iteration #381 *****
Loss: 52301.159333
Feature norm: 115.210193
Error norm: 216.268239
Active features: 21392
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.062

***** Iteration #382 *****
Loss: 52300.994640
Feature norm: 115.212836
Error norm: 257.316002
Active features: 21390
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.059

***** Iteration #383 *****
Loss: 52300.538057
Feature norm: 115.208611
Error norm: 214.312158
Active features: 21381
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.039

***** Iteration #384 *****
Loss: 52300.382427
Feature norm: 115.211544
Error norm: 259.086317
Active features: 21382
Line search trials: 1
Line search step: 1.0

***** Iteration #428 *****
Loss: 52286.687290
Feature norm: 115.172619
Error norm: 167.999813
Active features: 21351
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.098

***** Iteration #429 *****
Loss: 52286.381477
Feature norm: 115.168823
Error norm: 157.248044
Active features: 21341
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.117

***** Iteration #430 *****
Loss: 52286.087281
Feature norm: 115.170033
Error norm: 165.533515
Active features: 21342
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.132

***** Iteration #431 *****
Loss: 52285.785979
Feature norm: 115.166410
Error norm: 149.001565
Active features: 21337
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.123

***** Iteration #432 *****
Loss: 52285.509216
Feature norm: 115.167636
Error norm: 156.963356
Active features: 21333
Line search trials: 2
Line search step: 0.5

***** Iteration #473 *****
Loss: 52275.513352
Feature norm: 115.132224
Error norm: 130.443879
Active features: 21293
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.259

***** Iteration #474 *****
Loss: 52275.292810
Feature norm: 115.133993
Error norm: 139.015070
Active features: 21297
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.176

***** Iteration #475 *****
Loss: 52275.055904
Feature norm: 115.130352
Error norm: 131.159822
Active features: 21294
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.228

***** Iteration #476 *****
Loss: 52274.836929
Feature norm: 115.132050
Error norm: 134.416858
Active features: 21290
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.079

***** Iteration #477 *****
Loss: 52274.606962
Feature norm: 115.128848
Error norm: 121.650711
Active features: 21291
Line search trials: 2
Line search step: 0.5

HBox(children=(FloatProgress(value=0.0, max=6094.0), HTML(value='')))


              precision    recall  f1-score   support

        _BRN     0.1429    0.0222    0.0385        45
        _DES     0.9113    0.6738    0.7748      1815
        _DTM     0.7199    0.6874    0.7033      1948
        _LOC     0.7884    0.6034    0.6836      4254
        _MEA     0.6477    0.5450    0.5920      3097
        _NUM     0.6206    0.5557    0.5864      1292
        _ORG     0.7123    0.5062    0.5918      4544
        _PER     0.8247    0.7059    0.7607      4118
        _TRM     0.4722    0.1049    0.1717       162
        _TTL     0.9783    0.9187    0.9476      2016

   micro avg     0.7727    0.6274    0.6925     23291
   macro avg     0.6818    0.5323    0.5850     23291
weighted avg     0.7688    0.6274    0.6885     23291



HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


Feature generation
type: CRF1d
feature.minfreq: 3.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 129129
Seconds required: 4.503

L-BFGS optimization
c1: 1.000000
c2: 1.000000
num_memories: 6
max_iterations: 500
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 923491.885246
Feature norm: 1.000000
Error norm: 433860.622957
Active features: 127910
Line search trials: 1
Line search step: 0.000002
Seconds required for this iteration: 2.212

***** Iteration #2 *****
Loss: 595228.305833
Feature norm: 5.140934
Error norm: 123506.016872
Active features: 127787
Line search trials: 4
Line search step: 0.125000
Seconds required for this iteration: 4.374

***** Iteration #3 *****
Loss: 484842.293650
Feature norm: 4.241906
Error norm: 120194.620593
Active features: 121002
Line search trials: 1
Line search step: 1.000000
Seconds r

***** Iteration #41 *****
Loss: 64871.202195
Feature norm: 77.657936
Error norm: 1333.295667
Active features: 43574
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.047

***** Iteration #42 *****
Loss: 63893.157785
Feature norm: 79.709751
Error norm: 1845.831398
Active features: 42264
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.109

***** Iteration #43 *****
Loss: 63189.507894
Feature norm: 80.799992
Error norm: 1062.949078
Active features: 41358
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.051

***** Iteration #44 *****
Loss: 62576.971282
Feature norm: 83.431964
Error norm: 6404.695173
Active features: 39957
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.043

***** Iteration #45 *****
Loss: 61907.252560
Feature norm: 85.206826
Error norm: 2995.751920
Active features: 39587
Line search trials: 1
Line search step: 1.000000

***** Iteration #81 *****
Loss: 58279.775620
Feature norm: 94.763035
Error norm: 1261.412767
Active features: 26574
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.068

***** Iteration #82 *****
Loss: 58263.602333
Feature norm: 94.803361
Error norm: 756.929375
Active features: 26521
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.054

***** Iteration #83 *****
Loss: 58254.384114
Feature norm: 94.874204
Error norm: 1155.123739
Active features: 26494
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.069

***** Iteration #84 *****
Loss: 58240.758904
Feature norm: 94.919193
Error norm: 854.263953
Active features: 26448
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.053

***** Iteration #85 *****
Loss: 58236.018196
Feature norm: 95.004335
Error norm: 1386.650138
Active features: 26387
Line search trials: 1
Line search step: 1.000000
S

***** Iteration #124 *****
Loss: 57939.982539
Feature norm: 95.890754
Error norm: 630.284289
Active features: 25322
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.116

***** Iteration #125 *****
Loss: 57936.303185
Feature norm: 95.909558
Error norm: 707.309044
Active features: 25317
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.108

***** Iteration #126 *****
Loss: 57931.540675
Feature norm: 95.898877
Error norm: 587.021718
Active features: 25304
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.099

***** Iteration #127 *****
Loss: 57928.375609
Feature norm: 95.916745
Error norm: 711.412065
Active features: 25290
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.072

***** Iteration #128 *****
Loss: 57923.377938
Feature norm: 95.903547
Error norm: 610.098579
Active features: 25266
Line search trials: 1
Line search step: 1.000000

***** Iteration #174 *****
Loss: 57795.571193
Feature norm: 95.959653
Error norm: 464.071614
Active features: 24720
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.059

***** Iteration #175 *****
Loss: 57793.940571
Feature norm: 95.971486
Error norm: 457.145071
Active features: 24711
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.074

***** Iteration #176 *****
Loss: 57792.355420
Feature norm: 95.964867
Error norm: 502.830332
Active features: 24693
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.057

***** Iteration #177 *****
Loss: 57790.470388
Feature norm: 95.976800
Error norm: 475.209671
Active features: 24684
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.076

***** Iteration #178 *****
Loss: 57788.998807
Feature norm: 95.967976
Error norm: 516.451355
Active features: 24678
Line search trials: 1
Line search step: 1.000000

***** Iteration #215 *****
Loss: 57743.816045
Feature norm: 95.990767
Error norm: 334.018802
Active features: 24448
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.097

***** Iteration #216 *****
Loss: 57743.005763
Feature norm: 95.984494
Error norm: 361.041526
Active features: 24444
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.086

***** Iteration #217 *****
Loss: 57742.102012
Feature norm: 95.990975
Error norm: 340.496863
Active features: 24446
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.079

***** Iteration #218 *****
Loss: 57741.266182
Feature norm: 95.985479
Error norm: 361.139223
Active features: 24435
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.100

***** Iteration #219 *****
Loss: 57740.303255
Feature norm: 95.991609
Error norm: 327.197687
Active features: 24429
Line search trials: 1
Line search step: 1.000000

***** Iteration #255 *****
Loss: 57715.665720
Feature norm: 95.984809
Error norm: 257.070540
Active features: 24305
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.132

***** Iteration #256 *****
Loss: 57715.325650
Feature norm: 95.979138
Error norm: 311.884796
Active features: 24299
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.162

***** Iteration #257 *****
Loss: 57714.626683
Feature norm: 95.984084
Error norm: 257.725388
Active features: 24296
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.133

***** Iteration #258 *****
Loss: 57714.303468
Feature norm: 95.978261
Error norm: 313.496801
Active features: 24289
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.134

***** Iteration #259 *****
Loss: 57713.627766
Feature norm: 95.982489
Error norm: 261.366395
Active features: 24289
Line search trials: 1
Line search step: 1.000000

***** Iteration #298 *****
Loss: 57696.512740
Feature norm: 95.932300
Error norm: 267.162465
Active features: 24180
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.059

***** Iteration #299 *****
Loss: 57696.035259
Feature norm: 95.934797
Error norm: 227.629843
Active features: 24183
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.073

***** Iteration #300 *****
Loss: 57695.770705
Feature norm: 95.930089
Error norm: 256.213781
Active features: 24186
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.065

***** Iteration #301 *****
Loss: 57695.344296
Feature norm: 95.931966
Error norm: 233.075178
Active features: 24189
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.079

***** Iteration #302 *****
Loss: 57695.026583
Feature norm: 95.927170
Error norm: 246.318257
Active features: 24193
Line search trials: 1
Line search step: 1.000000

***** Iteration #338 *****
Loss: 57683.658654
Feature norm: 95.887238
Error norm: 230.128561
Active features: 24150
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.116

***** Iteration #339 *****
Loss: 57683.370175
Feature norm: 95.890114
Error norm: 225.744708
Active features: 24148
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.093

***** Iteration #340 *****
Loss: 57683.069434
Feature norm: 95.885319
Error norm: 235.255210
Active features: 24148
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.091

***** Iteration #341 *****
Loss: 57682.791617
Feature norm: 95.888473
Error norm: 229.132154
Active features: 24148
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.093

***** Iteration #342 *****
Loss: 57682.469990
Feature norm: 95.883694
Error norm: 232.070565
Active features: 24146
Line search trials: 1
Line search step: 1.000000

***** Iteration #384 *****
Loss: 57671.412074
Feature norm: 95.829369
Error norm: 183.020335
Active features: 24084
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.087

***** Iteration #385 *****
Loss: 57671.255878
Feature norm: 95.831084
Error norm: 217.822814
Active features: 24082
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.088

***** Iteration #386 *****
Loss: 57670.956640
Feature norm: 95.826079
Error norm: 180.351699
Active features: 24082
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.056

***** Iteration #387 *****
Loss: 57670.797046
Feature norm: 95.827642
Error norm: 213.840190
Active features: 24090
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 1.078

***** Iteration #388 *****
Loss: 57670.507979
Feature norm: 95.822517
Error norm: 182.784619
Active features: 24091
Line search trials: 1
Line search step: 1.000000

***** Iteration #424 *****
Loss: 57662.735426
Feature norm: 95.764053
Error norm: 176.831842
Active features: 24052
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.167

***** Iteration #425 *****
Loss: 57662.465861
Feature norm: 95.765785
Error norm: 139.493976
Active features: 24049
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.223

***** Iteration #426 *****
Loss: 57662.255742
Feature norm: 95.761313
Error norm: 184.497543
Active features: 24048
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.183

***** Iteration #427 *****
Loss: 57661.973383
Feature norm: 95.763225
Error norm: 137.870947
Active features: 24054
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.182

***** Iteration #428 *****
Loss: 57661.756981
Feature norm: 95.758874
Error norm: 178.181341
Active features: 24055
Line search trials: 2
Line search step: 0.500000

***** Iteration #465 *****
Loss: 57654.393481
Feature norm: 95.726521
Error norm: 107.730041
Active features: 24026
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.182

***** Iteration #466 *****
Loss: 57654.225295
Feature norm: 95.723260
Error norm: 113.690931
Active features: 24023
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.201

***** Iteration #467 *****
Loss: 57654.071374
Feature norm: 95.724479
Error norm: 109.882476
Active features: 24024
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.405

***** Iteration #468 *****
Loss: 57653.914362
Feature norm: 95.721193
Error norm: 117.336865
Active features: 24023
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 2.396

***** Iteration #469 *****
Loss: 57653.742145
Feature norm: 95.722351
Error norm: 106.126726
Active features: 24022
Line search trials: 2
Line search step: 0.500000

HBox(children=(FloatProgress(value=0.0, max=6094.0), HTML(value='')))


              precision    recall  f1-score   support

        _BRN     0.1667    0.0222    0.0392        45
        _DES     0.9195    0.6419    0.7560      1815
        _DTM     0.7241    0.6751    0.6987      1948
        _LOC     0.7905    0.5712    0.6632      4254
        _MEA     0.6435    0.5315    0.5821      3097
        _NUM     0.6202    0.5472    0.5814      1292
        _ORG     0.7171    0.4815    0.5762      4544
        _PER     0.8272    0.6824    0.7478      4118
        _TRM     0.4848    0.0988    0.1641       162
        _TTL     0.9789    0.8953    0.9352      2016

   micro avg     0.7744    0.6047    0.6791     23291
   macro avg     0.6872    0.5147    0.5744     23291
weighted avg     0.7712    0.6047    0.6749     23291





In [123]:
# Rank every (c1, c2) trial collected in `hyperparams`, best first.
# The ranking column comes from `args.metric_for_best_model` (set in the
# parameters cell) rather than a hard-coded name, so changing the configured
# selection metric actually changes which trial is picked here.
hyperparams_df = (
    pd.DataFrame(hyperparams)
    .sort_values(args.metric_for_best_model, ascending=False)
    .reset_index(drop=True)
)
# After the descending sort, row 0 is the top-ranked trial; keep the whole row
# (including its non-numeric fields) as a plain dict for the cells below.
best_hyperparams = hyperparams_df.iloc[0].to_dict()
# Display only the compact numeric summary of the search.
hyperparams_df[['c1','c2','f1_micro','f1_macro']]

Unnamed: 0,c1,c2,f1_micro,f1_macro
0,0.5,0.0,0.717296,0.627721
1,1.0,0.0,0.716445,0.622451
2,0.0,0.0,0.688666,0.615289
3,0.0,0.5,0.703625,0.602803
4,0.5,0.5,0.699979,0.590872
5,0.0,1.0,0.694041,0.586303
6,1.0,0.5,0.692479,0.585022
7,0.5,1.0,0.686875,0.580285
8,1.0,1.0,0.679075,0.574405


In [124]:
# Show the per-entity report stored with the top-ranked trial.
# print() is used (not a bare expression) so the report's line breaks render as
# a table -- assumes the stored value is a pre-formatted string, as the output
# below suggests.
print(best_hyperparams['classification_report'])

              precision    recall  f1-score   support

        _BRN     0.3158    0.1333    0.1875        45
        _DES     0.9033    0.8083    0.8532      1815
        _DTM     0.7377    0.6946    0.7155      1948
        _LOC     0.7701    0.6653    0.7138      4254
        _MEA     0.6453    0.5680    0.6042      3097
        _NUM     0.6282    0.5689    0.5971      1292
        _ORG     0.6776    0.5440    0.6035      4544
        _PER     0.8165    0.7756    0.7955      4118
        _TRM     0.4375    0.1728    0.2478       162
        _TTL     0.9686    0.9499    0.9592      2016

   micro avg     0.7632    0.6766    0.7173     23291
   macro avg     0.6901    0.5881    0.6277     23291
weighted avg     0.7574    0.6766    0.7137     23291



In [125]:
# Final model: refit the CRF using the regularisation pair selected from the grid search.
c1 = best_hyperparams['c1']
c2 = best_hyperparams['c2']
train_crf('{}_{}_best'.format(args.dataset_name_or_path, args.label_col), c1, c2, x_train, y_train)

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


Feature generation
type: CRF1d
feature.minfreq: 3.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 588510
Seconds required: 23.289

L-BFGS optimization
c1: 0.500000
c2: 0.000000
num_memories: 6
max_iterations: 500
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 6294769.553331
Feature norm: 1.000000
Error norm: 2955330.355206
Active features: 586206
Line search trials: 1
Line search step: 0.000000
Seconds required for this iteration: 15.132

***** Iteration #2 *****
Loss: 4064922.203746
Feature norm: 5.129274
Error norm: 846213.800774
Active features: 585553
Line search trials: 4
Line search step: 0.125000
Seconds required for this iteration: 31.056

***** Iteration #3 *****
Loss: 3307829.621747
Feature norm: 4.231016
Error norm: 823351.642770
Active features: 562709
Line search trials: 1
Line search step: 1.000000
Se

***** Iteration #44 *****
Loss: 237664.202285
Feature norm: 234.379928
Error norm: 31662.058590
Active features: 265085
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.955

***** Iteration #45 *****
Loss: 226402.334024
Feature norm: 240.837793
Error norm: 9338.432305
Active features: 265800
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.737

***** Iteration #46 *****
Loss: 222464.013006
Feature norm: 246.155359
Error norm: 4215.166487
Active features: 263592
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.672

***** Iteration #47 *****
Loss: 213634.897806
Feature norm: 263.726325
Error norm: 4649.514020
Active features: 249695
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.768

***** Iteration #48 *****
Loss: 212696.472886
Feature norm: 265.315835
Error norm: 33709.332524
Active features: 245462
Line search trials: 3
Line sear

***** Iteration #87 *****
Loss: 160339.256365
Feature norm: 434.229664
Error norm: 3861.836117
Active features: 134033
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.908

***** Iteration #88 *****
Loss: 160133.613139
Feature norm: 434.555307
Error norm: 2958.694421
Active features: 133116
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.947

***** Iteration #89 *****
Loss: 159932.164603
Feature norm: 434.966622
Error norm: 3626.316252
Active features: 131995
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.836

***** Iteration #90 *****
Loss: 159732.533293
Feature norm: 435.271993
Error norm: 3527.054727
Active features: 130871
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.735

***** Iteration #91 *****
Loss: 159545.064757
Feature norm: 435.709170
Error norm: 4035.681303
Active features: 129883
Line search trials: 1
Line search

***** Iteration #132 *****
Loss: 155736.472255
Feature norm: 453.790293
Error norm: 2351.163384
Active features: 115371
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.868

***** Iteration #133 *****
Loss: 155711.500146
Feature norm: 454.177206
Error norm: 4056.474973
Active features: 115241
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.885

***** Iteration #134 *****
Loss: 155649.592646
Feature norm: 454.535460
Error norm: 2334.922076
Active features: 115150
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.928

***** Iteration #135 *****
Loss: 155624.302730
Feature norm: 454.897432
Error norm: 3907.930459
Active features: 115007
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.756

***** Iteration #136 *****
Loss: 155565.471347
Feature norm: 455.246077
Error norm: 2316.364293
Active features: 114910
Line search trials: 1
Line s

***** Iteration #172 *****
Loss: 154187.405613
Feature norm: 464.778848
Error norm: 1855.997149
Active features: 112350
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.631

***** Iteration #173 *****
Loss: 154176.857156
Feature norm: 464.980963
Error norm: 3764.888161
Active features: 112246
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.827

***** Iteration #174 *****
Loss: 154117.377949
Feature norm: 465.172207
Error norm: 1605.337866
Active features: 112228
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.762

***** Iteration #175 *****
Loss: 154108.435123
Feature norm: 465.363741
Error norm: 3618.840242
Active features: 112082
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.782

***** Iteration #176 *****
Loss: 154050.481711
Feature norm: 465.553945
Error norm: 1626.544302
Active features: 112017
Line search trials: 1
Line s

***** Iteration #212 *****
Loss: 152976.097111
Feature norm: 470.827818
Error norm: 1386.593271
Active features: 109039
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.721

***** Iteration #213 *****
Loss: 152970.171295
Feature norm: 470.915005
Error norm: 2969.721763
Active features: 108945
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.659

***** Iteration #214 *****
Loss: 152929.888699
Feature norm: 471.010810
Error norm: 1321.236145
Active features: 108896
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.644

***** Iteration #215 *****
Loss: 152929.235738
Feature norm: 471.104350
Error norm: 3040.506297
Active features: 108828
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.691

***** Iteration #216 *****
Loss: 152885.004320
Feature norm: 471.196509
Error norm: 1162.459795
Active features: 108783
Line search trials: 1
Line s

***** Iteration #253 *****
Loss: 152218.572594
Feature norm: 473.300325
Error norm: 2266.599209
Active features: 105811
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.751

***** Iteration #254 *****
Loss: 152193.750245
Feature norm: 473.344517
Error norm: 1251.235412
Active features: 105741
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.665

***** Iteration #255 *****
Loss: 152183.546729
Feature norm: 473.380518
Error norm: 1820.930266
Active features: 105718
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.620

***** Iteration #256 *****
Loss: 152166.267803
Feature norm: 473.426828
Error norm: 1383.491836
Active features: 105648
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.688

***** Iteration #257 *****
Loss: 152156.233380
Feature norm: 473.465090
Error norm: 1874.946327
Active features: 105603
Line search trials: 1
Line s

***** Iteration #295 *****
Loss: 151691.057886
Feature norm: 474.769396
Error norm: 1757.103328
Active features: 103078
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.933

***** Iteration #296 *****
Loss: 151676.825221
Feature norm: 474.800139
Error norm: 1167.714058
Active features: 102997
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.840

***** Iteration #297 *****
Loss: 151671.466193
Feature norm: 474.841889
Error norm: 1862.015691
Active features: 102960
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.827

***** Iteration #298 *****
Loss: 151655.113146
Feature norm: 474.870439
Error norm: 1053.503779
Active features: 102905
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.730

***** Iteration #299 *****
Loss: 151651.294009
Feature norm: 474.910195
Error norm: 1871.210915
Active features: 102852
Line search trials: 1
Line s

***** Iteration #334 *****
Loss: 151326.969049
Feature norm: 476.131465
Error norm: 1008.466756
Active features: 100993
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.737

***** Iteration #335 *****
Loss: 151323.870210
Feature norm: 476.172151
Error norm: 1626.637083
Active features: 100905
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.717

***** Iteration #336 *****
Loss: 151311.502354
Feature norm: 476.207468
Error norm: 922.962754
Active features: 100869
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.690

***** Iteration #337 *****
Loss: 151309.104248
Feature norm: 476.245421
Error norm: 1636.227311
Active features: 100799
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.863

***** Iteration #338 *****
Loss: 151296.178825
Feature norm: 476.278675
Error norm: 844.737696
Active features: 100736
Line search trials: 1
Line sea

***** Iteration #379 *****
Loss: 151022.988942
Feature norm: 477.365175
Error norm: 457.671554
Active features: 98813
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 15.596

***** Iteration #380 *****
Loss: 151019.777749
Feature norm: 477.389598
Error norm: 1166.662094
Active features: 98655
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.903

***** Iteration #381 *****
Loss: 151015.588537
Feature norm: 477.426935
Error norm: 1571.080288
Active features: 98604
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.948

***** Iteration #382 *****
Loss: 151005.338396
Feature norm: 477.452632
Error norm: 1098.251072
Active features: 98554
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.983

***** Iteration #383 *****
Loss: 151001.704645
Feature norm: 477.489321
Error norm: 1483.762858
Active features: 98570
Line search trials: 1
Line search

***** Iteration #419 *****
Loss: 150813.487467
Feature norm: 478.309989
Error norm: 417.684495
Active features: 97184
Line search trials: 2
Line search step: 0.500000
Seconds required for this iteration: 15.313

***** Iteration #420 *****
Loss: 150811.622151
Feature norm: 478.331071
Error norm: 1047.877533
Active features: 97066
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.624

***** Iteration #421 *****
Loss: 150808.450531
Feature norm: 478.360953
Error norm: 1389.144716
Active features: 97039
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.676

***** Iteration #422 *****
Loss: 150800.141334
Feature norm: 478.380995
Error norm: 869.689908
Active features: 97019
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.794

***** Iteration #423 *****
Loss: 150798.070474
Feature norm: 478.411026
Error norm: 1280.164981
Active features: 97071
Line search trials: 1
Line search 

***** Iteration #461 *****
Loss: 150643.142138
Feature norm: 479.168536
Error norm: 1013.305266
Active features: 95712
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.616

***** Iteration #462 *****
Loss: 150638.227927
Feature norm: 479.187027
Error norm: 731.211107
Active features: 95679
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.722

***** Iteration #463 *****
Loss: 150636.271741
Feature norm: 479.210290
Error norm: 1061.232958
Active features: 95675
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.724

***** Iteration #464 *****
Loss: 150630.852037
Feature norm: 479.225224
Error norm: 662.939215
Active features: 95616
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 7.607

***** Iteration #465 *****
Loss: 150629.280618
Feature norm: 479.246285
Error norm: 1076.870517
Active features: 95564
Line search trials: 1
Line search s

Storing the model
Number of active features: 94411 (588510)
Number of active attributes: 63545 (1983731)
Number of active labels: 31 (31)
Writing labels
Writing attributes
Writing feature references for transitions
Writing feature references for attributes
Seconds required: 0.980



In [56]:
# Evaluate the final model on the test split.
# Take c1/c2 from the grid-search winner rather than hard-coded debug values, so the
# model file looked up here is the one the final-model cell was asked to train.
c1, c2 = best_hyperparams['c1'], best_hyperparams['c2']

if args.dataset_name_or_path=='lst20' and args.label_col=='ner_tags':
    # test set of lst20 does not have E_TTL, so the last label is dropped
    # (assumes E_TTL is the final entry of tag_labels - TODO confirm)
    eval_labels = tag_labels[:-1]
else:
    eval_labels = tag_labels

# Single evaluation call; only the label list differs between datasets.
res = evaluate_crf(f'{args.dataset_name_or_path}_{args.label_col}_best_{c1}_{c2}.model',
                   x_test, y_test, eval_labels)
print(res['classification_report'])

HBox(children=(FloatProgress(value=0.0, max=621.0), HTML(value='')))


              precision    recall  f1-score   support

        DATE     0.8758    0.8221    0.8481       163
       EMAIL     1.0000    1.0000    1.0000         1
         LAW     0.9000    0.6000    0.7200        15
         LEN     0.9412    0.8000    0.8649        20
    LOCATION     0.7747    0.6770    0.7226       452
       MONEY     1.0000    0.9138    0.9550        58
ORGANIZATION     0.8550    0.7400    0.7934       550
     PERCENT     0.9375    0.9375    0.9375        16
      PERSON     0.8816    0.7941    0.8356       272
       PHONE     0.7500    0.6000    0.6667        10
        TIME     0.8154    0.6235    0.7067        85
         URL     1.0000    0.8571    0.9231         7
         ZIP     1.0000    0.5000    0.6667         2

   micro avg     0.8458    0.7408    0.7898      1651
   macro avg     0.9024    0.7589    0.8185      1651
weighted avg     0.8450    0.7408    0.7889      1651



In [57]:
# Lay the evaluation metrics out as a single-row table: one column per metric name.
pd.DataFrame.from_dict(res, orient='index').T

Unnamed: 0,accuracy,f1_micro,precision_micro,recall_micro,f1_macro,precision_macro,recall_macro,nb_samples,classification_report
0,0.94819,0.789797,0.845781,0.740763,0.818462,0.902403,0.758858,621,precision recall f1-score ...
