In [1]:
import sys
import os
import fastText

from validation import compute_f1
import conlleval

from keras.models import load_model
from keras.callbacks import ModelCheckpoint, Callback

import models
import utils

import shutil, json

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
#%env CUDA_DEVICE_ORDER=PCI_BUS_ID
#%env CUDA_VISIBLE_DEVICES=2

In [3]:
from keras_contrib.layers import CRF
def create_custom_objects():
    """Build the ``custom_objects`` mapping used when loading a saved CRF model.

    A subclass of ``CRF`` records every instance it constructs in a shared
    holder. The returned ``loss`` and ``accuracy`` callables forward their
    arguments to the ``loss_function`` / ``accuracy`` attributes of the most
    recently constructed instance, so they only work after a wrapper layer
    has been instantiated.

    Returns
    -------
    dict
        Keys ``"ClassWrapper"`` and ``"CRF"`` (both mapped to the wrapper
        class), ``"loss"`` and ``"accuracy"`` (the forwarding callables).
    """
    # Mutable cell shared by the wrapper class and the two closures below.
    holder = {"instance": None}

    class ClassWrapper(CRF):
        def __init__(self, *args, **kwargs):
            # Remember the layer before delegating to the real constructor.
            holder["instance"] = self
            super().__init__(*args, **kwargs)

    def loss(*args):
        return holder["instance"].loss_function(*args)

    def accuracy(*args):
        return holder["instance"].accuracy(*args)

    return {
        "ClassWrapper": ClassWrapper,
        "CRF": ClassWrapper,
        "loss": loss,
        "accuracy": accuracy,
    }

In [60]:
# Read the sentences of the GermEval test file through the project helper
# and print how many were loaded.
testSentences = utils.get_sentences_germeval('../../Resources/GermEVAL/NER-de-test.tsv')
print(len(testSentences))


5100


In [5]:
# Load the fastText binary model and attach it to the `models` module as `ft`.
# NOTE(review): presumably consumed by models/utils to build word embeddings — confirm.
models.ft = fastText.load_model("../../fastText/wiki.de.bin")

In [6]:
def load_model_indexes(indexes_file, max_sequence_length=56):
    """Load the index mappings stored for a model and install them on ``models``.

    The JSON file must contain a list whose first four entries are, in order,
    the mappings assigned to ``models.idx2Label``, ``models.label2Idx``,
    ``models.char2Idx`` and ``models.case2Idx``.

    Parameters
    ----------
    indexes_file : str
        Path to the ``*.indexes`` JSON file.
    max_sequence_length : int, optional
        Value assigned to ``models.max_sequence_length``. Defaults to 56,
        the value this notebook previously hard-coded.
    """
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(indexes_file, "r") as indexes_fh:
        indexMappings = json.load(indexes_fh)

    # JSON object keys are always strings; convert them back to integer ids.
    models.idx2Label = {int(k): v for k, v in indexMappings[0].items()}
    models.label2Idx = indexMappings[1]
    models.char2Idx = indexMappings[2]
    models.case2Idx = indexMappings[3]
    models.max_sequence_length = max_sequence_length

# Test Model

In [7]:
# Install the index mappings of the v2.0 model on `models`, then load the
# saved Keras model. The custom objects let Keras resolve the "CRF" layer and
# the "loss"/"accuracy" names while deserializing.
load_model_indexes('model_lstm_germeval_v2.0.h5.indexes')
finalmodel = load_model('model_lstm_germeval_v2.0.h5', custom_objects=create_custom_objects())

In [8]:
# Run the loaded model over the test sentences and print the result of
# compute_f1 for the predicted vs. gold label sequences.
# NOTE(review): the recorded output is a 3-tuple, presumably
# (precision, recall, F1) — confirm against validation.compute_f1.
true_labels, pred_labels = utils.predict_sequences(finalmodel, testSentences)
print(compute_f1(pred_labels, true_labels, models.idx2Label))

(0.8325358851674641, 0.8167691809647135, 0.8245771713375276)


# Official script eval CoNLL

In [17]:
import conlleval

# Dump gold and predicted tags to a file, then score that file.
# Both helpers live in the `conlleval` module; only the module itself is
# imported, so they must be called through it (unqualified names raise
# NameError on a fresh kernel).
eval_file = 'test_pl.tsv'
conlleval.write_conll_file(true_labels, pred_labels, testSentences, models.idx2Label, eval_file)
p, r, f = conlleval.evaluate_conll_file(eval_file)

processed 101599 tokens with 6178 phrases; found: 6063 phrases; correct: 5047.
accuracy:  81.27%; (non-O)
accuracy:  97.73%; precision:  83.24%; recall:  81.69%; FB1:  82.46
              LOC: precision:  85.47%; recall:  89.98%; FB1:  87.66  1796
         LOCderiv: precision:  86.61%; recall:  93.40%; FB1:  89.88  605
          LOCpart: precision:  76.62%; recall:  54.13%; FB1:  63.44  77
              ORG: precision:  77.52%; recall:  75.57%; FB1:  76.53  1121
         ORGderiv: precision:  50.00%; recall:  12.50%; FB1:  20.00  2
          ORGpart: precision:  67.04%; recall:  69.77%; FB1:  68.38  179
              OTH: precision:  72.20%; recall:  57.39%; FB1:  63.95  554
         OTHderiv: precision:  64.86%; recall:  61.54%; FB1:  63.16  37
          OTHpart: precision:  64.29%; recall:  21.43%; FB1:  32.14  14
              PER: precision:  90.64%; recall:  91.03%; FB1:  90.84  1646
         PERderiv: precision:  37.50%; recall:  27.27%; FB1:  31.58  8
          PERpart: precisio

In [18]:
# Show the three values returned by the CoNLL evaluation, one per line.
print(p, r, f, sep='\n')

83.24261916542966
81.693104564584
82.46058328567928


## Averages

In [48]:
import numpy as np

In [61]:
# Evaluate every available training run (suffixes 0-9) with the CoNLL
# evaluation helpers and report the mean of the returned F1 values.
file_base_name = 'model_lstm_germeval_v2.'
eval_file = 'test_pl.tsv'
all_f1_scores = []

for run_i in range(10):
    model_file = file_base_name + str(run_i) + '.h5'
    index_file = file_base_name + str(run_i) + '.h5.indexes'
    if not os.path.isfile(model_file):
        print(model_file + ' not found')
        continue
    if not os.path.isfile(index_file):
        # Report the file that is actually missing (was: model_file).
        print(index_file + ' not found')
        continue

    print('Evaluating model ' + model_file)

    # The index mappings must be installed on `models` before predicting,
    # because models.idx2Label is read below.
    load_model_indexes(index_file)
    finalmodel = load_model(model_file, custom_objects=create_custom_objects())

    true_labels, pred_labels = utils.predict_sequences(finalmodel, testSentences)

    conlleval.write_conll_file(true_labels, pred_labels, testSentences, models.idx2Label, eval_file)
    p, r, f = conlleval.evaluate_conll_file(eval_file)
    all_f1_scores.append(f)

print(all_f1_scores)
if all_f1_scores:
    print(np.mean(all_f1_scores))
else:
    # np.mean([]) would emit a RuntimeWarning and print nan.
    print('No models were evaluated; mean F1 is undefined')

Evaluating model model_lstm_germeval_v2.0.h5
processed 101599 tokens with 6178 phrases; found: 6063 phrases; correct: 5047.
accuracy:  81.27%; (non-O)
accuracy:  97.73%; precision:  83.24%; recall:  81.69%; FB1:  82.46
              LOC: precision:  85.47%; recall:  89.98%; FB1:  87.66  1796
         LOCderiv: precision:  86.61%; recall:  93.40%; FB1:  89.88  605
          LOCpart: precision:  76.62%; recall:  54.13%; FB1:  63.44  77
              ORG: precision:  77.52%; recall:  75.57%; FB1:  76.53  1121
         ORGderiv: precision:  50.00%; recall:  12.50%; FB1:  20.00  2
          ORGpart: precision:  67.04%; recall:  69.77%; FB1:  68.38  179
              OTH: precision:  72.20%; recall:  57.39%; FB1:  63.95  554
         OTHderiv: precision:  64.86%; recall:  61.54%; FB1:  63.16  37
          OTHpart: precision:  64.29%; recall:  21.43%; FB1:  32.14  14
              PER: precision:  90.64%; recall:  91.03%; FB1:  90.84  1646
         PERderiv: precision:  37.50%; recall:  27.27

ValueError: not enough values to unpack (expected 2, got 1)

In [45]:
# import importlib
# importlib.reload(conlleval)
# importlib.reload(utils)

<module 'conlleval' from '/srv/microNER/scripts/conlleval.py'>

# Official script eval Germeval

In [19]:
# Keep a reference to the idx2Label mapping currently installed on `models`.
# load_model_indexes assigns a new dict to models.idx2Label, so this must be
# captured before the second-level indexes are loaded.
# NOTE(review): which model this mapping belongs to depends on the last
# load_model_indexes call that ran — confirm it matches true_labels/pred_labels.
idx_1st = models.idx2Label

In [24]:
# Debug output: the label->index mapping currently installed on `models`
# and the index->label mapping captured in idx_1st.
# NOTE(review): the recorded output shows 19 labels in label2Idx but 26
# entries in idx_1st, so the two appear to come from different index files
# (out-of-order execution) — confirm.
print(models.label2Idx)
print(idx_1st)

{'O': 1, 'B-LOC': 7, 'I-LOC': 18, 'B-OTHpart': 2, 'B-PERderiv': 9, 'B-PER': 15, 'B-OTHderiv': 3, 'B-ORGpart': 6, 'B-LOCpart': 11, 'B-PERpart': 12, 'PADDING_TOKEN': 0, 'I-OTH': 16, 'B-ORG': 4, 'I-LOCderiv': 10, 'B-OTH': 13, 'B-ORGderiv': 5, 'I-PER': 8, 'B-LOCderiv': 17, 'I-ORG': 14}
{0: 'PADDING_TOKEN', 1: 'I-LOC', 2: 'B-ORGderiv', 3: 'I-ORGderiv', 4: 'I-OTH', 5: 'I-ORGpart', 6: 'B-OTHderiv', 7: 'I-OTHpart', 8: 'I-PERderiv', 9: 'O', 10: 'B-PER', 11: 'B-PERderiv', 12: 'B-LOC', 13: 'I-PERpart', 14: 'B-ORGpart', 15: 'I-ORG', 16: 'B-LOCpart', 17: 'I-LOCpart', 18: 'B-ORG', 19: 'I-PER', 20: 'I-OTHderiv', 21: 'B-LOCderiv', 22: 'I-LOCderiv', 23: 'B-PERpart', 24: 'B-OTHpart', 25: 'B-OTH'}


In [40]:
# Second-level evaluation: re-read the test file with level2=True, install
# the second-level index mappings on `models` (this replaces the first-level
# ones), load the second-level model, predict, and print compute_f1.
testSentences_2nd = utils.get_sentences_germeval('../../Resources/GermEVAL/NER-de-test.tsv', level2=True)
load_model_indexes('model_lstm_germeval_2nd-level.h5.indexes')
finalmodel_2nd = load_model('model_lstm_germeval_2nd-level.h5', custom_objects=create_custom_objects())
true_labels_2nd, pred_labels_2nd = utils.predict_sequences(finalmodel_2nd, testSentences_2nd)
print(compute_f1(pred_labels_2nd, true_labels_2nd, models.idx2Label))
# Capture the second-level idx2Label mapping that load_model_indexes installed above.
idx_2nd = models.idx2Label

(0.5938864628820961, 0.5281553398058253, 0.5590955806783146)


In [41]:
# Write first- and second-level gold/predicted labels, with their respective
# index->label mappings, to one file via conlleval.write_germeval_file.
# Note: this reuses 'test_pl.tsv', the same file name the CoNLL evaluation
# cells write to, so that file is overwritten.
eval_file = 'test_pl.tsv'
conlleval.write_germeval_file(true_labels, pred_labels, true_labels_2nd, pred_labels_2nd, testSentences, idx_1st, idx_2nd, eval_file) 