In [None]:
import sys
import os
import fastText

from validation import compute_f1

from keras.models import load_model
from keras.callbacks import ModelCheckpoint, Callback

import models
import utils

In [None]:
# Set CUDA environment variables for this kernel: order devices by PCI bus ID
# and make only device index 2 visible.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=2

# Train model for level 2 annotations
The level 2 classifier takes the level 1 labels as an additional input.

In [None]:
# Load the GermEval train / dev / test splits (level2=True is forwarded to the loader).
trainSentences = utils.get_sentences_germeval(
    '../data/GermEVAL/NER-de-train.tsv', level2=True
)
devSentences = utils.get_sentences_germeval(
    '../data/GermEVAL/NER-de-dev.tsv', level2=True
)
testSentences = utils.get_sentences_germeval(
    '../data/GermEVAL/NER-de-test.tsv', level2=True
)

# Alternative corpus (CoNLL German), kept disabled:
# trainSentences = get_sentences('../data/CONLL/deu/deu_utf.train')
# devSentences = get_sentences('../data/CONLL/deu/deu_utf.testa')
# testSentences = get_sentences('../data/CONLL/deu/deu_utf.testb')

# Number of sentences per split, one per line: train, dev, test.
print(len(trainSentences), len(devSentences), len(testSentences), sep='\n')


In [2]:
# Show the first test sentence. The data-loading cell must have been run first;
# otherwise `testSentences` is undefined and this raises a NameError.
print(testSentences[0])

NameError: name 'testSentences' is not defined

In [None]:
# Gather, over all three splits: the inner labels, the outer labels, every
# character occurring in a token, and the length of the longest sentence.
labelSet, labelOuterSet, characters = set(), set(), set()
models.max_sequence_length = 0

for corpus in (trainSentences, devSentences, testSentences):
    for tokens in corpus:
        # Each entry of a sentence unpacks to (token, outer label, inner label).
        for token, outer_tag, inner_tag in tokens:
            characters.update(token)
            labelSet.add(inner_tag)
            labelOuterSet.add(outer_tag)
        models.max_sequence_length = max(models.max_sequence_length, len(tokens))

In [None]:
# One value per line: number of inner labels, number of outer labels,
# longest sentence length.
print(len(labelSet), len(labelOuterSet), models.max_sequence_length, sep='\n')

In [None]:
# :: Create a mapping for the labels ::
# Index 0 is reserved for the padding token; every label gets the next free index.
# Labels are visited in sorted order: iterating a set of strings directly gives
# an order that can change from one interpreter run to the next, which would
# silently change the label -> index assignment after a kernel restart.
models.label2Idx = {"PADDING_TOKEN":0}
for label in sorted(labelSet):
    models.label2Idx[label] = len(models.label2Idx)

# :: Create a mapping for the outer labels ::
# Same scheme (padding at 0, sorted order) for the outer labels.
models.labelOuter2Idx = {"PADDING_TOKEN":0}
for label in sorted(labelOuterSet):
    models.labelOuter2Idx[label] = len(models.labelOuter2Idx)

In [None]:
# Inner-label mapping first, outer-label mapping second, one per line.
print(models.label2Idx, models.labelOuter2Idx, sep='\n')

In [None]:
# :: Hard coded case lookup ::
# The position of each casing class in the tuple is the index assigned to it
# (padding is 0).
models.case2Idx = {
    case_name: case_idx
    for case_idx, case_name in enumerate((
        'PADDING_TOKEN', 'numeric', 'allLower', 'allUpper',
        'initialUpper', 'other', 'mainly_numeric', 'contains_digit',
    ))
}

In [None]:
# Inspect the casing-feature lookup defined in the previous cell.
print(models.case2Idx)

In [None]:
# Sanity check: show the container type returned by the loader for the training split.
print(type(trainSentences))

In [None]:
# Show the first training sentence to inspect the per-token structure.
print(trainSentences[0])

In [None]:
# Character lookup: padding at index 0, then one index per observed character,
# and finally an 'UNKNOWN' entry at the last index.
# Characters are visited in sorted order: iterating the set directly gives an
# order that can change from one interpreter run to the next, which would
# change the character -> index assignment after a kernel restart.
models.char2Idx={"PADDING_TOKEN":0}
for char in sorted(characters):
    models.char2Idx[char] = len(models.char2Idx)
models.char2Idx['UNKNOWN'] = len(models.char2Idx)
print(models.char2Idx)

In [None]:
# First dev sentence, then first test sentence, one per line.
print(devSentences[0], testSentences[0], sep='\n')

In [None]:
# Load the fastText binary model from disk and store it on `models` as `ft`.
# NOTE(review): presumably these are the pre-trained German Wikipedia vectors — confirm.
models.ft = fastText.load_model("../embeddings/wiki.de.bin")

In [None]:
# Embedding dimensionality configured in `models`, then the token count of the
# first training sentence.
print(models.nb_embedding_dims, len(trainSentences[0]), sep='\n')

In [None]:
# Invert both lookups (index -> label) and print the size of each forward /
# inverse pair.
models.idx2Label = dict(zip(models.label2Idx.values(), models.label2Idx.keys()))
print(len(models.label2Idx), len(models.idx2Label), sep='\n')

models.idx2LabelOuter = dict(zip(models.labelOuter2Idx.values(), models.labelOuter2Idx.keys()))
print(len(models.labelOuter2Idx), len(models.idx2LabelOuter), sep='\n')

# Test Model

In [None]:
# import importlib
# importlib.reload(models)

In [None]:
# File that the F1History callback is given for this run.
tmp_model_filename = 'tmp_2ndlevelGermEval_3cnn_bi-lstm.h5'

# Disabled alternative: plain Keras checkpointing on validation accuracy.
# checkpoint = ModelCheckpoint(tmp_model_filename, verbose=1, save_best_only = True, monitor = 'val_acc')

# Callback evaluated against the dev split, configured for level 2.
history = utils.F1History(
    tmp_model_filename,
    devSet=devSentences,
    level2=True,
)

# Build the second-level network.
model = models.get_model_3cnn_2ndlevel()

In [None]:
# importlib.reload(utils)
# print(models.max_sequence_length)

In [None]:
# First training stage: 10 epochs on shuffled training batches of 32 sentences,
# validated on dev batches of 256.
train_batches = utils.NerSequence(trainSentences, shuffle_data=True, batch_size=32, level2=True)
dev_batches = utils.NerSequence(devSentences, batch_size=256, level2=True)
model.fit_generator(
    train_batches,
    validation_data=dev_batches,
    epochs=10,
    callbacks=[history],
)

In [None]:
# Values recorded by the callback: `acc` first, then `f1_scores`.
print(history.acc, history.f1_scores, sep='\n')

In [None]:
# Restore the weights stored in the temporary model file before continuing.
# NOTE(review): presumably F1History wrote the best-scoring epoch there — verify in utils.
model.load_weights(tmp_model_filename)

In [None]:
# Second training stage: 5 more epochs with a much larger training batch size
# (2048); the dev setup and the callback are the same as in the first stage.
train_batches = utils.NerSequence(trainSentences, shuffle_data=True, batch_size=2048, level2=True)
dev_batches = utils.NerSequence(devSentences, batch_size=256, level2=True)
model.fit_generator(
    train_batches,
    validation_data=dev_batches,
    epochs=5,
    callbacks=[history],
)

In [None]:
# Predict on the test split (with the outer labels stored in the data) and
# print the score computed from predictions vs. reference labels.
true_labels, pred_labels = utils.predict_sequences(model, testSentences, level2=True)
test_scores = compute_f1(pred_labels, true_labels, models.idx2Label)
print(test_scores)

In [None]:
import shutil, json

# Persist the current in-memory model together with every lookup table needed
# to use it later.
# NOTE(review): the original comment said "copy file for best run", but this
# saves the model as it is now, not the temporary model file — confirm intent.
model.save('../models/final_model_germeval_inner.h5')

# Order matters: the loading cell reads these back by position.
index_mappings = [
    models.idx2Label,
    models.label2Idx,
    models.char2Idx,
    models.case2Idx,
    models.idx2LabelOuter,
    models.labelOuter2Idx,
]
with open("../models/final_model_germeval_inner.indexes", "w") as index_file:
    json.dump(index_mappings, index_file)

# Evaluate on test set

To create a realistic test setup, we feed the predicted outer-chunk labels, rather than the gold-standard outer labels, into the second-level prediction.

Procedure:
1. Load the model for outer chunks (Level 1)
2. Predict the outer chunks (Level 1)
3. Write the result as a "new test data file": t_id token predicted_outer gold_inner
4. Create the model for level 2 based on the GermEval training / dev set
5. Predict on the "new test data file"
6. Evaluate performance with the official Perl script

In [3]:
import sys
import os
import fastText

from validation import compute_f1

from keras.models import load_model
from keras.callbacks import ModelCheckpoint, Callback

import models
import utils

from keras_contrib.layers import CRF
def create_custom_objects():
    """Build the ``custom_objects`` mapping passed to ``load_model`` for a model with a CRF layer.

    The returned ``loss`` and ``accuracy`` callables delegate to the
    ``loss_function`` / ``accuracy`` attributes of the most recently
    constructed wrapper instance, so they only work after a layer has been
    instantiated.

    Returns:
        dict with the keys "ClassWrapper", "CRF", "loss" and "accuracy".
    """
    # Shared slot holding the last wrapper instance that was constructed.
    latest_crf = {"instance": None}

    class ClassWrapper(CRF):
        """CRF subclass that records each new instance in ``latest_crf``."""

        def __init__(self, *args, **kwargs):
            # Record the instance before delegating to the CRF constructor.
            latest_crf["instance"] = self
            super().__init__(*args, **kwargs)

    def loss(*args):
        # Resolved at call time against the recorded instance.
        return latest_crf["instance"].loss_function(*args)

    def accuracy(*args):
        # Resolved at call time against the recorded instance.
        return latest_crf["instance"].accuracy(*args)

    return {
        "ClassWrapper": ClassWrapper,
        "CRF": ClassWrapper,
        "loss": loss,
        "accuracy": accuracy,
    }

# Load the saved second-level model; the custom objects mapping supplies the
# CRF wrapper class and its loss / accuracy callables.
finalmodel = load_model('../models/final_model_germeval_inner.h5', custom_objects=create_custom_objects())

In [11]:
# Load label mapping
import json

# Read the lookup tables stored alongside the model. The context manager closes
# the file handle, which a bare json.load(open(...)) would leave open.
with open("../models/final_model_germeval_inner.indexes", "r") as index_file:
    indexMappings = json.load(index_file)

# The list is positional: idx2Label, label2Idx, char2Idx, case2Idx,
# idx2LabelOuter, labelOuter2Idx. JSON object keys are always strings, so the
# two index -> label tables need their keys converted back to int.
models.idx2Label = {int(k):v for k,v in indexMappings[0].items()}
models.label2Idx = indexMappings[1]
models.char2Idx = indexMappings[2]
models.case2Idx = indexMappings[3]
models.idx2LabelOuter = {int(k):v for k,v in indexMappings[4].items()}
models.labelOuter2Idx = indexMappings[5]

# NOTE(review): hard-coded; presumably this must equal the max_sequence_length
# computed when the model was trained — confirm before reusing with other data.
models.max_sequence_length = 56
models.ft = fastText.load_model("../embeddings/wiki.de.bin")

In [12]:
# Load only the GermEval test split (level2=True is forwarded to the loader).
testSentences = utils.get_sentences_germeval('../data/GermEVAL/NER-de-test.tsv', level2=True)

In [13]:
# Predict on the test split with the reloaded model and print the score
# computed from predictions vs. reference labels.
true_labels, pred_labels = utils.predict_sequences(finalmodel, testSentences, level2=True)
final_scores = compute_f1(pred_labels, true_labels, models.idx2Label)
print(final_scores)

(0.7730061349693251, 0.7339805825242719, 0.752988047808765)


In [None]:
# Write the test predictions as a tab-separated file: one token per line, with
# a blank line after every sentence. Columns are the 1-based token position,
# the token, the reference label (twice) and the predicted label (twice).
# NOTE(review): presumably this is the layout the official evaluation script expects — confirm.
# The `with` block guarantees the file is flushed and closed; the previous
# `f.close` (without parentheses) never actually called close().
with open('germeval_output_inner.tsv', 'w', encoding='UTF-8') as f:
    for i_sent, sent in enumerate(testSentences):
        for i_tok, tok in enumerate(sent):
            # Stop at the first padding entry of a sentence.
            if tok[0] == 'PADDING_TOKEN':
                break
            correctlabel = models.idx2Label[true_labels[i_sent][i_tok]]
            guessedlabel = models.idx2Label[pred_labels[i_sent][i_tok]]
            line = "\t".join([str(i_tok+1), tok[0], correctlabel, correctlabel, guessedlabel, guessedlabel])
            f.write(line + '\n')
        f.write('\n')