In [1]:
import pandas as pd
import numpy as np
from csv import DictReader, DictWriter
from utils.data_helper import get_markable_dataframe, get_embedding_variables
from functools import reduce

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Seed NumPy's global random generator so NumPy-based randomness is repeatable across runs.
# NOTE(review): this does not seed TensorFlow/Keras - confirm whether that matters for this notebook.
np.random.seed(26061997)

In [3]:
# Locations of the two embedding helper files passed to get_embedding_variables.
embedding_indexes_file_path = 'helper_files/embedding/embedding_indexes.txt'
indexed_embedding_file_path = 'helper_files/embedding/indexed_embedding.txt'

# Load the embedding lookup structures. Only word_vector and idx_by_word are used by the
# cells visible in this notebook.
# NOTE(review): the exact type/shape of each returned value is defined in utils.data_helper - confirm there.
word_vector, embedding_matrix, idx_by_word, word_by_idx = get_embedding_variables(embedding_indexes_file_path, indexed_embedding_file_path)

In [4]:
# CSV of test markables; the same file is read again at the end of the notebook
# when the predicted singleton labels are written out.
data_testing_file_path = "data/testing/markables.csv"

# Build the markable DataFrame from the CSV using the embedding lookups loaded earlier.
data = get_markable_dataframe(data_testing_file_path, word_vector, idx_by_word)

# Preview the first rows to sanity-check the loaded columns.
data.head()

Unnamed: 0,id,text,is_pronoun,entity_type,is_proper_name,is_first_person,previous_words,next_words,is_singleton
0,1916,"[1263, 1264, 1968, 1395]",0,"[0, 0, 0, 1, 0, 1, 0, 0, 0, 0]",1,0,[],"[999, 379, 1161, 213, 27, 1263, 1969, 1188, 14...","[1.0, 0.0]"
1,1917,[213],1,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0]",0,0,"[1263, 1264, 1968, 1395, 999, 379, 1161]","[27, 1263, 1969, 1188, 1470, 25, 1161, 63, 424...","[1.0, 0.0]"
2,1918,"[1263, 1969, 1188]",0,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 1]",1,0,"[1263, 1264, 1968, 1395, 999, 379, 1161, 213, 27]","[1470, 25, 1161, 63, 424, 1223, 25, 1415, 1161...","[1.0, 0.0]"
3,1919,"[1470, 25, 1161]",0,"[0, 1, 0, 0, 0, 1, 0, 0, 0, 0]",0,0,"[1968, 1395, 999, 379, 1161, 213, 27, 1263, 19...","[63, 424, 1223, 25, 1415, 1161, 876, 344, 213,...","[0.0, 1.0]"
4,1920,[424],0,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,"[1161, 213, 27, 1263, 1969, 1188, 1470, 25, 11...","[1223, 25, 1415, 1161, 876, 344, 213, 406, 122...","[0.0, 1.0]"


In [5]:
# Fixed sequence lengths for the three word-index inputs.
max_text_length = 10
max_prev_words_length = 10
max_next_words_length = 10

# Markable text, padded at the end up to max_text_length.
# NOTE(review): longer texts fall back to pad_sequences' default truncating='pre'
# (words are dropped from the front) - confirm this matches how the models were trained.
data_text = pad_sequences(
    data.text,
    maxlen=max_text_length,
    padding='post',
)

# Left context: keep only the words closest to the markable (the tail), pad at the front.
data_previous_words = pad_sequences(
    data.previous_words.map(lambda words: words[-max_prev_words_length:]),
    maxlen=max_prev_words_length,
    padding='pre',
)

# Right context: keep only the words closest to the markable (the head), pad at the end.
data_next_words = pad_sequences(
    data.next_words.map(lambda words: words[:max_next_words_length]),
    maxlen=max_next_words_length,
    padding='post',
)

# Syntactic features: flatten each row into one flat list, expanding list-valued
# cells in place and keeping scalar cells as single entries.
data_syntactic = np.array([
    [
        value
        for feature in row
        for value in (feature if type(feature) is list else [feature])
    ]
    for row in data[['is_pronoun', 'entity_type', 'is_proper_name', 'is_first_person']].values
])

# Stack the per-row label vectors into one 2-D array (one row per markable).
label = np.vstack(data.is_singleton)

# Load Model

In [6]:
# Load the seven saved singleton classifiers, one per feature combination.
# The order of the names on the left matches the order of the paths on the right.
(
    words_model,
    context_model,
    syntactic_model,
    words_context_model,
    words_syntactic_model,
    context_syntactic_model,
    words_context_syntactic_model,
) = [
    load_model(model_path)
    for model_path in (
        'models/singleton_classifiers/words.model',
        'models/singleton_classifiers/context.model',
        'models/singleton_classifiers/syntactic.model',
        'models/singleton_classifiers/words_context.model',
        'models/singleton_classifiers/words_syntactic.model',
        'models/singleton_classifiers/context_syntactic.model',
        'models/singleton_classifiers/words_context_syntactic.model',
    )
]

W0314 11:24:22.308352 140304210511680 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/initializers.py:111: calling RandomUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0314 11:24:22.309836 140304210511680 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/initializers.py:135: calling RandomNormal.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0314 11:24:22.310751 140304210511680 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:96: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) 

# Test Model

In [7]:
def get_classes(output, threshold=0.5):
    """Convert per-row score pairs into hard 0/1 class labels.

    Args:
        output: iterable of rows that can be indexed at position 1
            (for example one-hot labels or two-column model predictions).
        threshold: a row is assigned class 1 only when its value at
            index 1 is strictly greater than this number.

    Returns:
        A list with one integer (0 or 1) per row of ``output``.
    """
    return [1 if scores[1] > threshold else 0 for scores in output]

def evaluate(label, pred, threshold=0.5):
    """Print a classification report and the accuracy for one decision threshold.

    Args:
        label: ground-truth rows; converted to classes with the default
            threshold of ``get_classes``.
        pred: predicted score rows; converted to classes using ``threshold``.
        threshold: cut-off applied to the predictions only.

    Returns:
        None. The results are printed.
    """
    true_classes = get_classes(label)
    predicted_classes = get_classes(pred, threshold)

    print('threshold %f:' % threshold)
    print(classification_report(true_classes, predicted_classes))

    accuracy = accuracy_score(true_classes, predicted_classes)
    print('accuracy: %f' % accuracy)

def evaluate_all(label, pred):
    """Run ``evaluate`` for the thresholds 0.1, 0.2, ..., 0.9 in ascending order.

    Args:
        label: ground-truth rows, passed through to ``evaluate``.
        pred: predicted score rows, passed through to ``evaluate``.

    Returns:
        None. Each call to ``evaluate`` prints its own report.
    """
    thresholds = [step*0.1 for step in range(1, 10)]
    for threshold in thresholds:
        evaluate(label, pred, threshold)

## Words Model

In [8]:
# Score the test markables with the words model; its only input is the padded markable text.
words_pred = words_model.predict([data_text])

In [9]:
# Print the classification report and accuracy of the words model at thresholds 0.1 to 0.9.
evaluate_all(label, words_pred)

threshold 0.100000:
             precision    recall  f1-score   support

          0       0.80      0.33      0.47       105
          1       0.91      0.99      0.95       693

avg / total       0.89      0.90      0.88       798

accuracy: 0.901003
threshold 0.200000:
             precision    recall  f1-score   support

          0       0.80      0.35      0.49       105
          1       0.91      0.99      0.95       693

avg / total       0.90      0.90      0.89       798

accuracy: 0.903509
threshold 0.300000:
             precision    recall  f1-score   support

          0       0.75      0.36      0.49       105
          1       0.91      0.98      0.94       693

avg / total       0.89      0.90      0.88       798

accuracy: 0.899749
threshold 0.400000:
             precision    recall  f1-score   support

          0       0.72      0.36      0.48       105
          1       0.91      0.98      0.94       693

avg / total       0.88      0.90      0.88       798

acc

## Context Model

In [10]:
# Score the test markables with the context model; inputs are the padded previous and next words.
context_pred = context_model.predict([data_previous_words, data_next_words])

In [11]:
# Print the classification report and accuracy of the context model at thresholds 0.1 to 0.9.
evaluate_all(label, context_pred)

threshold 0.100000:
             precision    recall  f1-score   support

          0       0.66      0.22      0.33       105
          1       0.89      0.98      0.94       693

avg / total       0.86      0.88      0.86       798

accuracy: 0.882206
threshold 0.200000:
             precision    recall  f1-score   support

          0       0.59      0.37      0.46       105
          1       0.91      0.96      0.93       693

avg / total       0.87      0.88      0.87       798

accuracy: 0.883459
threshold 0.300000:
             precision    recall  f1-score   support

          0       0.54      0.38      0.45       105
          1       0.91      0.95      0.93       693

avg / total       0.86      0.88      0.87       798

accuracy: 0.875940
threshold 0.400000:
             precision    recall  f1-score   support

          0       0.51      0.40      0.45       105
          1       0.91      0.94      0.93       693

avg / total       0.86      0.87      0.86       798

acc

## Syntactic Model

In [12]:
# Score the test markables with the syntactic model; its only input is the flattened syntactic features.
syntactic_pred = syntactic_model.predict([data_syntactic])

In [13]:
# Print the classification report and accuracy of the syntactic model at thresholds 0.1 to 0.9.
evaluate_all(label, syntactic_pred)

threshold 0.100000:
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       105
          1       0.87      1.00      0.93       693

avg / total       0.75      0.87      0.81       798

accuracy: 0.868421
threshold 0.200000:
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       105
          1       0.87      1.00      0.93       693

avg / total       0.75      0.87      0.81       798

accuracy: 0.868421
threshold 0.300000:
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       105
          1       0.87      1.00      0.93       693

avg / total       0.75      0.87      0.81       798

accuracy: 0.868421
threshold 0.400000:
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       105
          1       0.87      1.00      0.93       693

avg / total       0.75      0.87      0.81       798

acc

  'precision', 'predicted', average, warn_for)


## Words + Context Model

In [14]:
# Score the test markables with the words + context model (markable text, previous words, next words).
words_context_pred = words_context_model.predict([data_text, data_previous_words, data_next_words])

In [15]:
# Print the classification report and accuracy of the words + context model at thresholds 0.1 to 0.9.
evaluate_all(label, words_context_pred)

threshold 0.100000:
             precision    recall  f1-score   support

          0       0.88      0.41      0.56       105
          1       0.92      0.99      0.95       693

avg / total       0.91      0.91      0.90       798

accuracy: 0.914787
threshold 0.200000:
             precision    recall  f1-score   support

          0       0.85      0.49      0.62       105
          1       0.93      0.99      0.96       693

avg / total       0.92      0.92      0.91       798

accuracy: 0.921053
threshold 0.300000:
             precision    recall  f1-score   support

          0       0.76      0.55      0.64       105
          1       0.93      0.97      0.95       693

avg / total       0.91      0.92      0.91       798

accuracy: 0.918546
threshold 0.400000:
             precision    recall  f1-score   support

          0       0.76      0.55      0.64       105
          1       0.93      0.97      0.95       693

avg / total       0.91      0.92      0.91       798

acc

## Words + Syntactic Model

In [16]:
# Score the test markables with the words + syntactic model (markable text, syntactic features).
# These predictions are also the ones written to the enriched CSV at the end of the notebook.
words_syntactic_pred = words_syntactic_model.predict([data_text, data_syntactic])

In [17]:
# Print the classification report and accuracy of the words + syntactic model at thresholds 0.1 to 0.9.
evaluate_all(label, words_syntactic_pred)

threshold 0.100000:
             precision    recall  f1-score   support

          0       0.87      0.38      0.53       105
          1       0.91      0.99      0.95       693

avg / total       0.91      0.91      0.90       798

accuracy: 0.911028
threshold 0.200000:
             precision    recall  f1-score   support

          0       0.87      0.39      0.54       105
          1       0.91      0.99      0.95       693

avg / total       0.91      0.91      0.90       798

accuracy: 0.912281
threshold 0.300000:
             precision    recall  f1-score   support

          0       0.82      0.40      0.54       105
          1       0.92      0.99      0.95       693

avg / total       0.90      0.91      0.90       798

accuracy: 0.909774
threshold 0.400000:
             precision    recall  f1-score   support

          0       0.81      0.44      0.57       105
          1       0.92      0.98      0.95       693

avg / total       0.91      0.91      0.90       798

acc

## Context + Syntactic Model

In [18]:
# Score the test markables with the context + syntactic model (previous words, next words, syntactic features).
context_syntactic_pred = context_syntactic_model.predict([data_previous_words, data_next_words, data_syntactic])

In [19]:
# Print the classification report and accuracy of the context + syntactic model at thresholds 0.1 to 0.9.
evaluate_all(label, context_syntactic_pred)

threshold 0.100000:
             precision    recall  f1-score   support

          0       0.65      0.21      0.32       105
          1       0.89      0.98      0.93       693

avg / total       0.86      0.88      0.85       798

accuracy: 0.880952
threshold 0.200000:
             precision    recall  f1-score   support

          0       0.66      0.30      0.41       105
          1       0.90      0.98      0.94       693

avg / total       0.87      0.89      0.87       798

accuracy: 0.887218
threshold 0.300000:
             precision    recall  f1-score   support

          0       0.66      0.33      0.44       105
          1       0.91      0.97      0.94       693

avg / total       0.87      0.89      0.87       798

accuracy: 0.889724
threshold 0.400000:
             precision    recall  f1-score   support

          0       0.64      0.35      0.45       105
          1       0.91      0.97      0.94       693

avg / total       0.87      0.89      0.87       798

acc

## Words + Context + Syntactic Model

In [20]:
# Score the test markables with the model that takes all four inputs:
# markable text, previous words, next words and syntactic features.
words_context_syntactic_pred = words_context_syntactic_model.predict([
    data_text, data_previous_words, data_next_words, data_syntactic])

In [21]:
# Print the classification report and accuracy of the words + context + syntactic model at thresholds 0.1 to 0.9.
evaluate_all(label, words_context_syntactic_pred)

threshold 0.100000:
             precision    recall  f1-score   support

          0       0.78      0.53      0.63       105
          1       0.93      0.98      0.95       693

avg / total       0.91      0.92      0.91       798

accuracy: 0.918546
threshold 0.200000:
             precision    recall  f1-score   support

          0       0.75      0.57      0.65       105
          1       0.94      0.97      0.95       693

avg / total       0.91      0.92      0.91       798

accuracy: 0.918546
threshold 0.300000:
             precision    recall  f1-score   support

          0       0.75      0.58      0.66       105
          1       0.94      0.97      0.95       693

avg / total       0.91      0.92      0.92       798

accuracy: 0.919799
threshold 0.400000:
             precision    recall  f1-score   support

          0       0.74      0.59      0.66       105
          1       0.94      0.97      0.95       693

avg / total       0.91      0.92      0.91       798

acc

# Enrich Markable Data with Predicted Singleton Labels (for coreference-resolution testing)

In [22]:
# Destination CSV: a copy of the test markables whose is_singleton column holds predicted classes.
rich_data_testing_file_path = "data/testing/markables_with_predicted_singleton.csv"

In [24]:
# Copy the original markables CSV row by row, replacing each row's `is_singleton`
# value with the class predicted by the words + syntactic model.
#
# Both files are opened with newline='' as the csv module requires: without it,
# newlines embedded in quoted fields are not read back correctly, and on platforms
# that translate '\n' on write every output row would end in '\r\r\n'.
with open(data_testing_file_path, "r", newline="") as orifile:
    oricsv = DictReader(orifile)

    with open(rich_data_testing_file_path, "w", newline="") as newfile:
        # Reuse the source header so the enriched file keeps the same columns and order.
        newcsv = DictWriter(newfile, fieldnames=oricsv.fieldnames)

        newcsv.writeheader()

        # Predictions are cut at 0.8 here rather than get_classes' default of 0.5.
        # NOTE(review): zip stops at the shorter input, so this relies on the CSV rows
        # and the predictions having the same length and order - confirm that
        # get_markable_dataframe neither drops nor reorders rows.
        for row, is_singleton in zip(oricsv, get_classes(words_syntactic_pred, 0.8)):
            newcsv.writerow({**row, 'is_singleton': is_singleton})