In [27]:
import pandas as pd
import numpy as np
from csv import DictReader, DictWriter
from utils.data_helper import get_markable_dataframe, get_embedding_variables
from functools import reduce

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Fix NumPy's global random seed so NumPy-driven randomness is repeatable across runs.
# NOTE(review): this seeds NumPy only; confirm whether other libraries need seeding too.
np.random.seed(26061997)

In [3]:
# Files handed to get_embedding_variables to build the embedding lookups.
embedding_indexes_file_path = 'helper_files/embedding/embedding_indexes.txt'
indexed_embedding_file_path = 'helper_files/embedding/indexed_embedding.txt'

# Four values are returned; the cells shown in this notebook only pass word_vector and
# idx_by_word onward (to get_markable_dataframe).
word_vector, embedding_matrix, idx_by_word, word_by_idx = get_embedding_variables(embedding_indexes_file_path, indexed_embedding_file_path)

In [25]:
# Test-set markables. The same path is opened again later, when the CSV with
# predicted singleton labels is written.
data_testing_file_path = "data/testing/markables.csv"

data = get_markable_dataframe(data_testing_file_path, word_vector, idx_by_word)

# Preview the first rows of the loaded frame.
data.head()

Unnamed: 0,id,text,is_pronoun,entity_type,is_proper_name,is_first_person,num_words,previous_words,next_words,is_singleton
0,1916,"[1258, 1259, 1955, 1389]",0,"[0, 0, 0, 0, 0, 0, 1, 0, 1, 0]",1,0,4,[],"[996, 377, 1156, 212, 26, 1258, 1956, 1183, 14...","[1.0, 0.0]"
1,1917,[212],1,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",0,0,1,"[1258, 1259, 1955, 1389, 996, 377, 1156]","[26, 1258, 1956, 1183, 1464, 24, 1156, 62, 422...","[1.0, 0.0]"
2,1918,"[1258, 1956, 1183]",0,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 1]",1,0,3,"[1258, 1259, 1955, 1389, 996, 377, 1156, 212, 26]","[1464, 24, 1156, 62, 422, 1218, 24, 1409, 1156...","[1.0, 0.0]"
3,1919,"[1464, 24, 1156]",0,"[0, 0, 0, 0, 0, 0, 0, 1, 1, 0]",0,0,3,"[1955, 1389, 996, 377, 1156, 212, 26, 1258, 19...","[62, 422, 1218, 24, 1409, 1156, 874, 342, 212,...","[0.0, 1.0]"
4,1920,[422],0,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]",0,0,1,"[1156, 212, 26, 1258, 1956, 1183, 1464, 24, 11...","[1218, 24, 1409, 1156, 874, 342, 212, 404, 121...","[0.0, 1.0]"


In [5]:
# Fixed sequence lengths for the three token-index inputs.
max_text_length = 10
max_prev_words_length = 10
max_next_words_length = 10


def _flatten_row(row):
    """Concatenate one row's cells into a flat list, splicing list cells in place."""
    flat = []
    for cell in row:
        if type(cell) is list:
            flat.extend(cell)
        else:
            flat.append(cell)
    return flat


# Text: padded at the end.
data_text = pad_sequences(data.text, maxlen=max_text_length, padding='post')

# Previous words: keep the last max_prev_words_length entries, pad at the front.
previous_window = data.previous_words.map(lambda seq: seq[-max_prev_words_length:])
data_previous_words = pad_sequences(previous_window, maxlen=max_prev_words_length, padding='pre')

# Next words: keep the first max_next_words_length entries, pad at the end.
next_window = data.next_words.map(lambda seq: seq[:max_next_words_length])
data_next_words = pad_sequences(next_window, maxlen=max_next_words_length, padding='post')

# Syntactic features: scalar columns and the list-valued entity_type column are
# flattened into one vector per row, keeping the column order below.
syntactic_columns = ['is_pronoun', 'entity_type', 'is_proper_name', 'is_first_person', 'num_words']
data_syntactic = np.array([_flatten_row(row) for row in data[syntactic_columns].values])

# Stack the per-row is_singleton values into a single 2-D array.
label = np.vstack(data.is_singleton)

# Load Models

In [6]:
# One saved classifier per feature-group combination; each file name mirrors the
# inputs that model is given in the predict calls further down.
words_model = load_model('models/singleton_classifiers/words.model')
context_model = load_model('models/singleton_classifiers/context.model')
syntactic_model = load_model('models/singleton_classifiers/syntactic.model')
words_context_model = load_model('models/singleton_classifiers/words_context.model')
words_syntactic_model = load_model('models/singleton_classifiers/words_syntactic.model')
context_syntactic_model = load_model('models/singleton_classifiers/context_syntactic.model')
words_context_syntactic_model = load_model('models/singleton_classifiers/words_context_syntactic.model')

# Test Models

In [22]:
def get_classes(output, threshold=0.5):
    """Turn two-column score rows into 0/1 class labels.

    A row is labelled 1 when its entry at index 1 is strictly greater than
    ``threshold``; otherwise it is labelled 0.

    Args:
        output: iterable of indexable rows with at least two entries.
        threshold: cut-off compared against each row's entry at index 1.

    Returns:
        A list of ints (0 or 1), one per row, in input order.
    """
    return [1 if row[1] > threshold else 0 for row in output]

def evaluate(label, pred, threshold=0.5):
    """Print a classification report and the accuracy for one decision threshold.

    Args:
        label: ground-truth rows, converted with get_classes at its default threshold.
        pred: predicted score rows, converted with get_classes at ``threshold``.
        threshold: cut-off applied to the predictions only.
    """
    true_classes = get_classes(label)
    predicted_classes = get_classes(pred, threshold)

    header = 'threshold %f:' % threshold
    print(header)
    print(classification_report(true_classes, predicted_classes))
    accuracy = accuracy_score(true_classes, predicted_classes)
    print('accuracy: %f' % accuracy)

def evaluate_all(label, pred, thresholds=None):
    """Run evaluate() once per decision threshold.

    Args:
        label: ground-truth rows, forwarded unchanged to evaluate().
        pred: predicted score rows, forwarded unchanged to evaluate().
        thresholds: optional iterable of cut-offs to evaluate. When omitted,
            the sweep is 0.1, 0.2, ..., 0.9.
    """
    if thresholds is None:
        # i / 10 yields the float nearest to each tenth, whereas i * 0.1 drifts
        # (3 * 0.1 == 0.30000000000000004), so the cut-off actually applied
        # would differ from the one printed.
        thresholds = [i / 10 for i in range(1, 10)]

    for threshold in thresholds:
        evaluate(label, pred, threshold)

## Words Model

In [8]:
# Words-only model: its single input is the padded text sequences.
words_pred = words_model.predict([data_text])

In [9]:
# Print one report per threshold for the words-only predictions.
evaluate_all(label, words_pred)

threshold 0.100000:
              precision    recall  f1-score   support

           0       1.00      0.17      0.29       105
           1       0.89      1.00      0.94       693

   micro avg       0.89      0.89      0.89       798
   macro avg       0.94      0.59      0.62       798
weighted avg       0.90      0.89      0.86       798

accuracy: 0.890977
threshold 0.200000:
              precision    recall  f1-score   support

           0       0.87      0.26      0.40       105
           1       0.90      0.99      0.94       693

   micro avg       0.90      0.90      0.90       798
   macro avg       0.88      0.63      0.67       798
weighted avg       0.89      0.90      0.87       798

accuracy: 0.897243
threshold 0.300000:
              precision    recall  f1-score   support

           0       0.80      0.31      0.45       105
           1       0.90      0.99      0.94       693

   micro avg       0.90      0.90      0.90       798
   macro avg       0.85      0

## Context Model

In [10]:
# Context model: two inputs, the previous-words and next-words windows, in that order.
context_pred = context_model.predict([data_previous_words, data_next_words])

In [11]:
# Print one report per threshold for the context-only predictions.
evaluate_all(label, context_pred)

threshold 0.100000:
              precision    recall  f1-score   support

           0       0.62      0.17      0.27       105
           1       0.89      0.98      0.93       693

   micro avg       0.88      0.88      0.88       798
   macro avg       0.75      0.58      0.60       798
weighted avg       0.85      0.88      0.85       798

accuracy: 0.877193
threshold 0.200000:
              precision    recall  f1-score   support

           0       0.55      0.21      0.30       105
           1       0.89      0.97      0.93       693

   micro avg       0.87      0.87      0.87       798
   macro avg       0.72      0.59      0.62       798
weighted avg       0.85      0.87      0.85       798

accuracy: 0.873434
threshold 0.300000:
              precision    recall  f1-score   support

           0       0.50      0.28      0.36       105
           1       0.90      0.96      0.93       693

   micro avg       0.87      0.87      0.87       798
   macro avg       0.70      0

## Syntactic Model

In [12]:
# Syntactic model: its single input is the flattened syntactic feature vectors.
syntactic_pred = syntactic_model.predict([data_syntactic])

In [13]:
# Print one report per threshold for the syntactic-only predictions.
evaluate_all(label, syntactic_pred)

threshold 0.100000:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       105
           1       0.87      1.00      0.93       693

   micro avg       0.87      0.87      0.87       798
   macro avg       0.43      0.50      0.46       798
weighted avg       0.75      0.87      0.81       798

accuracy: 0.868421
threshold 0.200000:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       105
           1       0.87      1.00      0.93       693

   micro avg       0.87      0.87      0.87       798
   macro avg       0.43      0.50      0.46       798
weighted avg       0.75      0.87      0.81       798

accuracy: 0.868421
threshold 0.300000:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       105
           1       0.87      1.00      0.93       693

   micro avg       0.87      0.87      0.87       798
   macro avg       0.43      0

  'precision', 'predicted', average, warn_for)


## Words + Context Model

In [14]:
# Words + context model: text first, then the previous-words and next-words windows.
words_context_pred = words_context_model.predict([data_text, data_previous_words, data_next_words])

In [15]:
# Print one report per threshold for the words + context predictions.
evaluate_all(label, words_context_pred)

threshold 0.100000:
              precision    recall  f1-score   support

           0       0.79      0.47      0.59       105
           1       0.92      0.98      0.95       693

   micro avg       0.91      0.91      0.91       798
   macro avg       0.86      0.72      0.77       798
weighted avg       0.91      0.91      0.90       798

accuracy: 0.913534
threshold 0.200000:
              precision    recall  f1-score   support

           0       0.75      0.50      0.60       105
           1       0.93      0.98      0.95       693

   micro avg       0.91      0.91      0.91       798
   macro avg       0.84      0.74      0.77       798
weighted avg       0.90      0.91      0.90       798

accuracy: 0.912281
threshold 0.300000:
              precision    recall  f1-score   support

           0       0.70      0.53      0.61       105
           1       0.93      0.97      0.95       693

   micro avg       0.91      0.91      0.91       798
   macro avg       0.82      0

## Words + Syntactic Model

In [16]:
# Words + syntactic model: text first, then the syntactic feature vectors.
# These predictions are also the ones written to the enriched CSV at the end of the notebook.
words_syntactic_pred = words_syntactic_model.predict([data_text, data_syntactic])

In [17]:
# Print one report per threshold for the words + syntactic predictions.
evaluate_all(label, words_syntactic_pred)

threshold 0.100000:
              precision    recall  f1-score   support

           0       0.97      0.29      0.44       105
           1       0.90      1.00      0.95       693

   micro avg       0.90      0.90      0.90       798
   macro avg       0.93      0.64      0.69       798
weighted avg       0.91      0.90      0.88       798

accuracy: 0.904762
threshold 0.200000:
              precision    recall  f1-score   support

           0       0.92      0.31      0.47       105
           1       0.91      1.00      0.95       693

   micro avg       0.91      0.91      0.91       798
   macro avg       0.91      0.65      0.71       798
weighted avg       0.91      0.91      0.89       798

accuracy: 0.906015
threshold 0.300000:
              precision    recall  f1-score   support

           0       0.75      0.34      0.47       105
           1       0.91      0.98      0.94       693

   micro avg       0.90      0.90      0.90       798
   macro avg       0.83      0

## Context + Syntactic Model

In [18]:
# Context + syntactic model: previous-words, next-words, then the syntactic feature vectors.
context_syntactic_pred = context_syntactic_model.predict([data_previous_words, data_next_words, data_syntactic])

In [19]:
# Print one report per threshold for the context + syntactic predictions.
evaluate_all(label, context_syntactic_pred)

threshold 0.100000:
              precision    recall  f1-score   support

           0       0.73      0.36      0.48       105
           1       0.91      0.98      0.94       693

   micro avg       0.90      0.90      0.90       798
   macro avg       0.82      0.67      0.71       798
weighted avg       0.89      0.90      0.88       798

accuracy: 0.898496
threshold 0.200000:
              precision    recall  f1-score   support

           0       0.69      0.46      0.55       105
           1       0.92      0.97      0.94       693

   micro avg       0.90      0.90      0.90       798
   macro avg       0.80      0.71      0.75       798
weighted avg       0.89      0.90      0.89       798

accuracy: 0.901003
threshold 0.300000:
              precision    recall  f1-score   support

           0       0.67      0.52      0.59       105
           1       0.93      0.96      0.95       693

   micro avg       0.90      0.90      0.90       798
   macro avg       0.80      0

## Words + Context + Syntactic Model

In [20]:
# Full model: text, previous-words, next-words and syntactic inputs, in that order.
words_context_syntactic_pred = words_context_syntactic_model.predict([
    data_text, data_previous_words, data_next_words, data_syntactic])

In [21]:
# Print one report per threshold for the words + context + syntactic predictions.
evaluate_all(label, words_context_syntactic_pred)

threshold 0.100000:
              precision    recall  f1-score   support

           0       0.71      0.57      0.63       105
           1       0.94      0.96      0.95       693

   micro avg       0.91      0.91      0.91       798
   macro avg       0.82      0.77      0.79       798
weighted avg       0.91      0.91      0.91       798

accuracy: 0.912281
threshold 0.200000:
              precision    recall  f1-score   support

           0       0.69      0.61      0.65       105
           1       0.94      0.96      0.95       693

   micro avg       0.91      0.91      0.91       798
   macro avg       0.82      0.78      0.80       798
weighted avg       0.91      0.91      0.91       798

accuracy: 0.912281
threshold 0.300000:
              precision    recall  f1-score   support

           0       0.66      0.63      0.64       105
           1       0.94      0.95      0.95       693

   micro avg       0.91      0.91      0.91       798
   macro avg       0.80      0

# Richer Markable Data with Predicted Singletons (for coreference resolution testing purposes)

In [26]:
# Destination CSV: the test markables rewritten with is_singleton taken from model predictions.
rich_data_testing_file_path = "data/testing/markables_with_predicted_singleton.csv"

In [39]:
# 0/1 labels derived from the words + syntactic predictions at a 0.6 cut-off.
predicted_singletons = get_classes(words_syntactic_pred, 0.6)

# The csv module expects its files to be opened with newline=''. Without it,
# quoted fields containing line breaks can be misread, and platforms that
# translate line endings emit an extra blank line after every written row.
with open(data_testing_file_path, "r", newline='') as orifile:
    oricsv = DictReader(orifile)
    original_fieldnames = oricsv.fieldnames
    original_rows = list(oricsv)

# zip() stops at the shorter input, which would silently produce a truncated or
# misaligned file. Check before the output file is opened so nothing is
# overwritten on a mismatch.
if len(original_rows) != len(predicted_singletons):
    raise ValueError(
        'row/prediction count mismatch: %d rows in %s but %d predictions'
        % (len(original_rows), data_testing_file_path, len(predicted_singletons)))

with open(rich_data_testing_file_path, "w", newline='') as newfile:
    newcsv = DictWriter(newfile, fieldnames=original_fieldnames)
    newcsv.writeheader()

    # Copy every original column unchanged, overriding only is_singleton.
    for row, is_singleton in zip(original_rows, predicted_singletons):
        newcsv.writerow({**row, 'is_singleton': is_singleton})