# Evaluating model results after introducing token-based difference scores.

- No regard for token frames.

- Six difference scores constructed by averaging sequence differences across gold examples.

In [1]:
import sys
import json
sys.path.append('util')
import util
import pandas as pd

In [2]:
# Load the two category lists (observation categories, suggestion categories)
# from the project-local `util` module; they fix the column order used below.
obs_categories, sug_categories = util.load_categories()

In [3]:
# Display both category lists so the loaded column names can be inspected.
obs_categories, sug_categories

(['ACCURACY',
  'EXPRESSION',
  'FLUENCY',
  'MONITORING_FOR_MEANING',
  'MORPHOLOGY',
  'MULTISYLLABIC_WORDS',
  'OMISSION_INSERTION',
  'PHONICS',
  'PHRASING',
  'PRONUNCIATION',
  'PUNCTUATION',
  'RATE',
  'SELF_CORRECTION',
  'SIGHT_WORD',
  'SUBSTITUTION_REVERSAL',
  'VOCABULARY',
  'WORD_ATTACK',
  'WORD_BY_WORD',
  'WORD_ENDINGS'],
 ['ARTICULATION',
  'DIFFICULTY',
  'EXPRESSION',
  'FLUENCY',
  'MEANING_COMPRENHENSION',
  'MORPHOLOGY',
  'MULTISYLLABIC_WORDS',
  'OMISSIONS_INSERTIONS',
  'PHONICS',
  'PHRASING',
  'PRONUNCIATION',
  'PUNCTUATION',
  'RATE',
  'SELF_CORRECTION',
  'SELF_MONITOR',
  'SIGHT_WORD',
  'SUBSTITUTIONS_REVERSALS',
  'VOCABULARY',
  'VOICE',
  'WORD_ATTACK',
  'WORD_ENDINGS'])

Prepare matrices again.

In [4]:
# Alternative input (the variant without "_normalized" in its name), kept for reference:
# with open('res/reading_examples_with_token_features.json') as f:
#     reading_examples = json.load(f)
examples_path = 'res/reading_examples_with_token_features_normalized.json'
with open(examples_path) as examples_file:
    # Parsed JSON; the cells below iterate it and read the diff-score keys and 'Evaluations'.
    reading_examples = json.load(examples_file)

In [5]:
# Keys of the six difference scores stored on every reading example.
# Their order here is the column order of the diff features.
diff_types = [
    'StrippedPosDiff', 'StrippedNegDiff', 'StrippedMinorDiff',
    'NoPausePosDiff', 'NoPauseNegDiff', 'NoPauseMinorDiff',
]


def fetch_diff_scores(ex):
    """Return the difference scores of ``ex`` as a list ordered like ``diff_types``.

    Raises ``KeyError`` if ``ex`` lacks any of the keys in ``diff_types``.
    """
    scores = []
    for diff_name in diff_types:
        scores.append(ex[diff_name])
    return scores

In [6]:
# Tab-separated header rows for the label and feature TSV files.
# Feature columns are the observation categories followed by the diff-score names.
labels_header = '\t'.join(sug_categories)
features_header = '\t'.join([*obs_categories, *diff_types])

In [7]:
def combine_as_example(obs_scores, sug_scores, diff_scores):
    """Serialize one example's scores into a (features, labels) pair of TSV rows.

    Args:
        obs_scores: mapping with an entry for every name in ``obs_categories``.
        sug_scores: mapping with an entry for every name in ``sug_categories``.
        diff_scores: list of difference scores appended after the observation
            scores in the feature row.

    Returns:
        A 2-tuple of tab-joined strings: the feature row (observation scores in
        ``obs_categories`` order, then ``diff_scores``) and the label row
        (suggestion scores in ``sug_categories`` order).
    """
    obs_values = [obs_scores[name] for name in obs_categories]
    sug_values = [sug_scores[name] for name in sug_categories]
    feature_values = obs_values + diff_scores
    feature_row = '\t'.join(map(str, feature_values))
    label_row = '\t'.join(map(str, sug_values))
    return feature_row, label_row

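First, featurize by evaluation (one row per evaluation); later, featurize by example (one row per reading example, with the scores of all its evaluations summed).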

In [8]:
def ternerize(d):
    """Return a copy of ``d`` with every value clamped to the range [-1, 1].

    Values greater than 1 become 1, values less than -1 become -1, and every
    other value is carried over unchanged (so integer counts end up in
    {-1, 0, 1}).

    The input mapping is NOT modified: a new dict with the same keys, in the
    same order, is returned. This keeps the caller's raw counts intact, so
    re-running a cell cannot see already-clamped values.

    Args:
        d: mapping from category name to a numeric score.

    Returns:
        A new ``dict`` holding the clamped scores.
    """
    clamped = {}
    for key, value in d.items():
        if value > 1:
            clamped[key] = 1
        elif value < -1:
            clamped[key] = -1
        else:
            # Already within [-1, 1]; keep the original value (and its type).
            clamped[key] = value
    return clamped

In [18]:
# Build one feature row and one label row per evaluation.
features = []
labels = []
for example in reading_examples:
    # Diff scores are stored on the example, so all of its evaluations share them.
    example_diffs = fetch_diff_scores(example)
    for evaluation in example['Evaluations']:
        obs_scores = dict.fromkeys(obs_categories, 0)
        sug_scores = dict.fromkeys(sug_categories, 0)
        # Each observation maps to [polarity, category, ...]; NEUTRAL ones add nothing.
        for tags in evaluation['Observations'].values():
            if tags[0] != 'NEUTRAL':
                # Any polarity other than POSITIVE (and NEUTRAL) is scored as -1.
                sign = 1 if tags[0] == 'POSITIVE' else -1
                for tag in tags[1:]:
                    obs_scores[tag] += sign
        # Suggestions carry no polarity: add one per tagged category.
        for tags in evaluation['Suggestions'].values():
            for tag in tags:
                sug_scores[tag] += 1
        # NOTE(review): only the suggestion counts are clamped here; the per-example
        # cell further down clamps the observation sums as well - confirm this is intended.
        feature_row, label_row = combine_as_example(
            obs_scores, ternerize(sug_scores), example_diffs
        )
        features.append(feature_row)
        labels.append(label_row)

In [19]:
# Sanity check: the number of feature rows must equal the number of label rows.
len(features), len(labels)

(222, 222)

In [16]:
# with open('res/features_by_ex_with_normalized_token_scores.tsv', 'w') as f:
#     print(features_header, file=f)
#     for feature in features:
#         print(feature, file=f)

In [17]:
# with open('res/labels_by_ex_with_normalized_token_scores.tsv', 'w') as f:
#     print(labels_header, file=f)
#     for label in labels:
#         print(label, file=f)

In [20]:
# with open('res/features_by_eval_with_token_scores.tsv', 'w') as f:
#     print(features_header, file=f)
#     for feature in features:
#         print(feature, file=f)

In [21]:
# with open('res/labels_by_eval_with_token_scores.tsv', 'w') as f:
#     print(labels_header, file=f)
#     for label in labels:
#         print(label, file=f)

In [20]:
# Build one feature row and one label row per reading example,
# summing scores over all of the example's evaluations.
features = []
labels = []
for example in reading_examples:
    example_diffs = fetch_diff_scores(example)
    # Accumulators are created once per example and shared across its evaluations.
    obs_scores = dict.fromkeys(obs_categories, 0)
    sug_scores = dict.fromkeys(sug_categories, 0)
    for evaluation in example['Evaluations']:
        # Each observation maps to [polarity, category, ...]; NEUTRAL ones add nothing.
        for tags in evaluation['Observations'].values():
            if tags[0] != 'NEUTRAL':
                # Any polarity other than POSITIVE (and NEUTRAL) is scored as -1.
                sign = 1 if tags[0] == 'POSITIVE' else -1
                for tag in tags[1:]:
                    obs_scores[tag] += sign
        # Suggestions carry no polarity: add one per tagged category.
        for tags in evaluation['Suggestions'].values():
            for tag in tags:
                sug_scores[tag] += 1
    # Both the summed observation scores and the suggestion counts are clamped to [-1, 1].
    feature_row, label_row = combine_as_example(
        ternerize(obs_scores), ternerize(sug_scores), example_diffs
    )
    features.append(feature_row)
    labels.append(label_row)

In [21]:
# with open('res/features_by_ex_with_normalized_token_scores.tsv', 'x') as f:
#     print(features_header, file=f)
#     for feature in features:
#         print(feature, file=f)

In [22]:
# with open('res/labels_by_ex_with_normalized_token_scores.tsv', 'x') as f:
#     print(labels_header, file=f)
#     for label in labels:
#         print(label, file=f)

In [12]:
# with open('res/features_by_ex_with_token_scores.tsv', 'w') as f:
#     print(features_header, file=f)
#     for feature in features:
#         print(feature, file=f)

In [13]:
# with open('res/labels_by_ex_with_token_scores.tsv', 'w') as f:
#     print(labels_header, file=f)
#     for label in labels:
#         print(label, file=f)

By evaluation (n = 222)

In [18]:
# Read the per-evaluation feature and label TSV files into DataFrames.
ev_features_df = pd.read_csv('res/features_by_eval_with_token_scores.tsv', sep='\t')
ev_labels_df = pd.read_csv('res/labels_by_eval_with_token_scores.tsv', sep='\t')