# Notebook: Count each observation/suggestion as a separate training example

In [11]:
import json
import pandas as pd
from collections import Counter, defaultdict

In [2]:
# Load the reading examples that the later cells iterate over.
# The encoding is given explicitly: JSON files are expected to be UTF-8,
# whereas open() would otherwise fall back to the platform's locale encoding
# and could mis-decode (or fail on) non-ASCII text.
with open('res/reading_examples.json', encoding='utf-8') as f:
    reading_examples = json.load(f)

In [16]:
def _read_header_categories(path):
    """Return the sorted column names from the first line of a TSV file.

    Only the first line is read. Surrounding whitespace is stripped from
    that line before it is split on tab characters.

    Args:
        path: Path of the tab-separated file whose header should be read.

    Returns:
        The header fields as a sorted list of strings.

    Raises:
        ValueError: If the file is empty. Failing here is deliberate: the
            category list would otherwise stay undefined (or keep a stale
            value from an earlier run of this cell).
    """
    with open(path) as f:
        header = f.readline()
    if not header:
        raise ValueError(
            '%r is empty; expected a tab-separated header line' % (path,)
        )
    return sorted(header.strip().split('\t'))


# Column order used for every row of the per-evaluation tables.
obs_categories = _read_header_categories('.obs.tsv.tmp')
sug_categories = _read_header_categories('.sug.tsv.tmp')

In [33]:
# Build the per-evaluation tables: a tab-joined header row followed by one
# row per evaluation, with one column per known category.
obs_table = ['\t'.join(obs_categories)]
sug_table = ['\t'.join(sug_categories)]
for example in reading_examples:
    for evaluation in example['Evaluations']:
        # Net polarity per observation category. The first element of each
        # entry is the polarity and the remaining elements are categories:
        # +1 per POSITIVE mention, -1 for any other non-NEUTRAL polarity.
        obs_scores = Counter()
        for remarks in evaluation['Observations'].values():
            polarity = remarks[0]
            if polarity == 'NEUTRAL':
                continue
            if polarity == 'POSITIVE':
                delta = 1
            else:
                delta = -1
            for category in remarks[1:]:
                obs_scores[category] += delta
        # Only header categories are emitted; ones never mentioned in this
        # evaluation come out as 0.
        obs_row = [str(obs_scores[category]) for category in obs_categories]
        assert len(obs_row) == len(obs_categories)
        obs_table.append('\t'.join(obs_row))

        # Number of mentions per suggestion category in this evaluation.
        sug_counts = Counter()
        for remarks in evaluation['Suggestions'].values():
            for category in remarks:
                sug_counts[category] += 1
        sug_row = [str(sug_counts[category]) for category in sug_categories]
        assert len(sug_row) == len(sug_categories)
        sug_table.append('\t'.join(sug_row))

In [37]:
# Persist the observation table, one tab-separated row per line.
with open('.obs.tsv.tmp.by_evaluation', 'w') as out:
    out.writelines(row + '\n' for row in obs_table)

In [38]:
# Persist the suggestion table, one tab-separated row per line.
with open('.sug.tsv.tmp.by_evaluation', 'w') as out:
    out.writelines(row + '\n' for row in sug_table)