In [1]:
# Load IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to project code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Selects which dataset the rest of the notebook operates on.
# Values handled by the cells below: 'test', 'spouse', 'semparse_cdr'.
DOMAIN = 'semparse_cdr'

In [3]:
import sys
import os

# Domains grouped by the candidate class they use.
_SPOUSE_DOMAINS = ('test', 'spouse')
_CDR_DOMAINS = ('semparse_cdr',)

# Fail fast: reject an unsupported DOMAIN before it is written into the
# database URL and before a session is created against it.
if DOMAIN not in _SPOUSE_DOMAINS + _CDR_DOMAINS:
    raise ValueError("Invalid DOMAIN: {!r} (expected one of: {})".format(
        DOMAIN, ', '.join(_SPOUSE_DOMAINS + _CDR_DOMAINS)))

# The database is named after the domain. SNORKELDB is deliberately set
# before anything from snorkel is imported -- presumably snorkel reads it at
# import time, so keep this ordering (TODO confirm).
os.environ['SNORKELDB'] = 'postgres:///{}'.format(DOMAIN)

from snorkel import SnorkelSession
session = SnorkelSession()

# Declare the candidate type for the chosen domain; `candidate_class` is the
# name the rest of the notebook uses.
from snorkel.models import candidate_subclass
if DOMAIN in _SPOUSE_DOMAINS:
    Spouse = candidate_subclass('Spouse', ['person1', 'person2'])
    candidate_class = Spouse
else:
    # Only the CDR domain can reach this branch thanks to the check above.
    ChemicalDisease = candidate_subclass('ChemicalDisease', ['chemical', 'disease'])
    candidate_class = ChemicalDisease

In [None]:
# Fetch every stored candidate of the selected class and report the count.
candidate_query = session.query(candidate_class)
candidates = candidate_query.all()
print("Candidates: {}".format(len(candidates)))

Candidates: 13780


In [None]:
%%time

# Build `user_lists`: named word lists, keyed by list name, that are later
# handed to the SemanticParser. Which lists exist depends on DOMAIN.
if DOMAIN == 'test':
    user_lists = {
        'colors':['red','green','blue'],
        'bluebird':['blue','bird','fly'],
        'greek':['alpha','beta','gamma'],
        'letters':['a','B','C'],
        'smalls':['a','b','c','d'],
        'spouse':['wife','husband','spouse']}
elif DOMAIN == 'spouse':
    user_lists = {
        'spouse':['wife','husband','spouse'],
        'family':['father', 'mother', 'brother', 'sister']}
elif DOMAIN == 'semparse_cdr':
    import bz2
    import cPickle
    # Skip the load when this cell is re-run in a kernel that already holds
    # the CTD data (the check looks only for `ctd_unspecified`).
    if 'ctd_unspecified' not in globals():
        # NOTE: unpickling executes arbitrary code from the file; only use a
        # data/ctd.pkl.bz2 that comes from a trusted source.
        with bz2.BZ2File('data/ctd.pkl.bz2', 'rb') as ctd_f:
            ctd_unspecified, ctd_therapy, ctd_marker = cPickle.load(ctd_f)
    user_lists = {
        'uncertain': ['combin', 'possible', 'unlikely'],
        'causal': ['causes', 'caused', 'induce', 'induces', 'induced', 'associated with'],
        'treat': ['treat', 'effective', 'prevent', 'resistant', 'slow', 'promise', 'therap'],
        'procedure': ['inject', 'administrat'],
        'patient': ['in a patient with', 'in patients with'],
        'weak': ['none', 'although', 'was carried out', 'was conducted', 'seems',
                 'suggests', 'risk', 'implicated', 'the aim', 'to investigate',
                 'to assess', 'to study'],
        'ctd_unspecified': ctd_unspecified,
        'ctd_therapy': ctd_therapy,
        'ctd_marker': ctd_marker,
    }
else:
    # Without this branch an unsupported DOMAIN would leave `user_lists`
    # undefined and only surface later as a NameError.
    raise ValueError("Invalid DOMAIN: {!r}".format(DOMAIN))

In [None]:
from semparse_examples import get_examples

# Fetch the examples for the selected domain, passing in the loaded
# candidates. These are what `sp.evaluate` is run on further down.
# NOTE(review): the commented-out cells index into `examples` and read
# `.candidate` / `.display()`, so it is presumably a sequence of example
# objects -- confirm against semparse_examples.
examples = get_examples(DOMAIN, candidates)

In [None]:
from snorkel.semantic import (get_left_phrases, get_right_phrases, 
                              get_between_phrases, get_sentence_phrases)
# Pick one candidate for manual inspection: print the 'text' field of the
# parent of its first element, then the candidate itself.
# print(...) with a single argument produces the same output as the print
# statement on Python 2 and matches the call form already used when the
# candidate count is printed.
c = candidates[1]
print(c[0].get_parent()._asdict()['text'])
print(c)

In [None]:
# print c[0].get_attrib_tokens(a='words')
# c[0].get_attrib_tokens(a='entity_cids')

In [None]:
# for p in get_left_phrases(c[0], cmp='.leq', num=3, n_max=3):
#     print getattr(p, 'text')

In [None]:
# c[0].get_parent()._asdict()

In [None]:
# from snorkel.viewer import SentenceNgramViewer
# sv = SentenceNgramViewer(candidates[:300], session)
# sv

In [None]:
# c = sv.get_selected()
# print c
# print c[0].get_parent()._asdict()['pos_tags']
# print c[0].get_parent()._asdict()['ner_tags']

In [None]:
# hash(c)

In [None]:
# examples[0].display()

In [None]:
# examples[28].candidate[0].get_parent()

In [None]:
from snorkel.semantic import SemanticParser

# Construct the parser from the domain's candidate class and word lists;
# %time reports how long construction takes.
%time sp = SemanticParser(candidate_class, user_lists, absorb=False)

In [None]:
# Evaluate the parser on the examples with every show_* flag (and
# pseudo_python) switched off, timing the call with %time. The trailing
# backslashes keep the whole call on one logical line for the line magic.
# NOTE(review): `only=[]` presumably means "do not restrict to specific
# examples" -- confirm against SemanticParser.evaluate.
%time results = sp.evaluate(examples,\
                            show_everything=False,\
                            show_explanation=False,\
                            show_candidate=False,\
                            show_sentence=False,\
                            show_parse=False,\
                            show_passing=False,\
                            show_correct=False,\
                            pseudo_python=False,\
                            only=[])
# Bare expression so the notebook displays the returned value.
results

In [None]:
import numpy as np
# Sum the evaluation results along axis 0. The call form prints the same
# text as the Python 2 print statement for a single argument and is
# consistent with the print(...) call used for the candidate count.
print(np.sum(results, axis=0))

In [None]:
# sp.LFs unpacks into six groups; only the `correct` and `passing` groups
# are concatenated into `LFs`, and the size of that combined list is printed.
correct, passing, failing, redundant, erroring, unknown = sp.LFs
LFs = correct + passing
# Call form: same output as the print statement on Python 2, and consistent
# with the print(...) call used for the candidate count.
print(len(LFs))

In [None]:
from snorkel.semantic import sem_to_str

# A hand-written semantic tree, laid out one level per line (the value is
# identical to the original single-line literal). Structurally it is:
#   label True  <-  (.any over .map of ".in text-between-arg1-and-arg2"
#                    applied to user list 'causal')
#                   .and
#                   (.not of ".in text-between-arg1-and-arg2" called on 'not')
sem = (
    '.root',
    ('.label',
     ('.bool', True),
     ('.and',
      ('.any',
       ('.map',
        ('.in',
         ('.extract_text',
          ('.between',
           ('.list', ('.arg', ('.int', 1)), ('.arg', ('.int', 2)))))),
        ('.user_list', ('.string', u'causal')))),
      ('.not',
       ('.call',
        ('.in',
         ('.extract_text',
          ('.between',
           ('.list', ('.arg', ('.int', 1)), ('.arg', ('.int', 2)))))),
        ('.string', u'not'))))))
# Show the raw tuple, then its rendering by sem_to_str. The call form prints
# the same text as the Python 2 print statement for a single argument.
print(sem)
print(sem_to_str(sem))

In [None]:
# sp.grammar.print_chart(nested=True)

In [None]:
# sp.grammar.print_grammar()