In [None]:
# CONFIGURATION
# All tunables read by the later cells live here; each comment names the call
# that consumes the value.

# Database name. Only referenced by the commented-out SNORKELDB assignment in
# the session cell, so changing it has no effect while that line stays disabled.
db = 'semparse_cdr_lite'
PARALLELISM = 1  # CDRModel(parallelism=...)
MAX_DOCS = 1500  # sm.parse(max_docs=...)
SPLITS = 3  # CDRModel(splits=...)

# LFs (presumably "labeling functions") -- arguments to sm.generate_lfs()
SOURCE = 'py'  # generate_lfs(source=...)
INCLUDE = []  # generate_lfs(include=...)
MAX_LFS = None  # generate_lfs(max_lfs=...)

# Supervision -- arguments to sm.supervise(), with the exception of TRADITIONAL
MODEL_DEP = True  # supervise(model_dep=...)
MAJORITY_VOTE = False  # supervise(majority_vote=...)
TRADITIONAL = False  # NOTE: passed to CDRModel(traditional=...), not to supervise()
DISPLAY_CORRELATION = True  # supervise(display_correlation=...)
EMPIRICAL_FROM_TRAIN = False  # supervise(empirical_from_train=...)

# Discriminative -- arguments to sm.classify()
SEARCH_N = 1  # classify(search_n=...)
N_EPOCHS = 5  # classify(n_epochs=...)
LR = 0.01  # classify(lr=...)
L1_PENALTY = 0.0  # classify(l1_penalty=...)
L2_PENALTY = 0.0  # classify(l2_penalty=...)
PRINT_FREQ = 5  # classify(print_freq=...)

In [None]:
import os

# Disabled: would set the SNORKELDB environment variable to a local Postgres
# URL built from `db`. While it stays commented out, this cell uses neither
# `os` nor `db` and does not set SNORKELDB.
# NOTE(review): presumably snorkel reads SNORKELDB when it is imported, which
# would explain why the assignment sits above the import -- confirm before
# re-enabling or moving it.
# os.environ['SNORKELDB'] = 'postgres://localhost:5432/' + db

from snorkel import SnorkelSession
# Session object handed to CDRModel as its first argument in a later cell.
session = SnorkelSession()

In [None]:
from snorkel.models import candidate_subclass
# Create the ChemicalDisease class through candidate_subclass, giving it the
# name 'ChemicalDisease' and the list ['chemical', 'disease'] (presumably the
# names of the relation's two arguments -- confirm against snorkel.models).
# The class is passed to CDRModel as candidate_class in a later cell.
ChemicalDisease = candidate_subclass('ChemicalDisease', ['chemical', 'disease'])

In [None]:
from snorkel.semantic import CDRModel

# Pipeline object that every later cell calls into (parse, extract, load_gold,
# featurize, generate_lfs, label, supervise, classify). The open parenthesis
# already continues the statement, so no trailing backslashes are required.
sm = CDRModel(
    session,
    candidate_class=ChemicalDisease,
    traditional=TRADITIONAL,
    splits=SPLITS,
    parallelism=PARALLELISM
)

In [None]:
# Time the parse stage; max_docs comes from MAX_DOCS in the configuration cell
# (presumably an upper bound on the documents parsed -- confirm in CDRModel.parse).
%time sm.parse(max_docs=MAX_DOCS)

In [None]:
# Time the extract stage; it takes no arguments from the configuration cell.
%time sm.extract()

In [None]:
# Time the load_gold stage (presumably loads gold-standard labels -- confirm in
# CDRModel.load_gold); it takes no arguments from the configuration cell.
%time sm.load_gold()

In [None]:
# Time the featurize stage; it takes no arguments from the configuration cell.
%time sm.featurize()

In [None]:
# Time the generate_lfs stage, driven by the SOURCE, INCLUDE and MAX_LFS
# values from the "LFs" section of the configuration cell.
%time sm.generate_lfs(source=SOURCE, include=INCLUDE, max_lfs=MAX_LFS)

In [None]:
# Time the label stage; it takes no arguments from the configuration cell.
%time sm.label()

In [None]:
# Time the supervise stage using the "Supervision" settings from the
# configuration cell. The trailing backslashes are required: the %time line
# magic only receives one logical line, and backslash continuation is what
# joins these four physical lines into it.
%time sm.supervise(model_dep=MODEL_DEP,\
                   majority_vote=MAJORITY_VOTE,\
                   display_correlation=DISPLAY_CORRELATION,\
                   empirical_from_train=EMPIRICAL_FROM_TRAIN)

In [None]:
# Time the classify stage using the "Discriminative" settings from the
# configuration cell. Note that model='logreg' is hard-coded here rather than
# taken from the configuration cell. The trailing backslashes are required so
# the %time line magic receives the whole call as one logical line.
%time sm.classify(model='logreg', search_n=SEARCH_N, n_epochs=N_EPOCHS,\
            lr=LR, l1_penalty=L1_PENALTY, l2_penalty=L2_PENALTY,\
            print_freq=PRINT_FREQ)