# Markerville Backend

### Imports and Establishing XML File Preprocessors

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from snorkel import SnorkelSession
session = SnorkelSession()

import os
from snorkel.parser import XMLMultiDocPreprocessor

# The following line is for testing only. Feel free to ignore it.

# Training corpus: one document per <article> element; the text comes from the
# abstract paragraphs and the document id from the <article-id> element.
file_path = 'articles/training.xml'
train_preprocessor = XMLMultiDocPreprocessor(
    path=file_path,
    doc='.//article',
    text='.//front/article-meta/abstract/p/text()',
    id=  './/front/article-meta/article-id/text()'
)

# Development corpus: a different XML layout, one document per <document>
# element with the text under <passage>/<text>.
file_path = 'articles/development.xml'
dev_preprocessor = XMLMultiDocPreprocessor(
    path=file_path,    
    doc='.//document',    
    text='.//passage/text/text()',    
    id='.//id/text()'
)

# Test corpus: same XPath layout as the development corpus.
file_path = 'articles/testcorpus.xml'
test_preprocessor = XMLMultiDocPreprocessor(
    path=file_path,    
    doc='.//document',    
    text='.//passage/text/text()',    
    id='.//id/text()'
)



### Corpus Parsing

The CorpusParser is applied to each of the corpora (train, dev, and test). This breaks each corpus into Documents and Sentences to be used for extraction.

In [3]:
from snorkel.parser import CorpusParser
from snorkel.parser import XMLMultiDocPreprocessor, CorpusParser
from snorkel.parser.spacy_parser import Spacy

# Parse all three corpora with one spaCy-backed parser. Each preprocessor is
# materialised into a list before being handed to the parser.
corpus_parser = CorpusParser(parser=Spacy())
corpus_parser.apply(list(train_preprocessor)) #parallelism can be run with a Postgres DBMS, but not SQLite
# NOTE(review): clear=False presumably keeps the documents parsed by the
# previous call(s) instead of wiping them -- confirm against CorpusParser.apply.
corpus_parser.apply(list(dev_preprocessor), clear=False)
corpus_parser.apply(list(test_preprocessor), clear=False)

NameError: name 'Spacy' is not defined

Sort the sentences into train_sents, dev_sents, and test_sents according to the document IDs (train_ids, dev_ids, test_ids)

In [3]:
from six.moves.cPickle import load
from snorkel.models import Document, Sentence
import cPickle

with open('articles/doc_ids.pkl', 'rb') as f:
    train_ids, dev_ids, test_ids = load(f)
train_ids, dev_ids, test_ids = set(train_ids), set(dev_ids), set(test_ids)
print len(train_ids)
print len(dev_ids)
print len(test_ids)
train_sents, dev_sents, test_sents = set(), set(), set()
docs = session.query(Document).order_by(Document.name).all()
for i, doc in enumerate(docs):
    for s in doc.sentences:
        if doc.name in train_ids:
            train_sents.add(s)
        elif doc.name in dev_ids:
            dev_sents.add(s)
        elif doc.name in test_ids:
            test_sents.add(s)
        else:
            raise Exception('ID <{0}> not found in any id set'.format(doc.name))
    

1905
160
30


### Candidate Extraction

Establish the "subclasses" of the Candidates to be extracted. (The type field is named `typ3` because `type` is the name of a Python built-in.)

In [None]:
from snorkel.models import Candidate, candidate_subclass

# One candidate class per relation. The list gives the names of the entity
# fields on each candidate; every relation pairs a biomarker with one or two
# other entities (BiomarkerLevelUnit is the only three-field relation).
BiomarkerCondition = candidate_subclass('BiomarkerCondition', ['biomarker', 'condition'])
BiomarkerType = candidate_subclass('BiomarkerType', ['biomarker', 'typ3'])
BiomarkerMedium = candidate_subclass('BiomarkerMedium', ['biomarker', 'medium'])
BiomarkerLevelUnit = candidate_subclass('BiomarkerLevelUnit', ['biomarker', 'level', 'unit'])
BiomarkerDrug = candidate_subclass('BiomarkerDrug', ['biomarker', 'drug'])

Set the maximum n-gram length (number of words) to consider for each entity. For example, `n_max=2` means that the matchers can match spans of up to 2 words.

In [4]:
from snorkel.models import candidate_subclass
from snorkel.candidates import Ngrams, CandidateExtractor
from snorkel.matchers import PersonMatcher
import matchers
from snorkel.models import Document
from snorkel.viewer import SentenceNgramViewer
import os

# Candidate span spaces, one per entity type. n_max differs per entity:
# biomarkers are limited to single tokens, the others allow 5 to 8.
# NOTE(review): n_max is presumably the maximum span length in tokens -- confirm
# against snorkel.candidates.Ngrams.
biomarker_ngrams = Ngrams(n_max=1)
condition_ngrams = Ngrams(n_max=7)
type_ngrams = Ngrams(n_max=5)
medium_ngrams = Ngrams(n_max=5)
level_ngrams = Ngrams(n_max=8)
unit_ngrams = Ngrams(n_max=8)
drug_ngrams = Ngrams(n_max=5)





ASDFASDF
['Ym', 'yottameter', 'Zm', 'zettameter', 'Em', 'exameter', 'Pm', 'petameter', 'Tm', 'terameter', 'Gm', 'gigameter', 'Mm', 'megameter', 'km', 'kilometer', 'hm', 'hectometer', 'dam', 'dekameter', 'dm', 'decimeter', 'cm', 'centimeter', '\\u03bcm', 'micrometer', 'nm', 'nanometer', 'pm', 'picometer', 'fm', 'femtometer', 'am', 'attometer', 'zm', 'zeptometer', 'ym', 'yoctometer', 'Ym2', 'square yottameter', 'Zm2', 'square zettameter', 'Em2', 'square exameter', 'Pm2', 'square petameter', 'Tm2', 'square terameter', 'Gm2', 'square gigameter', 'Mm2', 'square megameter', 'km2', 'square kilometer', 'hm2', 'square hectometer', 'dam2', 'square dekameter', 'dm2', 'square decimeter', 'cm2', 'square centimeter', '\\u03bcm2', 'square micrometer', 'nm2', 'square nanometer', 'pm2', 'square picometer', 'fm2', 'square femtometer', 'am2', 'square attometer', 'zm2', 'square zeptometer', 'ym2', 'square yoctometer', 'Ym2', 'cubic yottameter', 'Zm2', 'cubic zettameter', 'Em2', 'cubic exameter', 'Pm2', 'c

Initialize the Matcher objects, regular expressions and dictionaries that are used to match the desired entities

In [None]:
# Create one matcher per entity type (seven in total). The factories come from
# the `matchers` module imported above, not from snorkel.matchers.
bMatcher = matchers.getBiomarkerMatcher()
cMatcher = matchers.getDiseaseMatcher()
tMatcher = matchers.getTypeMatcher()
mMatcher = matchers.getMediumMatcher()
lMatcher = matchers.getLevelsMatcher()
uMatcher = matchers.getUnitsMatcher()
dMatcher = matchers.getDrugMatcher()



Create CandidateExtractors using the previously defined subclasses. These take in the matchers for each entity in the subclass and then use these matchers to generate the Candidates

In [None]:
# Building the CandidateExtractor 
# Each extractor is given its candidate class, the n-gram spaces for its
# entities, and the matchers for those entities; the three lists are written in
# the same entity order as the candidate class's fields.
bc_candidate_extractor = CandidateExtractor(BiomarkerCondition, [biomarker_ngrams, condition_ngrams], [bMatcher, cMatcher])
bt_candidate_extractor = CandidateExtractor(BiomarkerType, [biomarker_ngrams, type_ngrams], [bMatcher, tMatcher])
bm_candidate_extractor = CandidateExtractor(BiomarkerMedium, [biomarker_ngrams, medium_ngrams], [bMatcher, mMatcher])
blu_candidate_extractor = CandidateExtractor(BiomarkerLevelUnit, [biomarker_ngrams, level_ngrams, unit_ngrams], [bMatcher, lMatcher, uMatcher])
bd_candidate_extractor = CandidateExtractor(BiomarkerDrug, [biomarker_ngrams, drug_ngrams], [bMatcher, dMatcher])

# All extractors, in the order they are applied by the extraction loop.
extractors = [bc_candidate_extractor, bt_candidate_extractor, bm_candidate_extractor, blu_candidate_extractor, bd_candidate_extractor]

In [13]:
# Run every extractor over every split. The position in this tuple is the split
# number stored on the candidates: 0 = train, 1 = dev, 2 = test.
split_sentence_sets = (train_sents, dev_sents, test_sents)
for extractor in extractors:
    for split_index in range(len(split_sentence_sets)):
        extractor.apply(split_sentence_sets[split_index], split=split_index, clear=False)
        # Commit after each (extractor, split) pair, as the original loop did.
        session.commit()




Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...

Running UDF...



To address specificity issues for medium, condition, and drug entities, this step also grabs the adjectives in front of the entity. The goal is to have more specific entities, such as "esophageal cancer" rather than just "cancer".

In [14]:
import candidate_adjective_fixer
import candidate_adjective_fixer_medium
import candidate_adjective_fixer_drug 

# Dev-split (split == 1) candidates for the three relations that get adjective
# expansion: condition, medium and drug.
c_dev_cands = session.query(BiomarkerCondition).filter(BiomarkerCondition.split == 1).all()
m_dev_cands = session.query(BiomarkerMedium).filter(BiomarkerMedium.split == 1).all()
d_dev_cands = session.query(BiomarkerDrug).filter(BiomarkerDrug.split == 1).all()

# Candidate counts before the fixers run.
print len(c_dev_cands)
print len(m_dev_cands)
print len(d_dev_cands)

# NOTE(review): add_adj_candidate lives in modules not shown here. Judging by
# the recorded output ("ADDING NEW CANDIDATE") it can insert new candidates,
# so the *_dev_cands lists above may be stale afterwards -- confirm.
candidate_adjective_fixer_drug.add_adj_candidate(session, BiomarkerDrug, d_dev_cands)
candidate_adjective_fixer_medium.add_adj_candidate(session, BiomarkerMedium, m_dev_cands)
candidate_adjective_fixer.add_adj_candidate(session, BiomarkerCondition, c_dev_cands)


571
369
265
 added to Decitabine
 added to orlistat
 added to decitabine
 added to decitabine
 added to oxaliplatin
 added to irinotecan
 added to gemcitabine
 added to fluorouracil
 added to oxaliplatin
 added to irinotecan
 added to gemcitabine
 added to fluorouracil
 added to oxaliplatin
 added to irinotecan
 added to gemcitabine
 added to fluorouracil
 added to oxaliplatin
 added to irinotecan
 added to gemcitabine
 added to fluorouracil
 added to sorafenib
 added to sorafenib
 added to sorafenib
 added to sorafenib
 added to sorafenib
 added to sorafenib
 added to sorafenib
 added to sorafenib
 added to pemetrexed
 added to docetaxel
 added to irinotecan
 added to pemetrexed
 added to docetaxel
 added to irinotecan
 added to pemetrexed
 added to docetaxel
 added to irinotecan
 added to pemetrexed
 added to docetaxel
 added to irinotecan
 added to estradiol
 added to estradiol
 added to estradiol
 added to estradiol
 added to estradiol
 added to estradiol
 added to estradiol
 added

Checking Biomarker: 28260649::span:1230:1231
Checking Condition: 28260649::span:162:175
1
baseline  added to serum
Checking Biomarker: 28260649::span:1103:1109
Checking Condition: 28260649::span:162:175
1
baseline  added to serum
Checking Biomarker: 28260649::span:1280:1285
Checking Condition: 28260649::span:162:175
1
 added to serum
 added to serum
 added to serum
 added to serum
 added to lymph
tumor  added to blood
Checking Biomarker: 28264009::span:273:275
Checking Condition: 28264009::span:72:82
0
Couldn't find: 28264009::span:72:82
ADDING NEW CANDIDATE
 added to astrocytes
 added to blood
dead  added to astrocytes
Checking Biomarker: 28264063::span:1257:1259
Checking Condition: 28264063::span:154:168
0
Couldn't find: 28264063::span:154:168
ADDING NEW CANDIDATE
dead  added to astrocytes
Checking Biomarker: 28264063::span:1257:1259
Checking Condition: 28264063::span:32:46
0
Couldn't find: 28264063::span:32:46
ADDING NEW CANDIDATE
pregnancy-associated  added to plasma
Checking Bioma

1
 added to lymph
 added to blood
 added to plasma
 added to blood
 added to plasma
 added to blood
 added to plasma
 added to serum
 added to serum
 added to serum
 added to Serum
retroperitoneal robot-assisted  added to lymph
Checking Biomarker: 28262537::span:54:61
Checking Condition: 28262537::span:0:35
0
Couldn't find: 28262537::span:0:35
ADDING NEW CANDIDATE
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to CSF
 added to cerebrospinal fluid
 added to CSF
 added to

Checking Biomarker: 28263391::span:871:872
Checking Condition: 28263391::span:70:89
0
Couldn't find: 28263391::span:70:89
ADDING NEW CANDIDATE
Checking Biomarker: 28264096::span:1760:1761
Checking Condition: 28264096::span:255:266
0
Couldn't find: 28264096::span:255:266
ADDING NEW CANDIDATE
Checking Biomarker: 28264096::span:1741:1743
Checking Condition: 28264096::span:255:266
1
Checking Biomarker: 28264096::span:1874:1875
Checking Condition: 28264096::span:255:266
1
Checking Biomarker: 28264096::span:1613:1615
Checking Condition: 28264096::span:255:266
1
Checking Biomarker: 28264096::span:1951:1952
Checking Condition: 28264096::span:255:266
1
Checking Biomarker: 28264096::span:1680:1681
Checking Condition: 28264096::span:255:266
1
Checking Biomarker: 28264096::span:1937:1938
Checking Condition: 28264096::span:255:266
1
Checking Biomarker: 28264096::span:1809:1811
Checking Condition: 28264096::span:255:266
1
Checking Biomarker: 28264096::span:1666:1667
Checking Condition: 28264096::spa

1
Checking Biomarker: 28261908::span:1009:1014
Checking Condition: 28261908::span:323:360
1
Checking Biomarker: 28261908::span:855:859
Checking Condition: 28261908::span:173:206
1
Checking Biomarker: 28261908::span:855:859
Checking Condition: 28261908::span:442:476
1
Checking Biomarker: 28261908::span:855:859
Checking Condition: 28261908::span:488:505
1
Checking Biomarker: 28261908::span:855:859
Checking Condition: 28261908::span:323:360
1
Checking Biomarker: 28260175::span:107:113
Checking Condition: 28260175::span:13:47
0
Couldn't find: 28260175::span:13:47
ADDING NEW CANDIDATE
Checking Biomarker: 28260175::span:157:160
Checking Condition: 28260175::span:13:47
1
Checking Biomarker: 28260391::span:434:440
Checking Condition: 28260391::span:86:107
0
Couldn't find: 28260391::span:86:107
ADDING NEW CANDIDATE
Checking Biomarker: 28262611::span:506:507
Checking Condition: 28262611::span:141:160
0
Couldn't find: 28262611::span:141:160
ADDING NEW CANDIDATE
Checking Biomarker: 28261345::span:

Checking Biomarker: 28261333::span:295:299
Checking Condition: 28261333::span:151:198
0
Couldn't find: 28261333::span:151:198
ADDING NEW CANDIDATE
Checking Biomarker: 28263701::span:1335:1336
Checking Condition: 28263701::span:57:86
0
Couldn't find: 28263701::span:57:86
ADDING NEW CANDIDATE
Checking Biomarker: 28261336::span:31:35
Checking Condition: 28261336::span:163:186
0
Couldn't find: 28261336::span:163:186
ADDING NEW CANDIDATE
Checking Biomarker: 28263037::span:1312:1321
Checking Condition: 28263037::span:91:116
0
Couldn't find: 28263037::span:91:116
ADDING NEW CANDIDATE
Checking Biomarker: 28263037::span:1460:1462
Checking Condition: 28263037::span:91:116
1
Checking Biomarker: 28263037::span:1332:1335
Checking Condition: 28263037::span:91:116
1
Checking Biomarker: 28260162::span:108:114
Checking Condition: 28260162::span:92:119
0
Couldn't find: 28260162::span:92:119
ADDING NEW CANDIDATE
Checking Biomarker: 28260162::span:618:624
Checking Condition: 28260162::span:97:124
0
Couldn

In [15]:
# Re-count the dev-split BiomarkerCondition candidates straight from the
# database, then commit the session.
print session.query(BiomarkerCondition).filter(BiomarkerCondition.split == 1).count()
session.commit()

675


### Labelling Functions

This is the weak-supervision portion of the pipeline. The cells below define the labelling functions and apply them to the candidates.

In [None]:
import re
#from snorkel.lf_terms import *
from snorkel.lf_helpers import  *
from snorkel.lf_helpers import get_sent_candidate_spans
from snorkel.lf_helpers import get_left_tokens, get_right_tokens
from random import randint
import cPickle
from PyDictionary import PyDictionary



#umls_dict              = load_umls_dictionary()
#chemicals              = load_chemdner_dictionary()
#abbrv2text, text2abbrv = load_specialist_abbreviations()

keyWords = ["associate", "express", "marker", "biomarker", "elevated", "decreased",
            "correlation", "correlates", "found", "diagnose", "variant", "appear",
            "connect", "relate", "exhibit", "indicate", "signify", "show", "demonstrate",
            "reveal", "suggest", "evidence", "elevation", "indication", "diagnosis",
            "variation", "modification", "suggestion", "link", "derivation", "denote",
            "denotation", "demonstration", "magnification", "depression", "boost", "level",
            "advance", "augmentation", "lessening", "enhancement", "expression", "buildup",
            "diminishing", "diminishment", "reduction", "drop", "dwindling", "lowering"]

negationWords = ["not", "nor", "neither"]

# Expand keyWords with synonyms looked up through PyDictionary. New synonyms are
# collected in first-seen order and appended after the hand-written keywords.
dictionary = PyDictionary()      # one instance reused for every lookup
known_words = set(keyWords)      # O(1) duplicate check instead of scanning two lists
toAdd = []
for keyword in keyWords:
    syns = dictionary.synonym(keyword)
    if syns is None:
        # No synonyms were returned for this keyword.
        continue
    for syn in syns:
        if syn not in known_words:
            known_words.add(syn)
            toAdd.append(syn)
keyWords.extend(toAdd)

# Reference data used by the biomarker labelling functions.
# NOTE(review): unpickling runs arbitrary code; only load trusted files.

# Collection of known marker names, tested with `in` by LF_markerDatabase.
markerDatabase = []
with open('databases/markerData.pickle', 'rb') as f:
    markerDatabase = cPickle.load(f)


# Collection of known abbreviations, tested with `in` by LF_known_abs.
knowAbbreviations = []
with open('databases/abbreviations.com.pkl', 'rb') as f:
    knowAbbreviations = cPickle.load(f)
    

# Biomarker Validity 

def LF_markerDatabase(c):
    """Return 1 if the biomarker span text is in markerDatabase, else 0."""
    return 1 if c.biomarker.get_span() in markerDatabase else 0
def LF_abstract_titleWord(c):
    """Return -1 if more than one token lies between the spans and the first is ":".

    Returns None in every other case.
    """
    between = list(get_between_tokens(c))
    if len(between) > 1 and between[0] == ":":
        return -1

def LF_single_letter(c):
    """Return -1 if the biomarker span text is shorter than two characters, else 0."""
    span_text = c.biomarker.get_span()
    return -1 if len(span_text) < 2 else 0

def LF_known_abs(c):
    """Return -1 if the biomarker span text is in knowAbbreviations, else None."""
    is_known_abbreviation = c.biomarker.get_span() in knowAbbreviations
    if not is_known_abbreviation:
        return None
    return -1
    
def LF_same_thing(c):
    """Return -1 if the candidate's first two spans have identical text, else None."""
    first_text, second_text = c[0].get_span(), c[1].get_span()
    return -1 if first_text == second_text else None
    
# Upper-case Roman numerals, anchored at the start of the text. The numeral must
# be followed by whitespace or the end of the text; the original pattern only
# accepted whitespace, so a span that is exactly a numeral (e.g. "IV") never matched.
_ROMAN_NUMERAL_RE = re.compile(
    r'((?<=\s)|(?<=^))'
    r'(M{1,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'
    r'|M{0,4}(CM|CD|D?C{1,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'
    r'|M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{1,3})(IX|IV|V?I{0,3})'
    r'|M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{1,3}))'
    r'(?=\s|$)'
)


def LF_roman_numeral(c):
    """Return -1 if the biomarker span text starts with a Roman numeral, else None.

    The span text is first NFKD-normalised and stripped of non-ASCII characters,
    so compatibility forms such as U+2163 are compared as their ASCII
    equivalents ("IV").
    """
    # Imported locally because the notebook's import cell does not import it.
    import unicodedata

    biomarker = c.biomarker.get_span()
    # Keep the normalised text; the original computed it and threw it away.
    # NOTE(review): assumes get_span() returns unicode text -- confirm.
    biomarker = unicodedata.normalize('NFKD', biomarker).encode('ascii', 'ignore').decode('ascii')
    if _ROMAN_NUMERAL_RE.match(biomarker):
        return -1
    
    
# Disease Specific
    
def LF_distance(c):
    """Return -1 if more than 8 tokens lie between the candidate's spans, else 1."""
    # Count by iterating rather than calling len(), as the original did, so this
    # also works if get_between_tokens yields lazily.
    tokens_between = sum(1 for _ in get_between_tokens(c))
    return -1 if tokens_between > 8 else 1

def LF_keyword(c):
    """Label a candidate by the keywords found between its spans.

    Returns 0 if no entry of keyWords is among the between-tokens. Otherwise
    returns -1 if the token "not" is also present, and 1 if it is not.
    """
    # Fetch the between-tokens once; the original called the helper again for
    # every keyword and once more for the negation check.
    between = set(get_between_tokens(c))
    if not any(keyword in between for keyword in keyWords):
        return 0
    # NOTE(review): only "not" counts as negation here; the notebook also
    # defines negationWords ("not", "nor", "neither") but never uses it.
    return -1 if "not" in between else 1
    
def LF_auxpass(c):
    """Return 1 if 'auxpass' is among the dep_labels between the spans, else -1."""
    dep_labels = get_between_tokens(c, attrib='dep_labels')
    if 'auxpass' in dep_labels:
        return 1
    return -1
    
# Word collection consulted by LF_common_2000 (membership test on the condition text).
# NOTE(review): unpickling runs arbitrary code; only load a trusted file.
with open('databases/common2000.pkl', 'rb') as f:
    common2000 = cPickle.load(f)

def LF_common_2000(c):
    """Return -1 if the condition span text is in common2000, else None."""
    condition_text = c.condition.get_span()
    return -1 if condition_text in common2000 else None

# Medium Specific
# Type Specific
# Drug Specific
# Level/Units Specific



In [None]:
# Labelling functions grouped by the entity they inspect. Only the biomarker
# and disease groups have members so far; the rest are empty placeholders.
BiomarkerSpecificLFs = [LF_markerDatabase, LF_abstract_titleWord, LF_single_letter, LF_known_abs, LF_same_thing, LF_roman_numeral]
DiseaseSpecificLFs = [LF_distance,  LF_keyword, LF_auxpass, LF_common_2000]
MediumSpecificLFs = []
TypeSpecificLFs = []
DrugSpecificLFs = []
LevelUnitSpecificLFs = []

In [None]:
from snorkel.annotations import LabelAnnotator
# `LFs` was never assigned anywhere in this notebook, so the original line
# raised NameError. Build it from the two non-empty groups declared in the
# previous cell: the biomarker LFs plus the disease (condition) LFs.
# NOTE(review): the disease LFs read c.condition, so they only suit
# BiomarkerCondition candidates -- confirm which candidates the labeler sees.
LFs = BiomarkerSpecificLFs + DiseaseSpecificLFs
labeler = LabelAnnotator(lfs=LFs)

In [None]:
# Apply the labelling functions to split 0 (the training split) and time the call.
%time L_train = labeler.apply(split=0)
L_train

In [None]:
L_train.lf_stats(session)

In [None]:
from snorkel.learning.structure import DependencySelector
# Select dependencies between labelling functions from the training label
# matrix, using a threshold of 0.1, and show how many were selected.
ds = DependencySelector()
deps = ds.select(L_train, threshold=0.1)
len(deps)

In [None]:
deps

In [None]:
# Rebinds deps to an empty set, discarding whatever ds.select() returned, so
# the generative model below is trained with no dependencies.
# NOTE(review): confirm this override is intentional.
deps = set()

In [None]:
from snorkel.learning import GenerativeModel

# Train the generative model on the training label matrix.
# step_size is scaled by the number of rows in L_train.
gen_model = GenerativeModel(lf_propensity=True)
gen_model.train(
    L_train, deps=deps, decay=0.95, step_size=0.1/L_train.shape[0], reg_param=0.0
)

In [None]:
# Marginals the generative model assigns to the rows of L_train.
train_marginals = gen_model.marginals(L_train)

In [None]:
import matplotlib.pyplot as plt
# Histogram of the training marginals in 20 bins.
plt.hist(train_marginals, bins=20)
plt.show()

In [None]:
gen_model.learned_lf_stats()

In [None]:
from snorkel.annotations import save_marginals
# Store the training marginals through the session so that load_marginals can
# read them back later in the notebook.
save_marginals(session, L_train, train_marginals)

In [None]:
from load_external_annotations_new import load_external_labels
# `dev_cands` was never assigned anywhere in this notebook, so this call (and
# the later cells that use dev_cands) raised NameError. Query the dev split
# (split == 1) BiomarkerCondition candidates here so the list reflects the
# database at this point in the run.
dev_cands = session.query(BiomarkerCondition).filter(BiomarkerCondition.split == 1).all()
# Attach the external gold labels for the dev candidates under annotator name 'gold'.
load_external_labels(session, BiomarkerCondition, 'Biomarker', 'Condition', 'articles/disease_gold_labels.tsv', dev_cands, annotator_name='gold')


In [None]:
from snorkel.annotations import load_gold_labels
# Load the 'gold' annotations for split 1 (dev) and print them.
L_gold_dev = load_gold_labels(session, annotator_name='gold', split=1)
# This bare expression is not the cell's last line, so it displays nothing.
L_gold_dev
print L_gold_dev


In [None]:
for thing in L_gold_dev:
    print thing

In [None]:
# Label the dev split (split 1) with the existing labelling functions, then
# score the generative model against the gold labels; the return value is discarded.
L_dev = labeler.apply_existing(split=1)
_ = gen_model.score(session, L_dev, L_gold_dev)

In [None]:
for candidate in dev_cands:
    print candidate[1].get_stable_id()

In [None]:
L_dev.lf_stats(session, L_gold_dev, gen_model.learned_lf_stats()['Accuracy'])

In [None]:
from snorkel.viewer import SentenceNgramViewer
# Show the dev-split BiomarkerCondition candidates that LF_markerDatabase
# labels 1, three per page.
dev_condition_cands = session.query(BiomarkerCondition).filter(BiomarkerCondition.split == 1).all()
labeled = [cand for cand in dev_condition_cands if LF_markerDatabase(cand) == 1]
SentenceNgramViewer(labeled, session, n_per_page=3)

In [None]:
import numpy as np

# Load dev labels and convert to [0, 1] range
from snorkel.annotations import load_gold_labels
L_gold_dev = load_gold_labels(session, annotator_name='gold', split=1)
# Flatten the dense label matrix to 1-D, then map -1 -> 0 and +1 -> 1.
# NOTE(review): under Python 2 `/ 2` is floor division on integer arrays, so a
# 0 (unlabelled) entry would become 0 rather than 0.5 -- confirm the dtype.
dev_labels = (np.ravel(L_gold_dev.todense()) + 1) / 2

In [None]:
from snorkel.annotations import FeatureAnnotator
# Build features for split 0 (train) and time the call.
featurizer = FeatureAnnotator()

%time F_train = featurizer.apply(split=0)
F_train

In [None]:
%%time
# Featurize split 1 (dev) and split 2 (test) with apply_existing.
F_dev  = featurizer.apply_existing(split=1)
F_test = featurizer.apply_existing(split=2)

In [None]:
from snorkel.annotations import load_marginals
# Reload the saved training marginals for F_train; this rebinds
# train_marginals, replacing the array computed from gen_model earlier.
train_marginals = load_marginals(session, F_train, split=0)

In [None]:
from snorkel.learning import SparseLogisticRegression
disc_model = SparseLogisticRegression()

In [None]:
from snorkel.learning.utils import MentionScorer
from snorkel.learning import RandomSearch, ListParameter, RangeParameter

# Search over the learning rate and the L1 / L2 penalties, each ranging from
# 1e-6 to 1e-2 on a base-10 log scale.
rate_param = RangeParameter('lr', 1e-6, 1e-2, step=1, log_base=10)
l1_param  = RangeParameter('l1_penalty', 1e-6, 1e-2, step=1, log_base=10)
l2_param  = RangeParameter('l2_penalty', 1e-6, 1e-2, step=1, log_base=10)

# Random search with n=20 over the three parameters for the discriminative model.
searcher = RandomSearch(session, disc_model, F_train, train_marginals, [rate_param, l1_param, l2_param], n=20)

In [None]:
from snorkel.annotations import load_gold_labels
# Reload the dev gold labels (same call as in the earlier cells).
L_gold_dev = load_gold_labels(session, annotator_name='gold', split=1)
# This bare expression is not the cell's last line, so it displays nothing.
L_gold_dev

import numpy as np
# Seed NumPy's global RNG before running the search.
np.random.seed(1701)
# Fit the search, passing the dev features and dev gold labels.
searcher.fit(F_dev, L_gold_dev, n_epochs=50, rebalance=0.5, print_freq=25)

### Scoring on the test set



In [None]:
# BiomarkerCondition candidates for split 2 (test) and split 0 (train).
test_cands = session.query(BiomarkerCondition).filter(BiomarkerCondition.split == 2).all()
train_cands = session.query(BiomarkerCondition).filter(BiomarkerCondition.split == 0).all()

In [None]:
from load_external_annotations_new import load_external_labels
# Attach the external gold labels for the test candidates under annotator name 'gold'.
load_external_labels(session, BiomarkerCondition, 'Biomarker', 'Condition', 'articles/disease_test_labels.tsv', test_cands, annotator_name='gold')

from snorkel.annotations import load_gold_labels
L_gold_test = load_gold_labels(session, annotator_name='gold', split=2)
# This bare expression is not the cell's last line, so it displays nothing.
L_gold_test

# Score the discriminative model on the test features; the result is unpacked
# into four values named tp, fp, tn, fn.
tp, fp, tn, fn = disc_model.score(session, F_test, L_gold_test)

In [None]:
# Repeats the scoring call made at the end of the previous cell.
tp, fp, tn, fn = disc_model.score(session, F_test, L_gold_test)

In [None]:
print tp

In [None]:
from snorkel.contrib.rnn import reRNN

# Keyword arguments forwarded to lstm.train below.
train_kwargs = {
    'lr':         0.01,
    'dim':        100,
    'n_epochs':   50,
    'dropout':    0.5,
    'rebalance':  0.25,
    'print_freq': 5
}

# Train on the train candidates and their marginals, passing the dev
# candidates and dev labels as well.
# NOTE(review): train_marginals was loaded for F_train while train_cands holds
# only BiomarkerCondition candidates -- confirm the two line up row for row.
lstm = reRNN(seed=1701, n_threads=None)
lstm.train(train_cands, train_marginals, dev_candidates=dev_cands, dev_labels=dev_labels, **train_kwargs)

In [None]:
# Save the trained model under the name "biomarkercondition.lstm".
lstm.save("biomarkercondition.lstm")