In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from snorkel import SnorkelSession

session = SnorkelSession()

from snorkel.models import candidate_subclass

ChemicalDisease = candidate_subclass('ChemicalDisease', ['chemical', 'disease'])

train_cands = session.query(ChemicalDisease).filter(ChemicalDisease.split == 0).all()
dev_cands = session.query(ChemicalDisease).filter(ChemicalDisease.split == 1).all()

In [None]:
import bz2
from six.moves.cPickle import load

with bz2.BZ2File('data/ctd.pkl.bz2', 'rb') as ctd_f:
    ctd_unspecified, ctd_therapy, ctd_marker = load(ctd_f)
    
    
def cand_in_ctd_unspecified(c):
    return 1 if c.get_cids() in ctd_unspecified else 0

def cand_in_ctd_therapy(c):
    return 1 if c.get_cids() in ctd_therapy else 0

def cand_in_ctd_marker(c):
    return 1 if c.get_cids() in ctd_marker else 0

def LF_in_ctd_unspecified(c):
    if(cand_in_ctd_unspecified(c)==1):
        return (-1,1)
    else:
        return (0,0)

def LF_in_ctd_therapy(c):
    if(cand_in_ctd_therapy(c)==1):
        return (-1,1)
    else:
        return (0,0)

def LF_in_ctd_marker(c):
    if(cand_in_ctd_marker(c)==1):
        return (1,1)
    else:
        return (0,0)

In [None]:
from __future__ import division

def LF_closer_chem(c):
    # Get distance between chemical and disease
    chem_start, chem_end = c.chemical.get_word_start(), c.chemical.get_word_end()
    dis_start, dis_end = c.disease.get_word_start(), c.disease.get_word_end()
    if dis_start < chem_start:
        dist = chem_start - dis_end
    else:
        dist = dis_start - chem_end
    # Try to find chemical closer than @dist/2 in either direction
    sent = c.get_parent()
    closest_other_chem = float('inf')
    #print("LF_CHEM",len(sent.words),dis_end + dist // 2)
    for i in range(dis_end, min(len(sent.words), dis_end + dist // 2)):
        et, cid = sent.entity_types[i], sent.entity_cids[i]
        if et == 'Chemical' and cid != sent.entity_cids[chem_start]:
            return (-1,1)
    for i in range(max(0, dis_start - dist // 2), dis_start):
        et, cid = sent.entity_types[i], sent.entity_cids[i]
        if et == 'Chemical' and cid != sent.entity_cids[chem_start]:
            return (-1,1)
    return (0,0)

def LF_closer_dis(c):
    # Get distance between chemical and disease
    chem_start, chem_end = c.chemical.get_word_start(), c.chemical.get_word_end()
    dis_start, dis_end = c.disease.get_word_start(), c.disease.get_word_end()
    if dis_start < chem_start:
        dist = chem_start - dis_end
    else:
        dist = dis_start - chem_end
    # Try to find chemical disease than @dist/8 in either direction
    sent = c.get_parent()
    for i in range(chem_end, min(len(sent.words), chem_end + dist // 8)):
        et, cid = sent.entity_types[i], sent.entity_cids[i]
        if et == 'Disease' and cid != sent.entity_cids[dis_start]:
            return (-1,1)
    for i in range(max(0, chem_start - dist // 8), chem_start):
        et, cid = sent.entity_types[i], sent.entity_cids[i]
        if et == 'Disease' and cid != sent.entity_cids[dis_start]:
            return (-1,1)
    return (0,0)

In [None]:
from load_external_annotations import load_external_labels
load_external_labels(session, ChemicalDisease, split=1, annotator='gold')

from snorkel.annotations import load_gold_labels
L_gold_dev = load_gold_labels(session, annotator_name='gold', split=1)
L_gold_dev

In [None]:
#gold_labels_dev = [x[0,0] for x in L_gold_dev.todense()]
#for i,L in enumerate(gold_labels_dev):
#    print(i,gold_labels_dev[i])

gold_labels_dev = []
for i,L in enumerate(L_gold_dev):
    gold_labels_dev.append(L[0,0])
    
    
print(len(gold_labels_dev))
print(gold_labels_dev.count(1),gold_labels_dev.count(-1))

In [None]:
from gensim.parsing.preprocessing import STOPWORDS
import gensim.matutils as gm

from gensim.models.keyedvectors import KeyedVectors

# Load pretrained model (since intermediate data is not included, the model cannot be refined with additional data)
model = KeyedVectors.load_word2vec_format('../glove_w2v.txt', binary=False)  # C binary format


wordvec_unavailable= set()
def write_to_file(wordvec_unavailable):
    with open("wordvec_unavailable.txt","w") as f:
        for word in wordvec_unavailable:
            f.write(word+"\n")

def preprocess(tokens):
    btw_words = [word for word in tokens if word not in STOPWORDS]
    btw_words = [word for word in btw_words if word.isalpha()]
    return btw_words

def get_word_vectors(btw_words): # returns vector of embeddings of words
    word_vectors= []
    for word in btw_words:
        try:
            word_v = np.array(model[word])
            word_v = word_v.reshape(len(word_v),1)
            #print(word_v.shape)
            word_vectors.append(model[word])
        except:
            wordvec_unavailable.add(word)
    return word_vectors

def get_similarity(word_vectors,target_word): # sent(list of word vecs) to word similarity
    similarity = 0
    target_word_vector = 0
    try:
        target_word_vector = model[target_word]
    except:
        wordvec_unavailable.add(target_word+" t")
        return similarity
    target_word_sparse = gm.any2sparse(target_word_vector,eps=1e-09)
    for wv in word_vectors:
        wv_sparse = gm.any2sparse(wv, eps=1e-09)
        similarity = max(similarity,gm.cossim(wv_sparse,target_word_sparse))
    return similarity


In [None]:
# ##### Continuous ################

# softmax_Threshold = 0.3
# LF_Threshold = 0.3

# import re
# from snorkel.lf_helpers import (
#     get_left_tokens, get_right_tokens, get_between_tokens,
#     get_text_between, get_tagged_text,
# )

# import re
# from snorkel.lf_helpers import (
#     get_tagged_text,
#     rule_regex_search_tagged_text,
#     rule_regex_search_btw_AB,
#     rule_regex_search_btw_BA,
#     rule_regex_search_before_A,
#     rule_regex_search_before_B,
# )

# def ltp(x):
#     return '(' + '|'.join(x) + ')'

# causal = ['induced', 'caused', 'due','associated with']

# def LF_induce(c):
#     return (1,1) if re.search(r'{{A}}.{0,20}induc.{0,20}{{B}}', get_tagged_text(c), flags=re.I) else (0,0)

# def LF_causal(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_between_tokens(c))
#     for w in causal:
#         sc=max(sc,get_similarity(word_vectors,w))
#     if(re.search('{{A}}.{0,50}(not|no|none).{0,20}' + ltp(causal) + '.{0,50}{{B}}', get_tagged_text(c), re.I)):
#         return (0,0)
#     else:
#         return (1,sc)
    
# def LF_induce_name(c):
#     return (1,1) if 'induc' in c.chemical.get_span().lower() else (0,0)   

    
# def LF_c_induced_d(c):
#     return (1,1) if (
#         ('{{A}} {{B}}' in get_tagged_text(c)) and 
#         (('-induc' in c[0].get_span().lower()) or ('-assoc' in c[0].get_span().lower()))
#         ) else (0,0)

    
# treat = ['treat', 'effective', 'prevent', 'resistant', 'slow', 'promise', 'therap']

# def LF_treat(c):
#     global LF_Threshold
#     sc = 0
#     word_vectors = get_word_vectors(get_between_tokens(c))
#     for w in treat:
#         sc=max(sc,get_similarity(word_vectors,w))
#     if(re.search('{{A}}.{0,50}(not|no|none).{0,20}' + ltp(treat) + '.{0,50}{{B}}', get_tagged_text(c), re.I)):
#         return (0,0)
#     else:
#         return (-1,sc)
    
# def LF_treat_d(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_left_tokens(c[1]))
#     for w in treat:
#         sc=max(sc,get_similarity(word_vectors,w))
#     if(re.search('(not|no|none) .{0,50} {{B}}', get_tagged_text(c), re.I)):
#         return (0,0)
#     else:
#         return (-1,sc)
    
# def LF_c_d(c):
#     return (1,1) if ('{{A}} {{B}}' in get_tagged_text(c)) else (0,0)

    
# pat_terms = ['in a patient with ', 'in patients with']
# def LF_in_patient_with(c):
#     return (-1,1) if re.search(ltp(pat_terms) + '{{B}}', get_tagged_text(c), flags=re.I) else (0,0)

# uncertain = ['combin', 'possible', 'unlikely']

# def LF_uncertain(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_left_tokens(c[1]))
#     for w in uncertain:
#         sc=max(sc,get_similarity(word_vectors,w))
#     if(re.search('(not|no|none) .{0,50} {{B}}', get_tagged_text(c), re.I)):
#         return (0,0)
#     else:
#         return (-1,sc)
    
# def LF_far_c_d(c):
#     if(rule_regex_search_btw_AB(c, '.{100,5000}', -1)==-1):
#         return (-1,1)
#     else:
#         return (0,0)

# def LF_far_d_c(c):
#     if(rule_regex_search_btw_BA(c, '.{100,5000}', -1)==-1):
#         return (-1,1)
#     else:
#         return (0,0)
    
# def LF_develop_d_following_c(c):
#     sc1 = 0
#     sc2 = 0
#     word_vectors = get_word_vectors(get_left_tokens(c[1]))
#     sc1=max(sc1,get_similarity(word_vectors,'develop'))
    
#     word_vectors = get_word_vectors(get_between_tokens(c))
#     sc2=max(sc2,get_similarity(word_vectors,'following'))
    
#     sc = (sc1+sc2)/2
#     if(re.search('(not|no|none) .{0,50} {{B}}', get_tagged_text(c), re.I)):
#         return (0,0)
#     else:
#         return (1,sc)
    

# def LF_risk_d(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_left_tokens(c[1]))
#     sc=max(sc,get_similarity(word_vectors,'risk'))
#     if(re.search(' (not|no|none) .{0,50}{{B}}', get_tagged_text(c), re.I)):
#         return (0,0)
#     else:
#         return (1,sc)
    
# def LF_improve_before_disease(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_left_tokens(c[1]))
#     sc=max(sc,get_similarity(word_vectors,'improve'))
#     for nw in Not:
#         if nw in get_between_tokens(c):
#             return (0,0)
#     else:
#         return (1,sc)
    
# procedure, following = ['inject', 'administrate'], ['following']
# def LF_d_following_c(c):
#     sc1 = 0
#     sc2 = 0
#     word_vectors = get_word_vectors(get_between_tokens(c))
#     for w in following:
#         sc1=max(sc1,get_similarity(word_vectors,w))
    
#     word_vectors = get_word_vectors(get_right_tokens(c[1]))
#     for w in procedure:
#         sc2=max(sc2,get_similarity(word_vectors,w))
    
#     sc = (sc1+sc2)/2
#     return (1,sc)

# def LF_measure(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_left_tokens(c[0]))
#     sc=max(sc,get_similarity(word_vectors,'measure'))
#     return (-1,sc)
    
# def LF_level(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_right_tokens(c[0]))
#     sc=max(sc,get_similarity(word_vectors,'level'))
#     return (-1,sc)

# def LF_neg_d(c):
#     return (-1,1) if re.search('(none|not|no) .{0,25}{{B}}', get_tagged_text(c), flags=re.I) else (0,0)

    
# WEAK_PHRASES = ['none', 'although', 'was carried out', 'was conducted',
#                 'seems', 'suggests', 'risk', 'implicated',
#                'aim', 'investigate','assess','study']


# def LF_weak_assertions(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_left_tokens(c[1]))
#     for w in WEAK_PHRASES:
#         sc=max(sc,get_similarity(word_vectors,w))
#     for nw in Not:
#         if nw in get_between_tokens(c):
#             return (0,0)
#     else:
#         return (-1,sc)


In [None]:
import re
from snorkel.lf_helpers import (
    get_tagged_text,
    rule_regex_search_tagged_text,
    rule_regex_search_btw_AB,
    rule_regex_search_btw_BA,
    rule_regex_search_before_A,
    rule_regex_search_before_B,
)

from snorkel.lf_helpers import (
    get_left_tokens, get_right_tokens, get_between_tokens,
    get_text_between, get_tagged_text,
)

# List to parenthetical
def ltp(x):
    return '(' + '|'.join(x) + ')'

def LF_induce(c):
    return (1,1) if re.search(r'{{A}}.{0,20}induc.{0,20}{{B}}', get_tagged_text(c), flags=re.I) else (0,0)

causal_past = ['induced', 'caused', 'due']
def LF_d_induced_by_c(c):
    return (rule_regex_search_btw_BA(c, '.{0,50}' + ltp(causal_past) + '.{0,9}(by|to).{0,50}', 1),1)

# def LF_d_induced_by_c(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_between_tokens(c))
#     for w in causal_past:
#         sc=max(sc,get_similarity(word_vectors,w))
#     return (1,sc)

def LF_d_induced_by_c_tight(c):
    return (rule_regex_search_btw_BA(c, '.{0,50}' + ltp(causal_past) + ' (by|to) ', 1),1)

def LF_induce_name(c):
    return (1,1) if 'induc' in c.chemical.get_span().lower() else (0,0)     

causal = ['cause[sd]?', 'induce[sd]?', 'associated with']
def LF_c_cause_d(c):
    return (1,1) if (
        re.search(r'{{A}}.{0,50} ' + ltp(causal) + '.{0,50}{{B}}', get_tagged_text(c), re.I)
        and not re.search('{{A}}.{0,50}(not|no).{0,20}' + ltp(causal) + '.{0,50}{{B}}', get_tagged_text(c), re.I)
    ) else (0,0)

treat = ['treat', 'effective', 'prevent', 'resistant', 'slow', 'promise', 'therap']
def LF_d_treat_c(c):
    return (rule_regex_search_btw_BA(c, '.{0,50}' + ltp(treat) + '.{0,50}', -1),1)
def LF_c_treat_d(c):
    return (rule_regex_search_btw_AB(c, '.{0,50}' + ltp(treat) + '.{0,50}', -1),1)

def LF_treat_d(c):
    return (rule_regex_search_before_B(c, ltp(treat) + '.{0,50}', -1),1)


# def LF_treat_d(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_left_tokens(c[1],5))
#     for w in treat:
#         sc=max(sc,get_similarity(word_vectors,w))
#     if(re.search('(not|no|none) .{0,50} {{B}}', get_tagged_text(c), re.I)):
#         return (0,0)
#     else:
#         return (-1,sc)
    
def LF_c_treat_d_wide(c):
    return (rule_regex_search_btw_AB(c, '.{0,200}' + ltp(treat) + '.{0,200}', -1),1)

def LF_c_d(c):
    return (1,1) if ('{{A}} {{B}}' in get_tagged_text(c)) else (0,0)

def LF_c_induced_d(c):
    return (1,1) if (
        ('{{A}} {{B}}' in get_tagged_text(c)) and 
        (('-induc' in c[0].get_span().lower()) or ('-assoc' in c[0].get_span().lower()))
        ) else (0,0)

def LF_improve_before_disease(c):
    return (rule_regex_search_before_B(c, 'improv.*', -1),1)

pat_terms = ['in a patient with ', 'in patients with']
def LF_in_patient_with(c):
    return (-1,1) if re.search(ltp(pat_terms) + '{{B}}', get_tagged_text(c), flags=re.I) else (0,0)

uncertain = ['combin', 'possible', 'unlikely']
def LF_uncertain(c):
    return (rule_regex_search_before_A(c, ltp(uncertain) + '.*', -1),1)

# def LF_uncertain(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_left_tokens(c[1],5))
#     for w in uncertain:
#         sc=max(sc,get_similarity(word_vectors,w))
#     if(re.search('(not|no|none) .{0,50} {{B}}', get_tagged_text(c), re.I)):
#         return (0,0)
#     else:
#         return (-1,sc)
    
def LF_induced_other(c):
    return (rule_regex_search_tagged_text(c, '{{A}}.{20,1000}-induced {{B}}', -1),1)

def LF_far_c_d(c):
    return (rule_regex_search_btw_AB(c, '.{100,5000}', -1),1)

def LF_far_d_c(c):
    return (rule_regex_search_btw_BA(c, '.{100,5000}', -1),1)

def LF_risk_d(c):
    return (rule_regex_search_before_B(c, 'risk of ', 1),1)


# def LF_risk_d(c):
#     sc = 0
#     word_vectors = get_word_vectors(get_left_tokens(c[1],5))
#     sc=max(sc,get_similarity(word_vectors,'risk'))
#     return (1,sc)

def LF_develop_d_following_c(c):
    return (1,1) if re.search(r'develop.{0,25}{{B}}.{0,25}following.{0,25}{{A}}', get_tagged_text(c), flags=re.I) else (0,0)

procedure, following = ['inject', 'administrat'], ['following']
def LF_d_following_c(c):
    return (1,1) if re.search('{{B}}.{0,50}' + ltp(following) + '.{0,20}{{A}}.{0,50}' + ltp(procedure), get_tagged_text(c), flags=re.I) else (0,0)

def LF_measure(c):
    return (-1,1) if re.search('measur.{0,75}{{A}}', get_tagged_text(c), flags=re.I) else (0,0)

def LF_level(c):
    return (-1,1) if re.search('{{A}}.{0,25} level', get_tagged_text(c), flags=re.I) else (0,0)

def LF_neg_d(c):
    return (-1,1) if re.search('(none|not|no) .{0,25}{{B}}', get_tagged_text(c), flags=re.I) else (0,0)

WEAK_PHRASES = ['none', 'although', 'was carried out', 'was conducted',
                'seems', 'suggests', 'risk', 'implicated',
               'the aim', 'to (investigate|assess|study)']

WEAK_RGX = r'|'.join(WEAK_PHRASES)

def LF_weak_assertions(c):
    return (-1,1) if re.search(WEAK_RGX, get_tagged_text(c), flags=re.I) else (0,0)


In [None]:
# def LF_ctd_marker_c_d(c):
#     l,s = LF_c_d(c)
#     return (l*cand_in_ctd_marker(c),s)

# def LF_ctd_marker_induce(c):
#     l,s = LF_c_induced_d(c)
#     return (l*cand_in_ctd_marker(c),s)

# def LF_ctd_therapy_treat(c):
#     l,s = LF_treat(c)
#     return (l* cand_in_ctd_therapy(c),s)

# def LF_ctd_unspecified_treat(c):
#     l,s = LF_treat(c)
#     return (l * cand_in_ctd_unspecified(c),s)

# def LF_ctd_unspecified_induce(c):
#     l,s = LF_c_induced_d(c)
#     return (l*cand_in_ctd_unspecified(c),s)


In [None]:
def LF_ctd_marker_c_d(c):
    l,s = LF_c_d(c)
    return (l*cand_in_ctd_marker(c),s)

def LF_ctd_marker_induce(c):
    l1,s1 = LF_c_induced_d(c)
    l2,s2 = LF_d_induced_by_c_tight(c)
    return ((l1 or l2) * cand_in_ctd_marker(c),(s1*s2))

def LF_ctd_therapy_treat(c):
    l,s = LF_c_treat_d_wide(c)
    return (l* cand_in_ctd_therapy(c),s)

def LF_ctd_unspecified_treat(c):
    l,s = LF_c_treat_d_wide(c)
    return (l* cand_in_ctd_unspecified(c),s)

def LF_ctd_unspecified_induce(c):
    l1,s1 = LF_c_induced_d(c)
    l2,s2 = LF_d_induced_by_c_tight(c)
    return ((l1 or l2) * cand_in_ctd_unspecified(c),(s1*s2))

In [None]:
# import numpy as np
# import math

# LFs = [LF_in_ctd_unspecified,LF_in_ctd_marker,LF_in_ctd_therapy,LF_closer_chem, 
#        LF_closer_dis,LF_causal,LF_c_induced_d,LF_c_d,LF_in_patient_with,LF_uncertain,
#        LF_far_c_d,LF_far_d_c,LF_develop_d_following_c,LF_d_following_c,LF_measure,
#       LF_level,LF_neg_d,LF_weak_assertions,LF_ctd_marker_c_d,LF_ctd_therapy_treat,
#       LF_ctd_unspecified_treat,LF_ctd_unspecified_induce,LF_improve_before_disease,
#        LF_risk_d,LF_treat,LF_treat_d,LF_induce,LF_induce_name]


In [None]:
import numpy as np
import math

import matplotlib.pyplot as plt

LFs = [
    LF_c_cause_d,
    LF_c_d,
    LF_c_induced_d,
    LF_c_treat_d,
    LF_c_treat_d_wide,
    LF_closer_chem,
    LF_closer_dis,
    LF_ctd_marker_c_d,
    LF_ctd_marker_induce,
    LF_ctd_therapy_treat,
    LF_ctd_unspecified_treat,
    LF_ctd_unspecified_induce,
    LF_d_following_c,
    LF_d_induced_by_c,
    LF_d_induced_by_c_tight,
    LF_d_treat_c,
    LF_develop_d_following_c,
    LF_far_c_d,
    LF_far_d_c,
    LF_improve_before_disease,
    LF_in_ctd_therapy,
    LF_in_ctd_marker,
    LF_in_patient_with,
    LF_induce,
    LF_induce_name,
    LF_induced_other,
    LF_level,
    LF_measure,
    LF_neg_d,
    LF_risk_d,
    LF_treat_d,
    LF_uncertain,
    LF_weak_assertions
]

In [None]:
''' output:

    [[[L_x1],[S_x1]],
     [[L_x2],[S_x2]],
     ......
     ......
    ]

'''
def get_L_S_Tensor(cands): 
    
    L_S = []
    for ci in cands:
        L_S_ci=[]
        L=[]
        S=[]
        P_ik = []
        for LF in LFs:
            #print LF.__name__
            l,s = LF(ci)
            L.append(l)
            S.append((s+1)/2)  #to scale scores in [0,1] 
        L_S_ci.append(L)
        L_S_ci.append(S)
        L_S.append(L_S_ci) 
    return L_S

def get_L_S(cands):  # sign gives label abs value gives score
    
    L_S = []
    for ci in cands:
        l_s=[]
        for LF in LFs:
            #print LF.__name__
            l,s = LF(ci)
            s= (s+1)/2  #to scale scores in [0,1] 
            l_s.append(l*s)
        L_S.append(l_s)
    return L_S

def get_Initial_P_cap_L_S(L_S):
    P_cap = []
    for L,S in L_S:
        P_ik = []
        denominator=float(L.count(1)+L.count(-1))
        if(denominator==0):
            denominator=1
        P_ik.append(L.count(1)/denominator)
        P_ik.append(L.count(-1)/denominator)
        P_cap.append(P_ik)
    return P_cap



In [None]:
# import matplotlib.pyplot as plt
   
from sklearn.metrics import precision_recall_fscore_support

import cPickle as pkl

# dev_L_S = get_L_S_Tensor(dev_cands)
# train_L_S = get_L_S_Tensor(train_cands)
# test_L_S = get_L_S_Tensor(test_cands)


# train_P_cap= get_Initial_P_cap_L_S(train_L_S) 

# dev_P_cap = get_Initial_P_cap_L_S(dev_L_S)

# test_P_cap = get_Initial_P_cap_L_S(test_L_S)

import cPickle as pkl

# pkl.dump(dev_L_S,open("dev_L_S.p","wb"))
# pkl.dump(train_L_S,open("train_L_S.p","wb"))
# pkl.dump(test_L_S,open("test_L_S.p","wb"))

# pkl.dump(train_P_cap,open("train_P_cap.p","wb"))
# pkl.dump(dev_P_cap,open("dev_P_cap.p","wb"))
# pkl.dump(test_P_cap,open("test_P_cap.p","wb"))

In [None]:
#prepare batch data
# train_L_S_batch,dev_L_S_batch = get_L_S_batch()
# train_P_cap_batch,dev_P_cap_batch = get_P_cap_batch()

In [None]:
from sklearn.metrics import precision_recall_fscore_support

import cPickle as pkl


# pkl.dump(dev_L_S,open("dev_L_S.p","wb"))
# pkl.dump(train_L_S,open("train_L_S.p","wb"))
#pkl.dump(test_L_S,open("test_L_S.p","wb"))

#pkl.dump(train_P_cap,open("train_P_cap.p","wb"))
#pkl.dump(dev_P_cap,open("dev_P_cap.p","wb"))
#pkl.dump(test_P_cap,open("test_P_cap.p","wb"))

dev_L_S = pkl.load( open( "dev_L_S.p", "rb" ) )
train_L_S = pkl.load( open( "train_L_S.p", "rb" ) )
# test_L_S = pkl.load( open( "test_L_S.p", "rb" ) )

# train_P_cap = pkl.load( open( "train_P_cap.p", "rb" ) )
# dev_P_cap = pkl.load( open( "dev_P_cap.p", "rb" ) )
# test_P_cap = pkl.load( open( "test_P_cap.p", "rb" ) )

def get_L_S_batch():
    dev_L_batch = []
    dev_S_batch = []
    dev_L_S_batch = []
    train_L_batch = []
    train_S_batch = []
    train_L_S_batch = []
    for l,s in train_L_S:
        train_L_batch.append(l)
        train_S_batch.append(s)
    train_L_S_batch = [train_L_batch, train_S_batch]
    for l,s in dev_L_S:
        dev_L_batch.append(l)
        dev_S_batch.append(s)
    dev_L_S_batch = [dev_L_batch, dev_S_batch]
    return train_L_S_batch,dev_L_S_batch


def get_P_cap_batch():
    kp1_train= []
    kn1_train = []
    kp1_dev= []
    kn1_dev = []
    for pci in train_P_cap:
        kp1_train.append(pci[0])
        kn1_train.append(pci[1])
    for pci in dev_P_cap:
        kp1_dev.append(pci[0])
        kn1_dev.append(pci[1])
    return [kp1_train,kn1_train],[kp1_dev,kn1_dev]
        
def get_mini_batches(X,P_cap,bsize): #X : (train/dev/)_L_S_batch
    for i in range(0, len(X[0]) - bsize + 1, bsize):
        indices = slice(i, i + bsize)
        #print(indices)
        yield [X[0][indices],X[1][indices]],P_cap[indices]

# train_L_S_batch,dev_L_S_batch = get_L_S_batch()

#for x in get_mini_batches(train_L_S_batch,200):
#    print(len(x),len(x[0]),len(x[0][0]))
    


In [None]:
#dep types:

def similar(i,j,k,L,y): # L : L_S_Tensor
    if(L[i][0][j]==L[i][0][k]):
        return 1
    return 0

def similar_r(i,j,k,L,y): 
    if(L[i][0][j]==L[i][0][k]):
        return 1-abs(L[i][1][j]-L[i][1][j])
    return 0

def fixing(i,j,k,L,y):
    if(L[i][0][j]==0 and L[i][0][k]!=0):
        return -1
    if(L[i][0][j]==-y and L[i][0][k]==y):
        return 1
    return 0

def reinforcing(i,j,k,L,y):
    if(L[i][0][j]==0 and L[i][0][k]!=0):
        return -1
    if(L[i][0][j]==y and L[i][0][k]==y):
        return 1
    return 0
       
def exclusive(i,j,k,L,y):
    if(L[i][0][j]!=0 and L[i][0][k]!=0):
        return -1
    return 0
      
def phi(i,j,k,L,y):
    return L[i][0][j]*L[i][1][j]*y

dep_names = [similar_r,fixing,reinforcing,exclusive]


def similar_(lij,lik,y): # L : L_S_Tensor
    if(lij==lik):
        return 1
    return 0

def similar_r_(lij,lik,y):
    return 1-abs(lij-lik)

def fixing_(lij,lik,y):
    if(lij==0 and lik!=0):
        return -1
    if(lij==-y and lik==y):
        return 1
    return 0

def reinforcing_(lij,lik,y):
    if(lij==0 and lik!=0):
        return -1
    if(lij==y and lik==y):
        return 1
    return 0
       
def exclusive_(lij,lik,y):
    if(lij!=0 and lik!=0):
        return -1
    return 0
      
# def phi(i,j,k,L,y):
#     return L[i][0][j]*L[i][1][j]*y

dep_names_ = [similar_r_,fixing_,reinforcing_,exclusive_]


def get_L_S_Tensor(cands): 
    
    L_S = []
    for ci in cands:
        L_S_ci=[]
        L=[]
        S=[]
        P_ik = []
        for LF in LFs:
            #print LF.__name__
            l,s = LF(ci)
            L.append(l)
            S.append((s+1)/2)  #to scale scores in [0,1] 
        L_S_ci.append(L)
        L_S_ci.append(S)
        L_S.append(L_S_ci) 
    return L_S

    

In [None]:
print(len(train_L_S))
print(len(train_L_S[0]))
print(len(train_L_S[0][0]))
print(len(dev_L_S))
print(train_L_S[0][0])
print(dep_names[3])
print(dep_names[0](0,3,4,train_L_S,1))

In [None]:
for j in range(len(LFs)):
    X_train = []
    for i in range(len(train_L_S)):
        x_t = []
        for y in [-1,1]:
            x_1 = []
            for t in range(len(dep_names)):
                for k in range(len(LFs)):
                    if(j!=k):
                        x_1.append(dep_names[t](i,j,k,train_L_S,y))
            x_t.append(x_1)
        X_train.append(x_t)
    pkl.dump(X_train,open("train_X_dep_"+str(j)+".p","wb"))

In [None]:
X_dev = []
for i in range(len(dev_L_S)):
    X_dj = []
    for j in range(len(LFs)):
        x_d = []
        for y in [-1,1]:
            x_1 = []
            for t in range(len(dep_names)):
                for k in range(len(LFs)):
                    if(j!=k):
                        x_1.append(dep_names[t](i,j,k,dev_L_S,y))
            x_d.append(x_1)
        X_dj.append(x_d)
    X_dev.append(X_dj)
pkl.dump(X_dev,open("dev_X_dep.p","wb"))

In [None]:
#### training 

Thetas = []
for j in range(len(LFs)):
    train_X = pkl.load(open("train_X_dep_"+str(j)+".p", "rb" ))
#     print(len(train_X))
#     print(len(train_X[0]))
#     print(len(train_X[0][0]))
    t = train_NN(train_X)
    Thetas.append(t)
print(len(Thetas))
print(len(Thetas[0]))
pkl.dump(Thetas,open("Thetas_X_dep.p","wb"))

In [None]:
Thetas = pkl.load(open("Thetas_X_dep.p","rb")) # 33 * 160
X_dev = pkl.load(open("dev_X_dep.p", "rb" )) 
print(len(X_dev))
print(len(X_dev[0]))
print(len(X_dev[0][0]))
print(len(X_dev[0][0][0]))

In [None]:
# Algorithm 

for j in range(len(LFs)):
    X_train = []
    theta = np.random.rand((len(LFs)-1)*len(dep_names))
    for ep in range(1,11):
        for i in range(len(train_L_S)):
            x_t = []
            for t in range(len(dep_names)):
                for k in range(len(LFs)):
                    if(j!=k):
                        for la in [-1,0,1]:
                            for y in [-1,1]:
                                x_1.append(dep_names_[t](la,train_L_S[i][0][k],y))
                x_t.append(x_1)
            X_train.append(x_t)
   

In [80]:
# test 

import numpy as np
Thetas = np.array(Thetas)
X_dev = np.array(X_dev)
print(X_dev.shape)
print(X_dev[:,:,0,:].shape)
print(X_dev[:,:,1,:].shape)
print(Thetas.shape)
print()
pl = []
for x in X_dev:
    p = np.argmax([np.sum(x[:,0,:]*Thetas),np.sum(x[:,1,:]*Thetas)])
    pl.append(p)
predicted_labels = [-1 if x==0 else x for x in pl]
print(predicted_labels.count(-1),predicted_labels.count(1))
print(precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))


(888, 33, 2, 160)
(888, 33, 160)
(888, 33, 160)
(33, 160)

497 391
(0.70487117075856676, 0.72719594594594594, 0.70682610142713365, None)


In [76]:
# with deps
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
from tensorflow.contrib.layers import l1_regularizer


def train_NN(X_train):
    print(len(X_train))
    X_val = X_train[-800:]
    X_train = X_train[:len(X_train)-800]
    print(len(X_val),len(X_train)) 
    result_dir = "./"
    config = projector.ProjectorConfig()
    tf.logging.set_verbosity(tf.logging.INFO)
    summary_writer = tf.summary.FileWriter(result_dir)

    tf.reset_default_graph()
    
    dim = 2

    _x = tf.placeholder(tf.float64,shape=(dim,len(X_train[0][0])))

    alphas = tf.get_variable('alpha', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(1,0.1,12),
                            dtype=tf.float64)

    thetas = tf.get_variable('theta', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(2,0.2,12),
                            dtype=tf.float64)

    p1,n1 = tf.unstack(_x)
                        
    prelu_out_p1 = tf.maximum(tf.subtract(p1,alphas), tf.zeros(shape=(len(X_train[0][0])),dtype=tf.float64))        

    prelu_out_n1 = tf.maximum(tf.subtract(n1,alphas), tf.zeros(shape=(len(X_train[0][0])),dtype=tf.float64))        


    phi_p1 = tf.reduce_sum(tf.multiply(prelu_out_p1,thetas))

    phi_n1 = tf.reduce_sum(tf.multiply(prelu_out_n1,thetas))

    phi_out = tf.stack([phi_n1,phi_p1])
    
    predict = tf.argmax(tf.nn.softmax(phi_out))

    loss = tf.negative(tf.reduce_logsumexp(phi_out))

    
    regularization_penalty = tf.contrib.layers.apply_regularization(l1_regularizer(1.0),[thetas])

    regularized_loss = loss + regularization_penalty 

    train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(regularized_loss) 

    check_op = tf.add_check_numerics_ops()

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    for i in range(1):
        c = 0
        te_prev=1
        total_te = 0
        for x in X_train:

            a,t,te_curr,_ = sess.run([alphas,thetas,loss,train_step],feed_dict={_x:x})
            total_te+=te_curr

            if(abs(te_curr-te_prev)<1e-200):
                print("too low")
                break

            if(c%2000==0):
                pl = []
                t_de=0
                for x in X_val:
                    a,t,de_curr = sess.run([alphas,thetas,loss],feed_dict={_x:x})
                    t_de+=de_curr
                print("val err:",t_de/len(X_val))
                print(total_te/2000)
                total_te=0
#                 print(a)
#                 print(t)
                print()
            c+=1
            te_prev = te_curr
#         pl = []
#         for L_S_i in dev_L_S:
#             p = sess.run(predict,feed_dict={_x:L_S_i})
#             pl.append(p)
#         predicted_labels = [-1 if x==0 else x for x in pl]
#         print(i,total_te)
#         print(predicted_labels.count(-1),predicted_labels.count(1))
#         print(precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
        t = sess.run(thetas)
        return t

In [37]:
# with deps
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector


def train_NN(X_train):
    print(len(X_train))
    X_val = X_train[-800:]
    X_train = X_train[:len(X_train)-800]
    print(len(X_val),len(X_train)) 
    result_dir = "./"
    config = projector.ProjectorConfig()
    tf.logging.set_verbosity(tf.logging.INFO)
    summary_writer = tf.summary.FileWriter(result_dir)

    tf.reset_default_graph()
    
    dim = 2

    _x = tf.placeholder(tf.float64,shape=(dim,len(X_train[0][0])))

    alphas = tf.get_variable('alpha', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(1,0.1,12),
                            dtype=tf.float64)

    thetas = tf.get_variable('theta', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(2,0.2,12),
                            dtype=tf.float64)

    p1,n1 = tf.unstack(_x)
                        
    prelu_out_p1 = tf.maximum(tf.subtract(p1,alphas), tf.zeros(shape=(len(X_train[0][0])),dtype=tf.float64))        

    prelu_out_n1 = tf.maximum(tf.subtract(n1,alphas), tf.zeros(shape=(len(X_train[0][0])),dtype=tf.float64))        


    phi_p1 = tf.reduce_sum(tf.multiply(prelu_out_p1,thetas))

    phi_n1 = tf.reduce_sum(tf.multiply(prelu_out_n1,thetas))

    phi_out = tf.stack([phi_n1,phi_p1])
    
    predict = tf.argmax(tf.nn.softmax(phi_out))

    loss = tf.negative(tf.reduce_logsumexp(phi_out))

    train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(loss) 

    check_op = tf.add_check_numerics_ops()

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    for i in range(1):
        c = 0
        te_prev=1
        total_te = 0
        for x in X_train:

            a,t,te_curr,_ = sess.run([alphas,thetas,loss,train_step],feed_dict={_x:x})
            total_te+=te_curr

            if(abs(te_curr-te_prev)<1e-200):
                print("too low")
                break

            if(c%2000==0):
                pl = []
                t_de=0
                for x in X_val:
                    a,t,de_curr = sess.run([alphas,thetas,loss],feed_dict={_x:x})
                    t_de+=de_curr
                print("val err:",t_de/len(X_val))
                print(total_te/2000)
                total_te=0
#                 print(a)
#                 print(t)
                print()
            c+=1
            te_prev = te_curr
#         pl = []
#         for L_S_i in dev_L_S:
#             p = sess.run(predict,feed_dict={_x:L_S_i})
#             pl.append(p)
#         predicted_labels = [-1 if x==0 else x for x in pl]
#         print(i,total_te)
#         print(predicted_labels.count(-1),predicted_labels.count(1))
#         print(precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
        t = sess.run(thetas)
        return t

In [32]:
#All discrete except LF_d_induced_by_c + seed
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

def train_NN():
    print()
    result_dir = "./"
    config = projector.ProjectorConfig()
    tf.logging.set_verbosity(tf.logging.INFO)
    summary_writer = tf.summary.FileWriter(result_dir)

    tf.reset_default_graph()

    dim = 2 #(labels,scores)

    _x = tf.placeholder(tf.float64,shape=(dim,len(LFs)))

    alphas = tf.get_variable('alpha', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(1,0.1,12),
                            dtype=tf.float64)

    thetas = tf.get_variable('theta', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(2,0.2,12),
                            dtype=tf.float64)

    l,s = tf.unstack(_x)

    prelu_out_s = tf.maximum(tf.subtract(s,alphas), tf.zeros(shape=(len(LFs)),dtype=tf.float64))        

    mul_L_S = tf.multiply(l,prelu_out_s)

    phi_p1 = tf.reduce_sum(tf.multiply(mul_L_S,thetas))

    phi_n1 = tf.reduce_sum(tf.multiply(tf.negative(mul_L_S),thetas))

    phi_out = tf.stack([phi_n1,phi_p1])
    
    predict = tf.argmax(tf.nn.softmax(phi_out))

    loss = tf.negative(tf.reduce_logsumexp(phi_out))

    train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(loss) 


    check_op = tf.add_check_numerics_ops()

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    for i in range(1):
        c = 0
        te_prev=1
        total_te = 0
        for L_S_i in train_L_S:

            a,t,te_curr,_ = sess.run([alphas,thetas,loss,train_step],feed_dict={_x:L_S_i})
            total_te+=te_curr

            if(abs(te_curr-te_prev)<1e-200):
                print("too low")
                break

            if(c%100==0):
                pl = []
                t_de=0
                for L_S_i in dev_L_S:
                    a,t,de_curr,p = sess.run([alphas,thetas,loss,predict],feed_dict={_x:L_S_i})
                    pl.append(p)
                    t_de+=de_curr
                predicted_labels = [-1 if x==0 else x for x in pl]
                print("dev err:",t_de/888)
                print(total_te/100)
                total_te=0
                print(a)
                print(t)
                print()
                print(predicted_labels.count(-1),predicted_labels.count(1))
                print(c," d ",precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
            c+=1
            te_prev = te_curr
        pl = []
        for L_S_i in dev_L_S:
            p = sess.run(predict,feed_dict={_x:L_S_i})
            pl.append(p)
        predicted_labels = [-1 if x==0 else x for x in pl]
        print(i,total_te)
        print(predicted_labels.count(-1),predicted_labels.count(1))
        print(precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
    
train_NN()


dev err: -0.712200279855
-0.00699214594392
[ 1.11754463  0.81421453  1.00955965  1.00062431  1.10637787  1.04133855
  1.169206    1.03120785  1.14633471  1.0647233   0.98357064  1.06661405
  0.98238184  1.07858159  1.04081247  1.04223612  0.85334134  0.91993454
  1.05962482  0.83641602  1.03537624  0.94141469  0.99621468  1.16301113
  0.87373343  1.17747756  1.06808572  1.06850778  1.05747627  0.99034372
  0.82199156  0.83837747  0.96215054]
[ 2.23508926  1.62842906  2.01911931  2.00124861  2.21275574  2.0826771
  2.33841199  2.0624157   2.29266943  2.12944659  1.96714129  2.13322809
  1.96476369  2.15716318  2.08162495  2.08447223  1.70668268  1.83986908
  2.11924965  1.67283205  2.07075248  1.88287139  1.99242937  2.32602226
  1.74746687  2.35495511  2.13617144  2.13701556  2.11495255  1.98068744
  1.64398312  1.67675493  1.92430107]

433 455
0  d  (0.74865111793518258, 0.77956081081081074, 0.73813646844857261, None)
too low
0 -5.85618950661
433 455
(0.74865111793518258, 0.779560810

In [33]:
#################################################old ##############

#All discrete snorkel Obj + removed alphas
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

def train_NN():
    print()
    result_dir = "./"
    config = projector.ProjectorConfig()
    tf.logging.set_verbosity(tf.logging.INFO)
    summary_writer = tf.summary.FileWriter(result_dir)

    tf.reset_default_graph()

    dim = 2 #(labels,scores)

    _x = tf.placeholder(tf.float64,shape=(dim,len(LFs)))

#     alphas = tf.get_variable('alpha', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(1,0.1,1217),
#                             dtype=tf.float64)

#     alphas = tf.constant([0],dtype=tf.float64,shape=len(LFs))
    thetas = tf.get_variable('theta', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(2,1,12),
                            dtype=tf.float64)

    l,s = tf.unstack(_x)

#     prelu_out_s = tf.maximum(tf.subtract(s,alphas), tf.zeros(shape=(len(LFs)),dtype=tf.float64))        

    mul_L_S = l

    phi_p1 = tf.reduce_sum(tf.multiply(mul_L_S,thetas))

    phi_n1 = tf.reduce_sum(tf.multiply(tf.negative(mul_L_S),thetas))

    phi_out = tf.stack([phi_n1,phi_p1])
    
    predict = tf.argmax(tf.nn.softmax(phi_out))

    loss = tf.negative(tf.reduce_logsumexp(phi_out))

    train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(loss) 


    check_op = tf.add_check_numerics_ops()

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    for i in range(1):
        c = 0
        te_prev=1
        total_te = 0
        for L_S_i in train_L_S:

            t,te_curr,_ = sess.run([thetas,loss,train_step],feed_dict={_x:L_S_i})
            total_te+=te_curr

#             if(abs(te_curr-te_prev)<1e-200):
#                 print("too low")
#                 break

            if(c%100==0):
                pl = []
                t_de=0
                for L_S_i in dev_L_S:
                    t,de_curr,p = sess.run([thetas,loss,predict],feed_dict={_x:L_S_i})
                    pl.append(p)
                    t_de+=de_curr
                predicted_labels = [-1 if x==0 else x for x in pl]
                print("dev err:",t_de/888)
                print(total_te/100)
                total_te=0
                print(t)
                print()
                print(predicted_labels.count(-1),predicted_labels.count(1))
                print(c," d ",precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
            c+=1
            te_prev = te_curr
        pl = []
        for L_S_i in dev_L_S:
            p = sess.run(predict,feed_dict={_x:L_S_i})
            pl.append(p)
        predicted_labels = [-1 if x==0 else x for x in pl]
        print(i,total_te)
        print(predicted_labels.count(-1),predicted_labels.count(1))
        print(precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
    
train_NN()


dev err: -3.60517677736
-0.0865666342491
[ 3.17544629  0.14214532  2.09559654  2.00634306  3.0638787   2.41338549
  3.69205997  2.31207849  3.46334713  2.64733297  1.83570643  2.66614047
  1.82381844  2.78581588  2.40812473  2.42236116  0.53341341  1.19934539
  2.59624823  0.36416023  2.35386239  1.41425373  1.96214684  3.63011132
  0.73733434  3.77477556  2.6808572   2.68507779  2.57476273  1.90343721
  0.21991559  0.38377467  1.62150535]

602 286
0  d  (0.68739400134748974, 0.6841216216216216, 0.68564471843754848, None)
dev err: -3.60615247481
-2.9584399638
[ 3.17570043  0.14264532  2.09609654  2.00654305  3.06437869  2.41458216
  3.69205997  2.31257849  3.46404713  2.64753297  1.83590643  2.66674047
  1.82381844  2.78660941  2.40842472  2.42236116  0.53341341  1.19979384
  2.59769944  0.36416023  2.35484227  1.41572046  1.96214684  3.63029681
  0.73794878  3.77497554  2.6808572   2.6850738   2.57506025  1.90337955
  0.21983238  0.38377467  1.6216291 ]

602 286
100  d  (0.6873940013

dev err: -3.62961459819
-3.60372938017
[ 3.17910342  0.15551621  2.10639601  2.01593976  3.07925966  2.43869423
  3.694357    2.32510422  3.47614713  2.65603297  1.84550573  2.67674046
  1.82411755  2.79082648  2.41082471  2.42752129  0.53350954  1.21009442
  2.60905997  0.36435147  2.39024006  1.431047    1.96214684  3.63210903
  0.74825395  3.77976936  2.68104209  2.68510043  2.57661063  1.90344162
  0.2215097   0.38579574  1.63136575]

602 286
1600  d  (0.68739400134748974, 0.6841216216216216, 0.68564471843754848, None)
dev err: -3.63066623398
-3.25962385308
[ 3.17980333  0.15571613  2.10649601  2.01613975  3.07955948  2.44084626
  3.69455484  2.32530414  3.47674713  2.65603297  1.84580554  2.67734046
  1.82411755  2.79190537  2.41132471  2.42804923  0.53350954  1.21019207
  2.61080535  0.36435147  2.39091782  1.43149065  1.96214684  3.63270898
  0.74835395  3.77976936  2.68114116  2.68510043  2.57669221  1.90344162
  0.22126592  0.38581769  1.63219973]

602 286
1700  d  (0.68739400

dev err: -3.65385103993
-3.0417076459
[ 3.18306429  0.16930397  2.1177941   2.02295266  3.09136286  2.47081322
  3.69674206  2.33824693  3.48994713  2.66173297  1.85341511  2.68794046
  1.82411755  2.79621286  2.4141247   2.43170011  0.53354221  1.22171813
  2.62297117  0.36435147  2.4207447   1.44470786  1.96244041  3.63566107
  0.75791077  3.78706014  2.68279559  2.68539887  2.57843199  1.90378167
  0.22222332  0.38636307  1.64104679]

602 286
3200  d  (0.68739400134748974, 0.6841216216216216, 0.68564471843754848, None)
dev err: -3.65567075994
-4.29047286134
[ 3.18316427  0.17071383  2.1190941   2.02315266  3.09186267  2.47270087
  3.69674206  2.33974678  3.49134713  2.66213297  1.85381492  2.68874046
  1.82411755  2.79651208  2.4142247   2.43197568  0.53354221  1.22294765
  2.62366972  0.36435147  2.42292379  1.44586449  1.96244041  3.63566107
  0.75862828  3.78795823  2.68279559  2.68539887  2.57871311  1.90378167
  0.22234508  0.38636307  1.64241623]

602 286
3300  d  (0.687394001

dev err: -3.67820206916
-2.91997017934
[ 3.1870207   0.18563581  2.13099282  2.02925232  3.10272957  2.4981415
  3.70193814  2.35402727  3.50394713  2.66753296  1.85981451  2.69924045
  1.82421726  2.7994446   2.4156247   2.43545689  0.53359672  1.2381852
  2.63616273  0.36438637  2.45158202  1.46050849  1.96292659  3.63745308
  0.76907863  3.79374654  2.68416829  2.68577958  2.58054049  1.90363635
  0.22438356  0.38714339  1.65151524]

602 286
4800  d  (0.68739400134748974, 0.6841216216216216, 0.68564471843754848, None)
dev err: -3.67944049416
-3.5530666023
[ 3.18721826  0.18634547  2.13149282  2.0301523   3.10432919  2.50004316
  3.70213795  2.35471856  3.50444713  2.66823296  1.86061415  2.69964045
  1.82421726  2.79963413  2.4156247   2.43563045  0.53359672  1.23913928
  2.63636259  0.36438637  2.45242931  1.46306422  1.96292659  3.63775281
  0.76968023  3.79403811  2.68416829  2.68577958  2.58054049  1.90363635
  0.22429911  0.38714339  1.65184999]

602 286
4900  d  (0.68739400134

dev err: -3.70235733601
-3.70375898682
[ 3.19334877  0.19825932  2.1415924   2.03724791  3.11792284  2.52206073
  3.70453632  2.36631762  3.51784713  2.67583296  1.86941406  2.71024045
  1.82421726  2.80597573  2.41942469  2.43926643  0.53365218  1.25327163
  2.64385287  0.36428637  2.48811345  1.47216206  1.96302659  3.64019902
  0.77953607  3.7992286   2.68446643  2.68606318  2.58320148  1.90378109
  0.22615913  0.38783725  1.65964576]

602 286
6400  d  (0.68739400134748974, 0.6841216216216216, 0.68564471843754848, None)
dev err: -3.70440031892
-4.77495549433
[ 3.19344876  0.19884888  2.1418924   2.03804791  3.11982224  2.52395371
  3.70453632  2.36690719  3.51824713  2.67763296  1.87061406  2.71054045
  1.82421726  2.80627563  2.41952469  2.44006078  0.53365218  1.25496842
  2.64539488  0.36428637  2.49129166  1.47144973  1.96302659  3.64019902
  0.7797368   3.79982787  2.6845655   2.68606318  2.5839805   1.90378109
  0.22605919  0.38853725  1.66052475]

602 286
6500  d  (0.68739400

dev err: -3.72788181234
-3.46397383145
[ 3.19674477  0.212064    2.1520909   2.04289165  3.13176384  2.55001623
  3.70643271  2.37954     3.53114713  2.68363296  1.8789119   2.72104045
  1.82421726  2.81254049  2.42272469  2.44335448  0.53361153  1.27000863
  2.65905101  0.36511658  2.52345601  1.48184267  1.9632882   3.64341354
  0.78768145  3.8079106   2.68622455  2.68682974  2.58669367  1.90363185
  0.22762533  0.38940935  1.6705904 ]

602 286
8000  d  (0.68739400134748974, 0.6841216216216216, 0.68564471843754848, None)
dev err: -3.73019469906
-5.33904634792
[ 3.19690171  0.21326377  2.1528909   2.04369165  3.13356384  2.55175012
  3.70643271  2.38073977  3.53204713  2.68523296  1.8803119   2.72184045
  1.82421726  2.812764    2.42282469  2.44335448  0.53361153  1.2713831
  2.65923366  0.36511658  2.52769015  1.48395465  1.9632882   3.64362431
  0.78850046  3.80861053  2.68622455  2.68682974  2.58679363  1.90381931
  0.22780017  0.39040818  1.67118087]

602 286
8100  d  (0.687394001

In [34]:
#All discrete except LF_Uncertain, LF_Treat_d class- weighted
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

def train_NN(weight):
    print("weight:",weight)
    result_dir = "./"
    config = projector.ProjectorConfig()
    tf.logging.set_verbosity(tf.logging.INFO)
    summary_writer = tf.summary.FileWriter(result_dir)

    tf.reset_default_graph()

    dim = 2 #(labels,scores)

    _x = tf.placeholder(tf.float64,shape=(dim,len(LFs)))

    alphas = tf.get_variable('alpha', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(0,1,1324),
                            dtype=tf.float64)

    thetas = tf.get_variable('theta', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(2,0.2,1324),
                            dtype=tf.float64)

    l,s = tf.unstack(_x)

    prelu_out_s = tf.maximum(tf.subtract(s,alphas), tf.zeros(shape=(len(LFs)),dtype=tf.float64))        

    mul_L_S = tf.multiply(l,prelu_out_s)

    phi_p1 = tf.reduce_sum(tf.multiply(mul_L_S,thetas))

    phi_n1 = tf.reduce_sum(tf.multiply(tf.negative(mul_L_S),thetas))

    phi_out = tf.stack([phi_n1,phi_p1])
    
    

    
    class_weights = tf.constant([weight, 1.0 - weight],dtype=tf.float64)
   
    weighted_phi_out = tf.multiply(phi_out, class_weights) 
    
    predict = tf.argmax(tf.nn.softmax(weighted_phi_out))
    
    loss = tf.negative(tf.reduce_logsumexp(weighted_phi_out))

    train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(loss) 


    check_op = tf.add_check_numerics_ops()

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    for i in range(1):
        c = 0
        te_prev=1
        total_te = 0
        for L_S_i in train_L_S:

            a,t,te_curr,_ = sess.run([alphas,thetas,loss,train_step],feed_dict={_x:L_S_i})
            total_te+=te_curr

            if(abs(te_curr-te_prev)<1e-200):
                print("too low")
                break

            if(c%100==0):
                pl = []
                t_de=0
                for L_S_i in dev_L_S:
                    a,t,de_curr,p = sess.run([alphas,thetas,loss,predict],feed_dict={_x:L_S_i})
                    pl.append(p)
                    t_de+=de_curr
                predicted_labels = [-1 if x==0 else x for x in pl]
#                 print("dev err:",t_de/888)
#                 print(total_te/100)
                total_te=0
#                 print(a)
#                 print(t)
#                 print()
#                 print(predicted_labels.count(-1),predicted_labels.count(1))
#                 print(c," d ",precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
            c+=1
            te_prev = te_curr
        pl = []
        for L_S_i in dev_L_S:
            p = sess.run(predict,feed_dict={_x:L_S_i})
            pl.append(p)
        predicted_labels = [-1 if x==0 else x for x in pl]
        print(i,total_te)
#         print(predicted_labels.count(-1),predicted_labels.count(1))
#         print(precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
        p,r,f,_ = precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro')
        return (p,r)
#train_NN()

In [35]:
#All discrete except LF_d_induced_by_c + seed
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

def train_NN():
    print()
    result_dir = "./"
    config = projector.ProjectorConfig()
    tf.logging.set_verbosity(tf.logging.INFO)
    summary_writer = tf.summary.FileWriter(result_dir)

    tf.reset_default_graph()

    dim = 2 #(labels,scores)

    _x = tf.placeholder(tf.float64,shape=(dim,len(LFs)))

    alphas = tf.get_variable('alpha', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(1,0.1,12),
                            dtype=tf.float64)

    thetas = tf.get_variable('theta', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(2,0.2,12),
                            dtype=tf.float64)

    l,s = tf.unstack(_x)

    prelu_out_s = tf.maximum(tf.subtract(s,alphas), tf.zeros(shape=(len(LFs)),dtype=tf.float64))        

    mul_L_S = tf.multiply(l,prelu_out_s)

    phi_p1 = tf.reduce_sum(tf.multiply(mul_L_S,thetas))

    phi_n1 = tf.reduce_sum(tf.multiply(tf.negative(mul_L_S),thetas))

    phi_out = tf.stack([phi_n1,phi_p1])
    
    predict = tf.argmax(tf.nn.softmax(phi_out))

    loss = tf.negative(tf.reduce_logsumexp(phi_out))

    train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(loss) 


    check_op = tf.add_check_numerics_ops()

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    for i in range(1):
        c = 0
        te_prev=1
        total_te = 0
        for L_S_i in train_L_S:

            a,t,te_curr,_ = sess.run([alphas,thetas,loss,train_step],feed_dict={_x:L_S_i})
            total_te+=te_curr

            if(abs(te_curr-te_prev)<1e-200):
                print("too low")
                break

            if(c%100==0):
                pl = []
                t_de=0
                for L_S_i in dev_L_S:
                    a,t,de_curr,p = sess.run([alphas,thetas,loss,predict],feed_dict={_x:L_S_i})
                    pl.append(p)
                    t_de+=de_curr
                predicted_labels = [-1 if x==0 else x for x in pl]
                print("dev err:",t_de/888)
                print(total_te/100)
                total_te=0
                print(a)
                print(t)
                print()
                print(predicted_labels.count(-1),predicted_labels.count(1))
                print(c," d ",precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
            c+=1
            te_prev = te_curr
        pl = []
        for L_S_i in dev_L_S:
            p = sess.run(predict,feed_dict={_x:L_S_i})
            pl.append(p)
        predicted_labels = [-1 if x==0 else x for x in pl]
        print(i,total_te)
        print(predicted_labels.count(-1),predicted_labels.count(1))
        print(precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
    
train_NN()


dev err: -0.712200279855
-0.00699214594392
[ 1.11754463  0.81421453  1.00955965  1.00062431  1.10637787  1.04133855
  1.169206    1.03120785  1.14633471  1.0647233   0.98357064  1.06661405
  0.98238184  1.07858159  1.04081247  1.04223612  0.85334134  0.91993454
  1.05962482  0.83641602  1.03537624  0.94141469  0.99621468  1.16301113
  0.87373343  1.17747756  1.06808572  1.06850778  1.05747627  0.99034372
  0.82199156  0.83837747  0.96215054]
[ 2.23508926  1.62842906  2.01911931  2.00124861  2.21275574  2.0826771
  2.33841199  2.0624157   2.29266943  2.12944659  1.96714129  2.13322809
  1.96476369  2.15716318  2.08162495  2.08447223  1.70668268  1.83986908
  2.11924965  1.67283205  2.07075248  1.88287139  1.99242937  2.32602226
  1.74746687  2.35495511  2.13617144  2.13701556  2.11495255  1.98068744
  1.64398312  1.67675493  1.92430107]

433 455
0  d  (0.74865111793518258, 0.77956081081081074, 0.73813646844857261, None)
too low
0 -5.85618950661
433 455
(0.74865111793518258, 0.779560810

In [36]:
#All discrete except  LF_d_induced_by_c
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

def train_NN():
    print()
    result_dir = "./"
    config = projector.ProjectorConfig()
    tf.logging.set_verbosity(tf.logging.INFO)
    summary_writer = tf.summary.FileWriter(result_dir)

    tf.reset_default_graph()

    dim = 2 #(labels,scores)

    _x = tf.placeholder(tf.float64,shape=(dim,len(LFs)))

    alphas = tf.get_variable('alpha', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(1,0.5),
                            dtype=tf.float64)

    thetas = tf.get_variable('theta', _x.get_shape()[-1],initializer=tf.truncated_normal_initializer(2,0.2),
                            dtype=tf.float64)

    l,s = tf.unstack(_x)

    prelu_out_s = tf.maximum(tf.subtract(s,alphas), tf.zeros(shape=(len(LFs)),dtype=tf.float64))        

    mul_L_S = tf.multiply(l,prelu_out_s)

    phi_p1 = tf.reduce_sum(tf.multiply(mul_L_S,thetas))

    phi_n1 = tf.reduce_sum(tf.multiply(tf.negative(mul_L_S),thetas))

    phi_out = tf.stack([phi_n1,phi_p1])
    
    predict = tf.argmax(tf.nn.softmax(phi_out))

    loss = tf.negative(tf.reduce_logsumexp(phi_out))

    train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(loss) 


    check_op = tf.add_check_numerics_ops()

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    for i in range(1):
        c = 0
        te_prev=1
        total_te = 0
        for L_S_i in train_L_S:

            a,t,te_curr,_ = sess.run([alphas,thetas,loss,train_step],feed_dict={_x:L_S_i})
            total_te+=te_curr

#             if(abs(te_curr-te_prev)<1e-500):
#                 print("too low")
#                 break

            if(c%100==0):
                pl = []
                t_de=0
                for L_S_i in dev_L_S:
                    a,t,de_curr,p = sess.run([alphas,thetas,loss,predict],feed_dict={_x:L_S_i})
                    pl.append(p)
                    t_de+=de_curr
                predicted_labels = [-1 if x==0 else x for x in pl]
                print("dev err:",t_de/888)
                print(total_te/100)
                total_te=0
                print(a)
                print(t)
                print()
                print(predicted_labels.count(-1),predicted_labels.count(1))
                print(c," d ",precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
            c+=1
            te_prev = te_curr
        pl = []
        for L_S_i in dev_L_S:
            p = sess.run(predict,feed_dict={_x:L_S_i})
            pl.append(p)
        predicted_labels = [-1 if x==0 else x for x in pl]
        print(i,total_te)
        print(predicted_labels.count(-1),predicted_labels.count(1))
        print(precision_recall_fscore_support(np.array(gold_labels_dev),np.array(predicted_labels),average='macro'))
    
train_NN()


dev err: -1.20590665188
-0.0385422680085
[ 1.19628046  0.9276742   1.31060039  0.16308348  0.99501871  0.94911692
  0.87817252  1.37103561  1.40198128  1.29779521  0.45856949  0.34815053
  0.79839148  0.92115982  0.73630584  1.32568734  1.86127909  1.18305477
  1.39213626  0.81193337  0.05625099  1.7474816   1.61647412  1.39171585
  0.84157517  0.94045897  0.99171402  0.66098666  0.88376345  0.39514781
  0.83746709  0.88067378  1.9023922 ]
[ 2.08271356  2.24232077  2.35752004  2.34143747  1.93734187  1.74820904
  1.81243605  1.83522727  2.06406405  2.16052834  1.62621938  2.07120538
  1.89038508  2.11583798  1.73162465  2.0658606   2.13936376  2.23522882
  1.90706103  2.06488591  1.9984338   2.25715343  2.24537339  1.88512249
  2.06115614  2.23695593  1.91862153  2.33915741  1.90807928  2.07088382
  2.20570595  1.78381309  1.83477735]

794 94
0  d  (0.64078996730800153, 0.55996621621621623, 0.54001554001553997, None)
dev err: -1.20920768972
-1.04683382798
[ 1.19628046  0.92675673  1.3

dev err: -1.24709343626
-1.33423245234
[ 1.19628046  0.91842687  1.31060039  0.15215331  0.98234163  0.93944707
  0.8767785   1.37103561  1.40198128  1.29779521  0.45068039  0.34004725
  0.79825396  0.92079265  0.73624159  1.32568734  1.86127909  1.18305477
  1.39213626  0.81193337  0.00831409  1.7474816   1.61647412  1.39171585
  0.83442307  0.93731614  0.99152858  0.66035571  0.88282672  0.3943918
  0.83494498  0.87854638  1.9023922 ]
[ 2.08271356  2.24263757  2.35752004  2.34536606  1.93741534  1.74851678
  1.81253022  1.83522727  2.06406405  2.16052834  1.62886261  2.07384697
  1.89039975  2.11585159  1.73163436  2.0658606   2.13936376  2.23522882
  1.90706103  2.06488591  2.02151164  2.25715343  2.24537339  1.88512249
  2.06171758  2.23704164  1.91862234  2.33924891  1.90813651  2.07110472
  2.20589313  1.78395654  1.83477735]

794 94
900  d  (0.64078996730800153, 0.55996621621621623, 0.54001554001553997, None)
dev err: -1.25165073042
-1.23405407094
[ 1.19628046  0.91689245  1.310

dev err: -1.29665164964
-1.33013717103
[ 1.19628046  0.91141433  1.31060039  0.13976025  0.96669601  0.92222497
  0.87546967  1.37103561  1.40198128  1.29779521  0.44247248  0.33382147
  0.79818516  0.92034683  0.73547771  1.32568734  1.86127909  1.18305477
  1.39213626  0.8116531  -0.04667258  1.7474816   1.61647412  1.39171585
  0.82957494  0.9328985   0.99149383  0.66044658  0.88171011  0.394533
  0.83157701  0.87551303  1.9023922 ]
[ 2.08271356  2.2429032   2.35752004  2.34987401  1.93762036  1.74919715
  1.81261963  1.83522727  2.06406405  2.16052834  1.63164861  2.07571155
  1.89040709  2.1158682   1.73175081  2.0658606   2.13936376  2.23522882
  1.90706103  2.06491145  2.04904459  2.25715343  2.24537339  1.88512249
  2.06211207  2.2371696   1.91862249  2.33923571  1.90820536  2.07106343
  2.20614753  1.78416544  1.83477735]

794 94
1800  d  (0.64078996730800153, 0.55996621621621623, 0.54001554001553997, None)
dev err: -1.29954064831
-1.07259500803
[ 1.19628046  0.91085349  1.310

dev err: -1.34003946701
-1.15109612231
[ 1.19628046  0.90063342  1.31060039  0.1301735   0.954924    0.90889415
  0.87463454  1.37103561  1.40198128  1.29779521  0.4350103   0.32505926
  0.79818516  0.9188522   0.734333    1.32568734  1.86127909  1.18305477
  1.39213626  0.8116531  -0.09275964  1.7474816   1.61647412  1.39171585
  0.82215121  0.92704527  0.99083915  0.66044658  0.88056531  0.39382918
  0.83052354  0.87490122  1.9023922 ]
[ 2.08271356  2.24335434  2.35752004  2.35340002  1.93785788  1.74984001
  1.81267716  1.83522727  2.06406405  2.16052834  1.63421314  2.07836089
  1.89040709  2.1159249   1.73192597  2.0658606   2.13936376  2.23522882
  1.90706103  2.06491145  2.07296338  2.25715343  2.24537339  1.88512249
  2.06273829  2.23735255  1.91862547  2.33923571  1.90827663  2.07126928
  2.20622815  1.78420821  1.83477735]

794 94
2700  d  (0.64078996730800153, 0.55996621621621623, 0.54001554001553997, None)
dev err: -1.34586075979
-1.2558948374
[ 1.19628046  0.90048295  1.31

dev err: -1.38761873755
-1.35802626684
[ 1.19628046  0.8921065   1.31060039  0.1198623   0.94260698  0.89192681
  0.87454908  1.37103561  1.40198128  1.29779521  0.42817836  0.31790698
  0.79818516  0.91797637  0.73275413  1.32568734  1.86127909  1.18305477
  1.39213626  0.8116531  -0.14176319  1.7474816   1.61647412  1.39171585
  0.81504248  0.92190397  0.99040821  0.65996304  0.87886985  0.39381299
  0.82842445  0.8740081   1.9023922 ]
[ 2.08271356  2.24374773  2.35752004  2.35723011  1.93818292  1.75080474
  1.81268307  1.83522727  2.06406405  2.16052834  1.63658739  2.08036092
  1.89040709  2.1159585   1.73216879  2.0658606   2.13936376  2.23522882
  1.90706103  2.06491145  2.09920691  2.25715343  2.24537339  1.88512249
  2.06336287  2.23752591  1.91862756  2.33930593  1.90838341  2.071274
  2.20639032  1.78427101  1.83477735]

797 91
3600  d  (0.63264025811077251, 0.55489864864864868, 0.53249507481261971, None)
dev err: -1.39303201925
-1.50207803139
[ 1.19628046  0.8910159   1.310

dev err: -1.43695285918
-1.27410057574
[ 1.19628046  0.8836576   1.31060039  0.10978784  0.92998359  0.87719336
  0.87272423  1.37103561  1.40198128  1.29779521  0.42138711  0.3108613
  0.79811635  0.91744275  0.73192759  1.32568734  1.86127909  1.18305477
  1.39213626  0.81157661 -0.19161768  1.7474816   1.61647412  1.39171585
  0.80884067  0.9168904   0.98959312  0.65918883  0.87843872  0.39398616
  0.82603249  0.87232842  1.9023922 ]
[ 2.08271356  2.24416936  2.35752004  2.36100976  1.93859719  1.75177536
  1.81281021  1.83522727  2.06406405  2.16052834  1.63897231  2.0825273
  1.89041444  2.11597919  1.73229647  2.0658606   2.13936376  2.23522882
  1.90706103  2.06491842  2.12673271  2.25715343  2.24537339  1.88512249
  2.06392746  2.2377063   1.91863178  2.33941857  1.90841081  2.07122332
  2.20657753  1.78439034  1.83477735]

797 91
4500  d  (0.63264025811077251, 0.55489864864864868, 0.53249507481261971, None)
dev err: -1.44141239127
-1.27959106622
[ 1.19628046  0.88222122  1.310

dev err: -1.48924609915
-1.71071108381
[ 1.19628046  0.87625527  1.31060039  0.09991948  0.91600843  0.86351994
  0.87135828  1.37103561  1.40198128  1.29779521  0.41472843  0.30370502
  0.79811635  0.91687027  0.73069925  1.32568734  1.86127909  1.18305477
  1.39213626  0.81169605 -0.24327032  1.7474816   1.61647412  1.39171585
  0.80243914  0.91218683  0.98977566  0.65899085  0.87756884  0.39291784
  0.82179753  0.87107271  1.9023922 ]
[ 2.08271356  2.24456478  2.35752004  2.36474784  1.93915157  1.75278645
  1.81290659  1.83522727  2.06406405  2.16052834  1.64133459  2.08460802
  1.89041444  2.11600144  1.73248691  2.0658606   2.13936376  2.23522882
  1.90706103  2.06490752  2.15609674  2.25715343  2.24537339  1.88512249
  2.06452976  2.23788572  1.91863079  2.33944741  1.90846639  2.07153611
  2.20691529  1.78448058  1.83477735]

797 91
5400  d  (0.63264025811077251, 0.55489864864864868, 0.53249507481261971, None)
dev err: -1.49538858978
-1.41869202525
[ 1.19628046  0.87504847  1.3

dev err: -1.55398366777
-1.98888705645
[ 1.19628046  0.86693849  1.31060039  0.09166537  0.90466002  0.84883969
  0.87071324  1.37103561  1.40198128  1.29779521  0.40787143  0.29648771
  0.79811635  0.91613564  0.72983425  1.32568734  1.86127909  1.18305477
  1.39213626  0.81169605 -0.30697081  1.7474816   1.61647412  1.39171585
  0.79559332  0.90716413  0.98938684  0.658447    0.87537277  0.39223994
  0.82008352  0.87081434  1.9023922 ]
[ 2.08271356  2.24509715  2.35752004  2.36790151  1.93967569  1.75399003
  1.81295244  1.83522727  2.06406405  2.16052834  1.64379187  2.08692957
  1.89041444  2.11603026  1.73262147  2.0658606   2.13936376  2.23522882
  1.90706103  2.06490752  2.19344382  2.25715343  2.24537339  1.88512249
  2.06519554  2.23808824  1.91863288  2.33952672  1.90860844  2.07173483
  2.20705427  1.78449926  1.83477735]

797 91
6300  d  (0.63264025811077251, 0.55489864864864868, 0.53249507481261971, None)
dev err: -1.56241714657
-1.76111526923
[ 1.19628046  0.86720377  1.3

dev err: -1.6239585298
-1.38254539494
[ 1.19628046  0.85915845  1.31060039  0.08126949  0.88668106  0.83229842
  0.86964196  1.37103561  1.40198128  1.29779521  0.39576464  0.29044012
  0.79811635  0.91520793  0.72896264  1.32568734  1.86127909  1.18305477
  1.39213626  0.81100147 -0.3715608   1.7474816   1.61647412  1.39171585
  0.79283271  0.9000893   0.98911477  0.65727858  0.87312977  0.39145275
  0.81810047  0.86905887  1.9023922 ]
[ 2.08271356  2.24557118  2.35752004  2.37190839  1.94064162  1.75549227
  1.8130291   1.83522727  2.06406405  2.16052834  1.64819118  2.0888664
  1.89041444  2.11606703  1.73275748  2.0658606   2.13936376  2.23522882
  1.90706103  2.06497095  2.23253067  2.25715343  2.24537339  1.88512249
  2.06546985  2.23839254  1.9186344   2.33969753  1.90875612  2.07196587
  2.20721669  1.78462708  1.83477735]

797 91
7200  d  (0.63264025811077251, 0.55489864864864868, 0.53249507481261971, None)
dev err: -1.63142330158
-1.65004594541
[ 1.19628046  0.85844856  1.310

dev err: -1.69263810502
-2.43558580867
[ 1.19628046  0.8519076   1.31060039  0.07463931  0.87438835  0.81290099
  0.86937767  1.37103561  1.40198128  1.29779521  0.38845501  0.28543504
  0.79811635  0.9156311   0.72833415  1.32568734  1.86127909  1.18305477
  1.39213626  0.81011957 -0.43306315  1.7474816   1.61647412  1.39171585
  0.78800307  0.89281104  0.98835343  0.65673352  0.87155199  0.39044474
  0.81524965  0.8661274   1.9023922 ]
[ 2.08271356  2.24603703  2.35752004  2.37448409  1.94139763  1.75745061
  1.81304811  1.83522727  2.06406405  2.16052834  1.65088463  2.09047575
  1.89041444  2.11604991  1.73285581  2.0658606   2.13936376  2.23522882
  1.90706103  2.06505181  2.27083152  2.25715343  2.24537339  1.88512249
  2.06595924  2.23872893  1.91863883  2.33977741  1.90886158  2.0722621
  2.20745332  1.78484438  1.83477735]

797 91
8100  d  (0.63264025811077251, 0.55489864864864868, 0.53249507481261971, None)
dev err: -1.6999336546
-1.49037761599
[ 1.19628046  0.85260094  1.310