In [1]:
from draco.learn import linear
from draco.learn import data_util
from draco.run import run
from draco.spec import Task, Query
from draco.learn.helper import count_violations
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import logging

# Configure the root logger with the default handler, then raise its threshold
# to WARN so lower-severity records are filtered out.
logging.basicConfig()
logging.getLogger().setLevel(logging.WARN)

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
# Presumably the held-out fraction; passed as `test_size` to both
# data_util.load_data and linear.train_model in the cells below.
test_size = 0.3

In [3]:
# Load the data (the second return value is discarded) and keep only the rows
# whose `source` column equals 'kim2018'.
data, _ = data_util.load_data(test_size=test_size)
data = data[data.source == 'kim2018']

# Raw negative/positive specs; looked up by row name in the comparison loop.
raw_data = data_util.load_neg_pos_specs()

In [4]:
# Per-row difference between the `negative` and `positive` parts of `data`.
X = data.negative - data.positive
# All-ones vector with one entry per row of X.
# NOTE(review): `y` is not referenced by any later cell visible here.
y = np.ones(len(X))

In [5]:
# Train the model on the difference vectors, then predict on those same rows.
# NOTE(review): how train_model splits the data with test_size is not visible
# here; the train/dev scores shown in the output come from this cell.
clf = linear.train_model(X, test_size=test_size)
pred = clf.predict(X)

Train score:  0.9582366589327146
Dev score:  0.978494623655914


In [6]:
# First row of the fitted coefficients and the matching column labels of X;
# the two are zipped together positionally in the next cell.
weights = clf.coef_[0]
names = X.columns

In [7]:
# Weights scaled by 10000 and truncated with int(), keyed by '<name>_weight'.
const_dict = {
    f'{name}_weight': int(scaled)
    for name, scaled in zip(names, weights*10000)
}
# The same scaled integer weights, keyed by the bare name.
const_dict_no_weight = {
    name: int(scaled)
    for name, scaled in zip(names, weights*10000)
}
# The unscaled weights, keyed by the bare name.
weight_dict = {name: weight for name, weight in zip(names, weights)}

const_dict, weight_dict;

In [8]:
# Keep only the rows of X for which the classifier predicted 1.
only_correct = X[pred == 1]

# Display how many rows were kept next to the total number of rows in `data`.
len(only_correct), len(data)

(595, 617)

In [None]:
def aggregate(violations, weights):
    ''' Combine violation counts into one weighted cost.

    violations: mapping from a name to its count.
    weights: mapping from a name to its weight; it must contain every name
        that appears in `violations`, otherwise the lookup fails.

    Returns the sum of weight * count over all violations, starting from 0.0
    (so an empty `violations` yields 0.0).
    '''
    total = 0.0
    for rule, count in violations.items():
        weighted = weights[rule] * count
        total += weighted
    return total

# ASP program files for the solver-based cost check. They are only referenced
# by the disabled `run(...)` block at the end of the loop below.
files = ['define.lp', 'hard.lp', 'soft.lp', 'assign_weights.lp', 'output.lp']  # cost.lp

# For every pair the classifier got right, check that the cost derived from
# the violation counts ranks the negative spec above the positive one, first
# with the raw weights and then with the integer-scaled weights.
for name in only_correct.index:
    print(name)

    pair = raw_data[name]

    task_n = Task(pair.data, Query.from_vegalite(pair.negative), task=pair.task)
    task_p = Task(pair.data, Query.from_vegalite(pair.positive), task=pair.task)

    # Sanity check: every row in only_correct was predicted as 1.
    assert clf.predict([X.loc[name]]) == 1

    data_entry = data.loc[name]

    # print("Prediction negative", clf.coef_.dot(np.array([data_entry.negative]).T))
    print("Prediction negative:", clf.decision_function([data_entry.negative])[0])
    # print("Prediction positive", clf.coef_.dot(np.array([data_entry.positive]).T))
    print("Prediction positive:", clf.decision_function([data_entry.positive])[0])

    # Count the violations once per task and reuse the result for the
    # consistency check and both cost computations.
    violations_n = count_violations(task_n)
    violations_p = count_violations(task_p)

    # the violation vectors have to be the same
    assert violations_n == data_entry.negative[data_entry.negative > 0].to_dict()

    # Cost with the raw model weights.
    cn = aggregate(violations_n, weight_dict)
    cp = aggregate(violations_p, weight_dict)

    print(cn, cp)
    assert cn > cp, "Violations should result in correct cost"

    # Cost with the integer-scaled weights (the values handed to the solver).
    cn = aggregate(violations_n, const_dict_no_weight)
    cp = aggregate(violations_p, const_dict_no_weight)

    print(cn, cp)
    assert cn > cp, "Violations with asp weights should result in correct cost"

#     cn = run(task_n, constants=const_dict, files=files, silence_warnings=True, debug=True).cost
#     cp = run(task_p, constants=const_dict, files=files, silence_warnings=True, debug=True).cost
#     print(cn, cp)
#     assert cn > cp, "Costs should have correct order"

    print()