# Find Bugs

Use this notebook to find wrong predictions. We can then visualize the mispredicted pairs of specs later.

In [122]:
import json
import random

import numpy as np
from sklearn import svm

from draco.learn import data_util
from draco.learn import linear

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [123]:
# Load the dataset and keep the first part of the split; the second part is
# discarded here.
data = data_util.load_data()
train, _ = data_util.split_dataset(data, ratio=0.7)
X, y = linear.prepare_data(train)

# Fit a linear SVM on the prepared training pairs (fit returns the estimator).
clf = svm.LinearSVC(C=1).fit(X, y)

# Score on the same data the model was fitted on.
print("Score: ", clf.score(X, y))

Score:  0.980722891566265


In [124]:
# Get the pairs that are predicted wrong.
# Rows of X labelled 0 are the negative pairs; every one of them that the
# classifier does not label 0 is a misprediction.
negative_pairs = X[y == 0]

predicted = clf.predict(negative_pairs)
bug_idx = predicted != 0  # boolean mask over the rows of negative_pairs

# Positions (within negative_pairs) of the mispredicted rows.
np.flatnonzero(bug_idx)

array([ 36, 106, 124, 131, 167, 187, 237, 399])

In [125]:
# Sanity check: the mispredicted positions for the positive pairs should be the
# same as the ones found for the negative pairs.
positive_pairs = X[y == 1]

predicted = clf.predict(positive_pairs)
# Boolean mask over the rows of positive_pairs; the next cell uses this mask to
# look up the matching entries of train.index.
bug_idx = predicted != 1

# Positions (within positive_pairs) of the mispredicted rows.
np.flatnonzero(bug_idx)

array([ 36, 106, 124, 131, 167, 187, 237, 399])

In [126]:
# Get the indexes in the original data: apply the boolean misprediction mask to
# train.index so each mispredicted pair is identified by its label in `train`.
# NOTE(review): assumes bug_idx has one entry per row of train — confirm against
# linear.prepare_data.
bugs = train.index[bug_idx]
bugs

Int64Index([119, 69, 404, 388, 161, 328, 403, 531], dtype='int64')

In [127]:
# Fetch the raw data; a later cell indexes it with the values in `bugs` and
# unpacks each entry as (schema, negative, positive).
raw_data = data_util.get_raw_data()

In [131]:
# TODO: generate better data

# Placeholder rows that get attached to every exported spec. A dedicated, seeded
# generator keeps the rows identical across kernel restarts (so the exported
# file does not change on every run) without touching the global `random` state.
rng = random.Random(0)

vals = [{
    'q1': rng.normalvariate(2, 2),
    'q2': rng.normalvariate(2, 2),
    'n': rng.randint(0, 7)
} for _ in range(10)]

In [132]:
# Build one {'negative', 'positive'} record per mispredicted pair, attaching the
# same placeholder rows to both specs.
# Each spec is shallow-copied with its 'data' entry overridden, so the entries
# stored in raw_data are left unmodified.
# NOTE(review): assumes the last two items of raw_data[i] are dicts — confirm
# against data_util.get_raw_data().
bug_specs = []

for i in bugs:
    # schema is unpacked to match the three-item entry but is not used here.
    schema, negative, positive = raw_data[i]
    bug_specs.append({
        'negative': {**negative, 'data': {'values': vals}},
        'positive': {**positive, 'data': {'values': vals}},
    })

In [133]:
# Persist the collected pairs as indented JSON.
with open('../data/bugs/bugs.json', 'w') as out_file:
    out_file.write(json.dumps(bug_specs, indent=2))