# Find Bugs

Use this notebook to find wrong predictions. We can then later visualize the pairs of specs.

In [11]:
from draco.learn import data_util
from draco.learn import linear
from sklearn import svm
import random
import json
import numpy as np

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
# Load the labelled pairs; the second value returned by load_data() is not
# used in this notebook.
train_dev, _ = data_util.load_data()

# One row per pair: feature vector of the positive spec minus that of the
# negative spec.
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0; it yields the same NumPy array.
X = (train_dev.positive - train_dev.negative).values

## Cross validation

Cross validate the model by running it over various subsets of the input data.

In [3]:
from sklearn.model_selection import LeaveOneOut

loo = LeaveOneOut()

# Positions (row numbers in X) of held-out pairs that the model gets wrong,
# and the signed decision value the model assigned to each of them.
bug_idx = []
bug_confidence = []

for train_index, test_index in loo.split(X):
    # Indexing with an integer array returns a copy, so negating a row of
    # X_train below never modifies X itself.
    X_train, X_test = X[train_index], X[test_index]

    # Every row is (positive - negative), so the correct label is always +1.
    y_train = np.ones(len(X_train))

    # swap first example so the training set contains both classes
    # (a classifier cannot be fitted on a single label)
    X_train[0] = -X_train[0]
    y_train[0] = -y_train[0]

    # Fix random_state: LinearSVC's solver shuffles the data with a
    # pseudo-random generator, so without a seed the reported bugs and
    # confidences are not reproducible across runs.
    clf = svm.LinearSVC(C=1, fit_intercept=False, random_state=0)
    clf.fit(X_train, y_train)

    # The held-out pair should be classified as +1; anything else is a bug.
    if clf.predict(X_test)[0] != 1:
        bug_idx.append(test_index[0])
        bug_confidence.append(clf.decision_function(X_test)[0])
        print("Found bug", test_index[0])

Found bug 748


In [4]:
# Show the flagged row positions in X next to their decision values.
bug_idx, bug_confidence

([748], [-0.18631813592593938])

## Process bugs

In [5]:
# get the indexes in the original data
# (bug_idx holds row positions in X; train_dev.index turns them back into
# the index labels of train_dev)
bugs = train_dev.index[bug_idx]
bugs

Int64Index([1760], dtype='int64')

In [6]:
# Load the collection that the bug labels are looked up in further down
# (pos_neg_data[bi]); presumably the raw positive/negative spec pairs --
# confirm against data_util.load_neg_pos_data.
pos_neg_data = data_util.load_neg_pos_data()

In [7]:
# TODO: generate better data

# Use a dedicated, seeded generator so the sample rows (which end up in the
# exported bug specs) are identical on every run and the global `random`
# state is left untouched.
SAMPLE_SEED = 0
sample_rng = random.Random(SAMPLE_SEED)

# Ten synthetic rows: two normally distributed quantitative fields
# (mean 2, standard deviation 2, rounded to 3 decimals) and one integer
# field drawn uniformly from 0..7 inclusive.
vals = [{
    'q1': round(sample_rng.normalvariate(2, 2), 3),
    'q2': round(sample_rng.normalvariate(2, 2), 3),
    'n': sample_rng.randint(0, 7)
} for _ in range(10)]

In [8]:
# Assemble one record per bug: both specs of the pair, each with the sample
# rows attached as inline data, plus the model's decision value.
bug_specs = []

for label, score in zip(bugs, bug_confidence):
    pair = pos_neg_data[label]

    neg_spec = pair.negative
    pos_spec = pair.positive

    # Both specs get their own 'data' wrapper around the same `vals` list.
    neg_spec['data'] = {'values': vals}
    pos_spec['data'] = {'values': vals}

    record = {
        'negative': neg_spec,
        'positive': pos_spec,
        'confidence': score
    }
    bug_specs.append(record)

In [9]:
# Display the assembled bug records for inspection.
bug_specs

[{'confidence': -0.18631813592593938,
  'negative': {'data': {'values': [{'n': 3, 'q1': 3.423, 'q2': 1.115},
     {'n': 1, 'q1': 2.607, 'q2': 4.2},
     {'n': 4, 'q1': 3.515, 'q2': 6.028},
     {'n': 4, 'q1': 1.57, 'q2': 2.401},
     {'n': 2, 'q1': 3.238, 'q2': -0.063},
     {'n': 6, 'q1': 4.194, 'q2': 0.738},
     {'n': 6, 'q1': 1.464, 'q2': 0.718},
     {'n': 2, 'q1': 2.783, 'q2': -0.224},
     {'n': 2, 'q1': -0.72, 'q2': 0.27},
     {'n': 6, 'q1': 4.512, 'q2': 3.771}]},
   'encoding': {'x': {'field': 'n', 'type': 'ordinal'},
    'y': {'aggregate': 'mean', 'field': 'q2', 'type': 'quantitative'}},
   'mark': 'bar'},
  'positive': {'data': {'values': [{'n': 3, 'q1': 3.423, 'q2': 1.115},
     {'n': 1, 'q1': 2.607, 'q2': 4.2},
     {'n': 4, 'q1': 3.515, 'q2': 6.028},
     {'n': 4, 'q1': 1.57, 'q2': 2.401},
     {'n': 2, 'q1': 3.238, 'q2': -0.063},
     {'n': 6, 'q1': 4.194, 'q2': 0.738},
     {'n': 6, 'q1': 1.464, 'q2': 0.718},
     {'n': 2, 'q1': 2.783, 'q2': -0.224},
     {'n': 2, 'q1'

In [10]:
# Persist the bug records as pretty-printed JSON (2-space indent).
with open('../data/bugs/bugs.json', 'w') as out_file:
    serialized = json.dumps(bug_specs, indent=2)
    out_file.write(serialized)