# Find Bugs

Use this notebook to find wrong predicitons. We can then later visualize the pairs of specs.

In [11]:
from draco.learn import data_util
from draco.learn import linear
from sklearn import svm
import random
import json
import numpy as np

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
train_dev, _ = data_util.load_data()

X = train_dev.positive - train_dev.negative
X = X.as_matrix()

## Cross validation

Cross validate the model by running it over various subsets of the input data.

In [3]:
from sklearn.model_selection import LeaveOneOut

loo = LeaveOneOut()

bug_idx = []
bug_confidence = []

for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    
    size = len(X_train)
    
    y_train = np.ones(size)
    
    idx = np.ones(size, dtype=bool)
    idx[:int(size/2)] = False
    np.random.shuffle(idx)
    
    X_train[idx] = -X_train[idx]
    y_train[idx] = -y_train[idx]

    clf = svm.LinearSVC(C=1, fit_intercept=False)
    clf.fit(X_train, y_train)
    
    if clf.predict(X_test)[0] != 1:
        bug_idx.append(test_index[0])
        bug_confidence.append(clf.decision_function(X_test)[0])
        print("Found bug", test_index[0])

Found bug 3
Found bug 17
Found bug 18
Found bug 30
Found bug 33
Found bug 38
Found bug 52
Found bug 53
Found bug 59
Found bug 64
Found bug 73
Found bug 78
Found bug 79
Found bug 99
Found bug 100
Found bug 101
Found bug 104
Found bug 121
Found bug 124
Found bug 128
Found bug 131
Found bug 133
Found bug 142
Found bug 157
Found bug 158
Found bug 164
Found bug 165
Found bug 182
Found bug 183
Found bug 189
Found bug 199
Found bug 201
Found bug 205
Found bug 212
Found bug 222
Found bug 234
Found bug 237
Found bug 238
Found bug 242
Found bug 244
Found bug 249
Found bug 250
Found bug 255
Found bug 261
Found bug 279
Found bug 284
Found bug 303
Found bug 305
Found bug 306
Found bug 310
Found bug 316
Found bug 337
Found bug 338
Found bug 340
Found bug 343
Found bug 351
Found bug 353
Found bug 357
Found bug 372
Found bug 377
Found bug 379
Found bug 381
Found bug 384
Found bug 385
Found bug 389
Found bug 390
Found bug 395
Found bug 402
Found bug 405
Found bug 408
Found bug 425
Found bug 430
Found b

In [4]:
bug_idx, bug_confidence

([3,
  17,
  18,
  30,
  33,
  38,
  52,
  53,
  59,
  64,
  73,
  78,
  79,
  99,
  100,
  101,
  104,
  121,
  124,
  128,
  131,
  133,
  142,
  157,
  158,
  164,
  165,
  182,
  183,
  189,
  199,
  201,
  205,
  212,
  222,
  234,
  237,
  238,
  242,
  244,
  249,
  250,
  255,
  261,
  279,
  284,
  303,
  305,
  306,
  310,
  316,
  337,
  338,
  340,
  343,
  351,
  353,
  357,
  372,
  377,
  379,
  381,
  384,
  385,
  389,
  390,
  395,
  402,
  405,
  408,
  425,
  430,
  431,
  434,
  437,
  440,
  446,
  448,
  449,
  450,
  454,
  465,
  467,
  469,
  476,
  481,
  507,
  511,
  520,
  525,
  526,
  528,
  536,
  539,
  540,
  547,
  548,
  556,
  564,
  567,
  573,
  578,
  582,
  583,
  587,
  589,
  593,
  594,
  597,
  599,
  605,
  618,
  628,
  630,
  635,
  637,
  640,
  643,
  648,
  665,
  677,
  678,
  681,
  682,
  685,
  695,
  700,
  701,
  706,
  716,
  721,
  728,
  731,
  738,
  744,
  750,
  752,
  760,
  763,
  765,
  767,
  779,
  783,
  804,
  807,


## Process bugs

In [5]:
# get the indexes in the original data
bugs = train_dev.index[bug_idx]
bugs

Int64Index([1048,  409,   12,  593,  163,  538,  725,  584,  786, 1176,
            ...
              15, 1360, 1585, 1411,  728,  209, 1337,  508,  144,  715],
           dtype='int64', length=215)

In [6]:
pos_neg_data = data_util.load_neg_pos_data()

In [7]:
# TODO: generate better data

vals = [{
    'q1': round(random.normalvariate(2, 2), 3),
    'q2': round(random.normalvariate(2, 2), 3),
    'n': random.randint(0,7)
} for _ in range(10)]

In [8]:
bug_specs = []

for i, bi in enumerate(bugs):
    example = pos_neg_data[bi]
    
    negative = example.negative
    positive = example.positive
    
    negative['data'] = {
        'values': vals
    }
    positive['data'] = {
        'values': vals
    }
    bug_specs.append({
        'negative': negative,
        'positive': positive,
        'confidence': bug_confidence[i]
    })

In [12]:
len(bug_specs)

215

In [10]:
with open('../data/bugs/bugs.json', 'w') as f:
    json.dump(bug_specs, f, indent=2)