# Find Bugs

Use this notebook to find wrong predictions. We can then later visualize the pairs of specs.

In [1]:
from draco.learn import data_util
from draco.learn import linear
from sklearn import svm
import random
import json
import numpy as np



In [2]:
# Only the first element returned by load_data() is used in this notebook.
train_dev, _ = data_util.load_data()

# One row per pair: the positive example minus the negative example.
# `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and
# removed in pandas 1.0; both return the same NumPy array.
X = (train_dev.positive - train_dev.negative).values

## Cross validation

Cross-validate the model with leave-one-out: train on all pairs but one, then test on the held-out pair. Every held-out pair that is predicted incorrectly is recorded as a bug.

In [3]:
from sklearn.model_selection import LeaveOneOut

# Fixed seed so that the random label flips and the solver's internal data
# shuffling are repeatable; without it the set of reported bugs can change
# from one run to the next.
SEED = 0
rng = np.random.RandomState(SEED)

loo = LeaveOneOut()

bug_idx = []         # positional row numbers (into X) of misclassified pairs
bug_confidence = []  # decision-function value of each misclassified pair

for train_index, test_index in loo.split(X):
    # Indexing with an integer array copies the rows, so negating parts of
    # X_train below leaves X itself untouched.
    X_train, X_test = X[train_index], X[test_index]

    size = len(X_train)

    # Every row of X is "positive - negative", i.e. belongs to class +1.
    y_train = np.ones(size)

    # Select a random half of the training rows and negate both the row and
    # its label, so the classifier is fitted on two classes.
    idx = np.ones(size, dtype=bool)
    idx[:size // 2] = False
    rng.shuffle(idx)

    X_train[idx] = -X_train[idx]
    y_train[idx] = -y_train[idx]

    clf = svm.LinearSVC(C=1, fit_intercept=False, random_state=SEED)
    clf.fit(X_train, y_train)

    # The held-out row is never flipped, so the expected prediction is +1;
    # anything else is recorded as a bug.
    if clf.predict(X_test)[0] != 1:
        bug_idx.append(test_index[0])
        bug_confidence.append(clf.decision_function(X_test)[0])
        print("Found bug", test_index[0])

Found bug 9
Found bug 11
Found bug 19
Found bug 54
Found bug 66
Found bug 68
Found bug 70
Found bug 95
Found bug 97
Found bug 111
Found bug 113
Found bug 116
Found bug 142
Found bug 144
Found bug 158
Found bug 187
Found bug 206
Found bug 218
Found bug 221
Found bug 228
Found bug 243
Found bug 258
Found bug 265
Found bug 283
Found bug 288
Found bug 290
Found bug 296
Found bug 302
Found bug 308
Found bug 310
Found bug 331
Found bug 333
Found bug 348
Found bug 365
Found bug 369
Found bug 375
Found bug 377
Found bug 404
Found bug 422
Found bug 424
Found bug 490
Found bug 498
Found bug 499
Found bug 507
Found bug 510
Found bug 531
Found bug 542
Found bug 559
Found bug 562
Found bug 586
Found bug 591
Found bug 603
Found bug 610
Found bug 611
Found bug 612
Found bug 626
Found bug 630
Found bug 637
Found bug 646
Found bug 659
Found bug 673
Found bug 676
Found bug 689
Found bug 706
Found bug 710
Found bug 744
Found bug 745
Found bug 754
Found bug 759
Found bug 763
Found bug 764
Found bug 765
Fo

In [4]:
# Number of held-out pairs for which the prediction was not +1.
len(bug_idx)

77

## Process bugs

In [5]:
# bug_idx holds positional row numbers into X; map them to the corresponding
# labels of train_dev's index so the pairs can be looked up in the original data.
bugs = train_dev.index[bug_idx]
bugs

Int64Index([1003, 1089,  464,  418,  902,  512,  587,  560,  951,  835,  374,
             121,  989,  836, 1009,  625,    0,  815,  898,  830,  639,   82,
             466,  173,  814,  828,  116, 1101,  563,  467,  816,  421,  462,
             162,    4,  427,  893,  522, 1119,  377,  425,   36,  105,  944,
             565,  820,    1,  460,  423,  942,  380,  624,  182,  617,  300,
             465,  206,  738,   79,  100,   44,   24,  477,  420,  911,  152,
             889,  871,   77,  469,  564,  461,  896,  627,  562,  468,  508],
           dtype='int64')

In [6]:
# Load the spec pairs. Further down they are looked up by the labels in `bugs`,
# and each entry is read through its .negative, .positive, .source and .task
# attributes.
pos_neg_data = data_util.load_neg_pos_specs()

In [7]:
# TODO: generate better data

# Placeholder rows that are attached as inline data to every exported spec.
# A dedicated, seeded generator keeps them identical across runs, so
# re-running the notebook does not change the exported file for no reason.
vals_rng = random.Random(0)

vals = [{
    'q1': round(vals_rng.normalvariate(2, 2), 3),
    'q2': round(vals_rng.normalvariate(2, 2), 3),
    'n': vals_rng.randint(0, 7)
} for _ in range(10)]

In [8]:
# Build one export record per misclassified pair: both specs of the pair plus
# the classifier's confidence and the pair's source/task.
bug_specs = []

for bi, confidence in zip(bugs, bug_confidence):
    example = pos_neg_data[bi]
    negative, positive = example.negative, example.positive

    # Attach the same placeholder rows to both specs. This writes onto the
    # loaded spec objects in place.
    for spec in (negative, positive):
        spec['data'] = {'values': vals}

    properties = {
        'confidence': confidence,
        'source': example.source,
        'task': example.task
    }
    bug_specs.append({
        'first': negative,
        'second': positive,
        'properties': properties
    })

In [9]:
# Sanity check: the loop above appends exactly one record per entry in `bugs`.
len(bug_specs)

77

In [10]:
# Labels for the two sides of each pair: "first" holds the negative spec and
# "second" the positive one.
headers = {
    "first": {
        "title": "Negative",
        "subtitle": "but was predicted as better"
    },
    "second": {
        "title": "Positive",
        "subtitle": "but was predicted as worse"
    }
}

# Write the headers together with all collected pairs to the spec_pairs folder.
with open('../data/spec_pairs/bugs.json', 'w') as out_file:
    json.dump({"headers": headers, "specs": bug_specs}, out_file, indent=2)