# Find Bugs

Use this notebook to find wrong predictions. We can then later visualize the pairs of specs.

In [10]:
from draco.learn import data_util
from draco.learn import linear
from sklearn import svm
import random
import json
import numpy as np

In [2]:
# Load the train/dev split; the second value returned by load_data() is not
# used in this notebook.
train_dev, _ = data_util.load_data()

# One row per example pair: negative features minus positive features.
# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in pandas 1.0; it yields the same NumPy array.
X = (train_dev.negative - train_dev.positive).values

## Cross validation

Cross validate the model by running it over various subsets of the input data.

In [3]:
from sklearn.model_selection import LeaveOneOut

# Leave-one-out cross validation: train on every pair except one, then check
# whether the held-out pair is ranked correctly. Held-out pairs that are
# predicted wrongly are recorded as "bugs".

# Fixed seed so the reported bug list is reproducible across runs; without it
# both the label shuffle and LinearSVC's solver vary from run to run.
SEED = 0
rng = np.random.RandomState(SEED)

loo = LeaveOneOut()

bug_idx = []         # positional row numbers (into X) of misclassified pairs
bug_confidence = []  # decision_function value for each misclassified pair

for train_index, test_index in loo.split(X):
    # Integer-array indexing copies, so the sign flips below never modify X.
    X_train, X_test = X[train_index], X[test_index]

    size = len(X_train)

    y_train = np.ones(size)

    # Every row is (negative - positive) with label +1, which would give the
    # classifier a single class. Pick a random half of the training rows
    # (rounded up) and negate both the row and its label so that both classes
    # are present.
    idx = np.ones(size, dtype=bool)
    idx[:size // 2] = False
    rng.shuffle(idx)

    X_train[idx] = -X_train[idx]
    y_train[idx] = -y_train[idx]

    clf = svm.LinearSVC(C=1, fit_intercept=False, random_state=SEED)
    clf.fit(X_train, y_train)

    # The held-out row is never flipped, so the expected label is +1.
    if clf.predict(X_test)[0] != 1:
        bug_idx.append(test_index[0])
        bug_confidence.append(clf.decision_function(X_test)[0])
        print("Found bug", test_index[0])

Found bug 15
Found bug 18
Found bug 57
Found bug 67
Found bug 84
Found bug 97
Found bug 98
Found bug 116
Found bug 135
Found bug 142
Found bug 171
Found bug 191
Found bug 198
Found bug 202
Found bug 203
Found bug 221
Found bug 228
Found bug 249
Found bug 262
Found bug 309
Found bug 311
Found bug 313
Found bug 319
Found bug 320
Found bug 327
Found bug 328
Found bug 346
Found bug 361
Found bug 385
Found bug 398
Found bug 404
Found bug 413
Found bug 416
Found bug 419
Found bug 428
Found bug 430
Found bug 476
Found bug 493
Found bug 515
Found bug 519
Found bug 523
Found bug 543
Found bug 578
Found bug 595
Found bug 621
Found bug 630
Found bug 635
Found bug 654
Found bug 662
Found bug 671
Found bug 673
Found bug 682
Found bug 702
Found bug 709
Found bug 714
Found bug 716
Found bug 750
Found bug 780
Found bug 795
Found bug 804
Found bug 813


In [4]:
# Number of held-out pairs recorded in bug_idx.
len(bug_idx)

61

## Process bugs

In [5]:
# Translate the positional row numbers in bug_idx into the corresponding
# labels of train_dev's index.
bugs = train_dev.index[bug_idx]
# Display the selected labels.
bugs

Index(['kim2018-34', 'kim2018-22', 'kim2018-563', 'kim2018-160',
       'kim2018-813', 'kim2018-834', 'kim2018-119', 'kim2018-1007',
       'kim2018-576', 'kim2018-2', 'kim2018-622', 'saket2018-0',
       'kim2018-171', 'kim2018-812', 'kim2018-637', 'kim2018-997',
       'kim2018-98', 'kim2018-86', 'kim2018-891', 'kim2018-696',
       'kim2018-559', 'kim2018-33', 'saket2018-9', 'kim2018-77',
       'saket2018-7', 'kim2018-416', 'saket2018-4', 'kim2018-268',
       'kim2018-585', 'kim2018-940', 'kim2018-941', 'kim2018-510',
       'kim2018-894', 'kim2018-829', 'kim2018-464', 'kim2018-565',
       'kim2018-610', 'kim2018-900', 'kim2018-492', 'kim2018-32',
       'kim2018-704', 'kim2018-904', 'kim2018-506', 'kim2018-1117',
       'kim2018-902', 'kim2018-97', 'kim2018-998', 'kim2018-827',
       'kim2018-150', 'kim2018-103', 'kim2018-987', 'kim2018-828',
       'kim2018-896', 'kim2018-514', 'kim2018-114', 'kim2018-887',
       'kim2018-459', 'kim2018-520', 'kim2018-457', 'kim2018-625',
   

In [6]:
# Load the negative/positive spec pairs via the project helper.
# NOTE(review): presumably keyed by the same ids as train_dev's index, since
# entries are looked up with the labels in `bugs` further down — confirm.
pos_neg_data = data_util.load_neg_pos_specs()

In [7]:
# Build one record per misclassified pair: both specs with the same inline
# data attached, plus the classifier confidence and the example's metadata.
bug_specs = []

for position, bug_id in enumerate(bugs):
    pair = pos_neg_data[bug_id]

    worse_spec = pair.negative
    better_spec = pair.positive

    # Fill the example's dataset with random content and reuse those values
    # for both specs.
    dataset = pair.data
    dataset.fill_with_random_content()
    rows = dataset.content

    # Note: this writes the 'data' entry onto the spec objects held by
    # pos_neg_data.
    worse_spec['data'] = {'values': rows}
    better_spec['data'] = {'values': rows}

    record = {
        'first': worse_spec,
        'second': better_spec,
        'properties': {
            # bug_confidence is index-aligned with bugs.
            'confidence': bug_confidence[position],
            'source': pair.source,
            'task': pair.task
        }
    }
    bug_specs.append(record)

In [8]:
# Number of spec-pair records that were built.
len(bug_specs)

61

In [9]:
# Write the misclassified pairs, together with the column headers, to a JSON
# file.
with open('../data/spec_pairs/bugs.json', 'w') as f:
    # "first" holds the negative spec and "second" the positive one, matching
    # the keys used in each bug_specs record.
    headers = {
        "first": {
            "title": "Negative",
            "subtitle": "but was predicted as better"
        },
        "second": {
            "title": "Positive",
            "subtitle": "but was predicted as worse"
        }
    }
    payload = {"headers": headers, "specs": bug_specs}
    json.dump(payload, f, indent=2)