# Find Bugs

Use this notebook to find wrong predictions. We can later visualize the corresponding pairs of specs.

In [2]:
from draco.learn import data_util
from draco.learn import linear
from sklearn import svm
import random
import json
import numpy as np

In [3]:
# Load the train/dev split; the second return value is not needed here.
train_dev, _ = data_util.load_data()

# Each row of X is the feature difference (positive - negative) of one pair.
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0.
# NOTE(review): assumes `positive` and `negative` are aligned numeric frames - confirm.
X = (train_dev.positive - train_dev.negative).values

## Cross validation

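Cross-validate the model with leave-one-out: for every pair, train on all the other pairs and test on the single held-out pair. Held-out pairs that are predicted incorrectly are recorded as bugs.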

In [4]:
from sklearn.model_selection import LeaveOneOut

# Fixed seed so that the random label flipping and the SVM solver give the
# same list of bugs on every "Restart & Run All".
SEED = 0
rng = np.random.RandomState(SEED)

loo = LeaveOneOut()

# Positional row numbers (into X) of the misclassified held-out pairs, and the
# signed decision-function value the classifier assigned to each of them.
bug_idx = []
bug_confidence = []

for train_index, test_index in loo.split(X):
    # Indexing with index arrays copies, so flipping rows of X_train below
    # does not modify X itself.
    X_train, X_test = X[train_index], X[test_index]

    size = len(X_train)

    # Every row is (positive - negative), so its true label is +1.
    y_train = np.ones(size)

    # Pick a random half (rounded up) of the training rows and negate both
    # the features and the label, so the classifier sees both classes.
    idx = np.ones(size, dtype=bool)
    idx[:size // 2] = False
    rng.shuffle(idx)

    X_train[idx] = -X_train[idx]
    y_train[idx] = -y_train[idx]

    clf = svm.LinearSVC(C=1, fit_intercept=False, random_state=SEED)
    clf.fit(X_train, y_train)

    # The held-out row is never flipped, so any prediction other than +1
    # means the model ranked the negative spec above the positive one.
    if clf.predict(X_test)[0] != 1:
        bug_idx.append(test_index[0])
        bug_confidence.append(clf.decision_function(X_test)[0])
        print("Found bug", test_index[0])

Found bug 25
Found bug 39
Found bug 58
Found bug 96
Found bug 125
Found bug 140
Found bug 146
Found bug 161
Found bug 164
Found bug 167
Found bug 191
Found bug 199
Found bug 209
Found bug 229
Found bug 234
Found bug 278
Found bug 329
Found bug 337
Found bug 354
Found bug 362
Found bug 383
Found bug 387
Found bug 398
Found bug 412
Found bug 426
Found bug 428
Found bug 434
Found bug 437
Found bug 438
Found bug 444
Found bug 457
Found bug 488
Found bug 509
Found bug 530
Found bug 547
Found bug 552
Found bug 556
Found bug 557
Found bug 575
Found bug 581
Found bug 584
Found bug 587
Found bug 594
Found bug 650
Found bug 664
Found bug 668
Found bug 669
Found bug 671
Found bug 673
Found bug 675
Found bug 678
Found bug 711
Found bug 721
Found bug 735
Found bug 767
Found bug 770
Found bug 778
Found bug 790
Found bug 793
Found bug 796
Found bug 808
Found bug 824


In [5]:
# Number of held-out pairs that the classifier did not predict as +1.
len(bug_idx)

62

## Process bugs

In [6]:
# bug_idx holds positional row numbers into X; translate them back to the
# labels of the original data via train_dev's index.
bugs = train_dev.index[bug_idx]
bugs

Index(['younghoon-637', 'manual-1', 'younghoon-204', 'younghoon-561',
       'younghoon-987', 'younghoon-103', 'younghoon-741', 'younghoon-563',
       'younghoon-829', 'younghoon-180', 'younghoon-894', 'younghoon-2',
       'manual-0', 'younghoon-160', 'younghoon-1087', 'younghoon-900',
       'younghoon-86', 'younghoon-465', 'younghoon-77', 'younghoon-1001',
       'younghoon-891', 'younghoon-560', 'younghoon-463', 'younghoon-79',
       'younghoon-380', 'younghoon-940', 'younghoon-814', 'younghoon-492',
       'younghoon-421', 'younghoon-520', 'younghoon-33', 'younghoon-625',
       'younghoon-812', 'younghoon-510', 'younghoon-459', 'younghoon-826',
       'younghoon-475', 'younghoon-813', 'younghoon-902', 'younghoon-810',
       'younghoon-457', 'younghoon-119', 'younghoon-565', 'younghoon-1117',
       'younghoon-97', 'younghoon-1045', 'younghoon-998', 'younghoon-896',
       'younghoon-1007', 'younghoon-834', 'younghoon-464', 'younghoon-559',
       'younghoon-480', 'younghoon-17

In [7]:
# Load the negative/positive spec pairs.
# NOTE(review): presumably keyed by the same labels as train_dev.index - confirm.
pos_neg_data = data_util.load_neg_pos_specs()

In [13]:
bug_specs = []

# Build one record per misclassified pair: both specs with the same data
# values attached, plus the confidence, source and task of the pair.
for position, key in enumerate(bugs):
    pair = pos_neg_data[key]

    worse_spec = pair.negative
    better_spec = pair.positive

    # Let the dataset populate itself, then read the values back so both
    # specs are given identical content.
    dataset = pair.data
    dataset.fill_with_random_content()
    rows = dataset.content

    # Negative first, then positive; each spec gets its own wrapper dict.
    for spec in (worse_spec, better_spec):
        spec['data'] = {'values': rows}

    properties = {
        # bug_confidence is indexed by the position of the key in `bugs`.
        'confidence': bug_confidence[position],
        'source': pair.source,
        'task': pair.task,
    }

    bug_specs.append({
        'first': worse_spec,
        'second': better_spec,
        'properties': properties,
    })

In [14]:
# Number of spec-pair records that were built.
len(bug_specs)

62

In [15]:
# Write the collected spec pairs, together with the column headers that
# describe the first (negative) and second (positive) spec, to a JSON file.
with open('../data/spec_pairs/bugs.json', 'w') as out_file:
    headers = {
        "first": {
            "title": "Negative",
            "subtitle": "but was predicted as better",
        },
        "second": {
            "title": "Positive",
            "subtitle": "but was predicted as worse",
        },
    }
    json.dump({"headers": headers, "specs": bug_specs}, out_file, indent=2)