# Find Bugs

Use this notebook to find wrong predictions. We can then later visualize the pairs of specs.

In [10]:
import json
import random

import numpy as np
from sklearn import svm
from sklearn.model_selection import LeaveOneOut

from draco.learn import data_util
from draco.learn import linear

In [2]:
# Load the labelled pairs; the second value returned by load_data() is not
# used in this notebook.
train_dev, _ = data_util.load_data()

# Represent every pair by the difference between its `negative` and `positive`
# parts, then drop down to a plain NumPy array so rows can be selected
# positionally by the cross-validation splitter below.
# `.values` is used instead of `as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0.
pair_diff = train_dev.negative - train_dev.positive
X = pair_diff.values

## Cross validation

Cross validate the model by running it over various subsets of the input data.

In [3]:
# Fixed seed so that the list of bugs reported below is the same on every
# Restart & Run All.
SEED = 0

loo = LeaveOneOut()
rng = np.random.RandomState(SEED)

bug_idx = []         # positional indices (into X) of mispredicted held-out pairs
bug_confidence = []  # decision_function value for each entry of bug_idx

for train_index, test_index in loo.split(X):
    # Indexing with an index array returns copies, so negating rows of
    # X_train below leaves X itself untouched.
    X_train, X_test = X[train_index], X[test_index]

    size = len(X_train)
    y_train = np.ones(size)

    # Every training row starts with label +1. Flip the sign of a randomly
    # chosen half of the rows together with their labels so that the
    # classifier is trained on both classes.
    flip = np.ones(size, dtype=bool)
    flip[:size // 2] = False
    rng.shuffle(flip)

    X_train[flip] = -X_train[flip]
    y_train[flip] = -y_train[flip]

    clf = svm.LinearSVC(C=1, fit_intercept=False, random_state=SEED)
    clf.fit(X_train, y_train)

    # The held-out row is never flipped, so its expected label is +1;
    # anything else is recorded as a bug.
    if clf.predict(X_test)[0] != 1:
        bug_idx.append(test_index[0])
        bug_confidence.append(clf.decision_function(X_test)[0])
        print("Found bug", test_index[0])

Found bug 9
Found bug 47
Found bug 55
Found bug 62
Found bug 63
Found bug 65
Found bug 103
Found bug 115
Found bug 137
Found bug 146
Found bug 148
Found bug 151
Found bug 181
Found bug 247
Found bug 272
Found bug 295
Found bug 298
Found bug 353
Found bug 391
Found bug 394
Found bug 395
Found bug 424
Found bug 493
Found bug 538
Found bug 540
Found bug 549
Found bug 564
Found bug 581
Found bug 591
Found bug 605


In [4]:
# Number of held-out pairs whose prediction was not +1 in the loop above.
len(bug_idx)

30

## Process bugs

In [5]:
# Translate the positional indices collected in bug_idx back into the labels
# of train_dev's index, which identify the pairs in the original data.
bugs = train_dev.index[bug_idx]
bugs

Index(['younghoon-14', 'younghoon-660', 'younghoon-460', 'younghoon-414',
       'younghoon-566', 'younghoon-415', 'younghoon-367', 'younghoon-351',
       'bahador-4', 'younghoon-672', 'younghoon-30', 'younghoon-58',
       'younghoon-423', 'younghoon-709', 'younghoon-28', 'younghoon-750',
       'younghoon-353', 'younghoon-303', 'younghoon-24', 'younghoon-624',
       'younghoon-556', 'younghoon-59', 'manual-0', 'younghoon-671',
       'younghoon-623', 'younghoon-107', 'younghoon-826', 'younghoon-676',
       'bahador-7', 'younghoon-509'],
      dtype='object')

In [6]:
# Load the pair examples; they are looked up below by the labels stored in `bugs`.
pos_neg_data = data_util.load_neg_pos_specs()

In [7]:
bug_specs = []

# `bugs` and `bug_confidence` are parallel sequences (both were built from
# bug_idx), so iterate over them together instead of indexing by position.
for pair_id, confidence in zip(bugs, bug_confidence):
    example = pos_neg_data[pair_id]

    # Presumably fill_with_random_content() populates data.content; the same
    # values are attached to both specs so they are shown over identical data.
    data = example.data
    data.fill_with_random_content()
    vals = data.content

    # Build shallow copies with the 'data' entry added rather than assigning
    # into example.negative / example.positive, so the entries held by
    # pos_neg_data are not modified and re-running this cell is safe.
    negative = dict(example.negative, data={'values': vals})
    positive = dict(example.positive, data={'values': vals})

    bug_specs.append({
        'first': negative,
        'second': positive,
        'properties': {
            'confidence': confidence,
            'source': example.source,
            'task': example.task
        }
    })

In [8]:
# Sanity check: one spec pair should have been built per entry of `bugs`.
len(bug_specs)

30

In [9]:
# Titles shown above the two specs of every pair: 'first' holds the negative
# spec and 'second' the positive one (see how bug_specs is built).
headers = {
    "first": {
        "title": "Negative",
        "subtitle": "but was predicted as better"
    },
    "second": {
        "title": "Positive",
        "subtitle": "but was predicted as worse"
    }
}
payload = {"headers": headers, "specs": bug_specs}

# Write the headers and all mispredicted pairs to a single JSON file.
with open('../data/spec_pairs/bugs.json', 'w') as f:
    json.dump(payload, f, indent=2)