# Find Bugs

Use this notebook to find wrong predictions. We can then later visualize the pairs of specs.

In [11]:
from draco.learn import data_util
from draco.learn import linear
from sklearn import svm
import random
import json
import numpy as np

In [2]:
# Load the data set; only the first returned split is used in this notebook.
train_dev, _ = data_util.load_data()

# Each row of X is the feature difference (positive - negative) of one pair.
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0.
X = (train_dev.positive - train_dev.negative).values

## Cross validation

Cross-validate the model with a leave-one-out split: train on all pairs but one and test on the held-out pair.

In [3]:
from sklearn.model_selection import LeaveOneOut

# Fixed seed so that both the random label flipping and the SVM solver give
# the same list of bugs on every Restart & Run All.
SEED = 0

loo = LeaveOneOut()
rng = np.random.RandomState(SEED)

bug_idx = []         # row positions in X of the mispredicted pairs
bug_confidence = []  # decision-function value for each mispredicted pair

for train_index, test_index in loo.split(X):
    # Indexing with the index arrays yields copies, so negating rows of
    # X_train below does not modify X (assumes X is a NumPy array).
    X_train, X_test = X[train_index], X[test_index]

    size = len(X_train)
    y_train = np.ones(size)

    # Every row of X is "positive - negative", i.e. label +1. Negate a random
    # half of the training rows (and their labels) so the classifier sees
    # both classes.
    idx = np.ones(size, dtype=bool)
    idx[:size // 2] = False
    rng.shuffle(idx)

    X_train[idx] = -X_train[idx]
    y_train[idx] = -y_train[idx]

    clf = svm.LinearSVC(C=1, fit_intercept=False, random_state=SEED)
    clf.fit(X_train, y_train)

    # The held-out row is never flipped, so anything other than +1 is a
    # wrong prediction.
    if clf.predict(X_test)[0] != 1:
        bug_idx.append(test_index[0])
        bug_confidence.append(clf.decision_function(X_test)[0])
        print("Found bug", test_index[0])

Found bug 8
Found bug 9
Found bug 10
Found bug 19
Found bug 26
Found bug 32
Found bug 34
Found bug 54
Found bug 65
Found bug 66
Found bug 68
Found bug 70
Found bug 85
Found bug 87
Found bug 99
Found bug 113
Found bug 115
Found bug 116
Found bug 120
Found bug 142
Found bug 146
Found bug 150
Found bug 158
Found bug 165
Found bug 169
Found bug 176
Found bug 184
Found bug 205
Found bug 206
Found bug 210
Found bug 218
Found bug 221
Found bug 225
Found bug 230
Found bug 241
Found bug 242
Found bug 258
Found bug 265
Found bug 266
Found bug 267
Found bug 281
Found bug 283
Found bug 288
Found bug 290
Found bug 296
Found bug 297
Found bug 299
Found bug 302
Found bug 304
Found bug 310
Found bug 330
Found bug 331
Found bug 333
Found bug 344
Found bug 345
Found bug 348
Found bug 349
Found bug 350
Found bug 357
Found bug 359
Found bug 360
Found bug 367
Found bug 369
Found bug 377
Found bug 383
Found bug 392
Found bug 400
Found bug 404
Found bug 406
Found bug 422
Found bug 423
Found bug 424
Found bug

In [4]:
# Number of held-out pairs for which the classifier did not predict +1.
len(bug_idx)

139

## Process bugs

In [5]:
# bug_idx holds row positions within X; map them back to the index labels
# of train_dev, i.e. the indexes in the original data.
bugs = train_dev.index[bug_idx]
bugs  # display the selected labels

Int64Index([ 125, 1003,  474,  464,  764,  299,  426,  418,  805,  902,
            ...
             469,  461,  896,  933,  515,  513, 1110,  468,  357,  508],
           dtype='int64', length=139)

In [6]:
# Load the negative/positive examples; they are looked up further down by the
# labels stored in `bugs`.
# NOTE(review): presumably keyed by the same ids as train_dev.index — confirm.
pos_neg_data = data_util.load_neg_pos_data()

In [7]:
# TODO: generate better data

# Dedicated, seeded generator: the placeholder rows (and therefore the JSON
# written at the end of the notebook) are identical on every run, and the
# global `random` state is left untouched.
VALS_SEED = 0
_vals_rng = random.Random(VALS_SEED)

# Ten placeholder data rows with two normally distributed values (mean 2,
# standard deviation 2, rounded to 3 decimals) and one integer in [0, 7].
vals = [{
    'q1': round(_vals_rng.normalvariate(2, 2), 3),
    'q2': round(_vals_rng.normalvariate(2, 2), 3),
    'n': _vals_rng.randint(0, 7)
} for _ in range(10)]

In [8]:
# `bugs` is derived from `bug_idx`, which is appended to together with
# `bug_confidence`; a length mismatch means stale kernel state.
assert len(bugs) == len(bug_confidence), "bugs and bug_confidence are out of sync"

bug_specs = []

for bi, confidence in zip(bugs, bug_confidence):
    example = pos_neg_data[bi]

    # Attach the placeholder data to shallow copies so that the examples
    # stored in pos_neg_data are not modified by this cell.
    negative = dict(example.negative, data={'values': vals})
    positive = dict(example.positive, data={'values': vals})

    bug_specs.append({
        'first': negative,
        'second': positive,
        'properties': {
            'confidence': confidence,
            'source': example.source,
            'task': example.task
        }
    })

In [9]:
# Sanity check: one spec pair is appended per entry of `bugs`.
len(bug_specs)

139

In [10]:
# Titles and subtitles describing the "first" and "second" spec of each pair.
headers = {
    "first": {
        "title": "Negative",
        "subtitle": "but was predicted as better"
    },
    "second": {
        "title": "Positive",
        "subtitle": "but was predicted as worse"
    }
}
payload = {"headers": headers, "specs": bug_specs}

# Write the mispredicted pairs as indented JSON.
with open('../data/spec_pairs/bugs.json', 'w') as out_file:
    json.dump(payload, out_file, indent=2)