In [1]:
%run header.ipynb

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.
Using TensorFlow backend.


In [2]:
class MyModel:
    """Minimal placeholder model exposing a ``fit`` / ``predict`` interface.

    ``fit`` does nothing and ``predict`` always returns ``None``.
    """

    # Class-level verbosity flag; 0 by default.
    verbose = 0

    def __init__(self, params=None):
        """Store the model parameters.

        Parameters
        ----------
        params : dict, optional
            Parameter mapping kept on ``self.params`` as-is (not copied).
            When omitted, a fresh empty dict is created for this instance;
            a ``{}`` default in the signature would be shared by every
            instance constructed without arguments.
        """
        self.params = {} if params is None else params

    def fit(self, train, train_answers):
        """Accept training data and answers; intentionally a no-op."""
        pass

    def predict(self, X):
        """Return ``None`` for any input."""
        return None

# Instantiate the placeholder model with default params; the trailing
# expression displays its class-level `verbose` flag as the cell output.
dummy_model = MyModel()
dummy_model.verbose

0

In [3]:
def area(box):
    """Return the signed area of a box laid out as (x_min, y_min, x_max, y_max).

    Only the first four entries of ``box`` are read. The result is negative
    when exactly one of the sides is inverted (max < min).
    """
    x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
    width = x_max - x_min
    height = y_max - y_min
    return width * height

def intersection_over_union(boxes):
    """Compute the intersection-over-union (IoU) of two axis-aligned boxes.

    Parameters
    ----------
    boxes : sequence of 8 numbers
        Two boxes concatenated as
        ``[xminA, yminA, xmaxA, ymaxA, xminB, yminB, xmaxB, ymaxB]``.

    Returns
    -------
    float
        Intersection area divided by union area. ``0.0`` is returned when
        either box is degenerate, i.e. has zero or negative width or height.

    Raises
    ------
    AssertionError
        If ``boxes`` does not contain exactly 8 values.
    """
    assert len(boxes) == 8, "expected 8 coordinates (two boxes), got %d" % len(boxes)
    box_a = boxes[:4]
    box_b = boxes[4:]

    width_a, height_a = box_a[2] - box_a[0], box_a[3] - box_a[1]
    width_b, height_b = box_b[2] - box_b[0], box_b[3] - box_b[1]

    # A box with a non-positive side has no real extent. Checking only for
    # area == 0 lets inverted boxes through with a negative area, which can
    # make the union zero (division by zero) or the score negative.
    if width_a <= 0 or height_a <= 0 or width_b <= 0 or height_b <= 0:
        return 0.0

    area_a = width_a * height_a
    area_b = width_b * height_b

    # Overlap rectangle; each side is clamped to 0 when the boxes are disjoint.
    inter_width = max(0, min(box_a[2], box_b[2]) - max(box_a[0], box_b[0]))
    inter_height = max(0, min(box_a[3], box_b[3]) - max(box_a[1], box_b[1]))
    inter_area = inter_width * inter_height

    return inter_area / float(area_a + area_b - inter_area)

def get_score(actual, pred):
    """Return the mean IoU over rows of ``pred`` joined with ``actual`` on ``itemId``.

    The merged frame must contain the columns ``Xmin, Ymin, Xmax, Ymax`` and
    ``Xmin_true, Ymin_true, Xmax_true, Ymax_true``; presumably the former come
    from ``pred`` and the ``*_true`` ones from ``actual`` (confirm against the
    data loading code). The merge uses pandas' default join, so only
    ``itemId`` values present in both frames contribute to the mean.
    """
    box_columns = [
        'Xmin', 'Ymin', 'Xmax', 'Ymax',
        'Xmin_true', 'Ymin_true', 'Xmax_true', 'Ymax_true',
    ]
    merged = pred.merge(actual, on=["itemId"])
    # One IoU value per merged row, computed from its eight coordinates.
    row_scores = merged[box_columns].apply(
        lambda row: intersection_over_union(row.values), axis=1
    )
    return row_scores.mean()

def cross_validation(model, data, answers, n_folds=5, n_iters=5, seed=2707, verbose=1, split_by='itemId'):
    """Run repeated K-fold cross-validation and return the mean fold score.

    Parameters
    ----------
    model : object
        Must provide ``fit(train, train_answers)`` and ``predict(test)``; the
        prediction is passed to ``get_score`` together with the true answers.
    data : pandas.DataFrame
        Feature rows; must have an ``itemId`` column (used to select answers).
    answers : pandas.DataFrame
        Ground truth; must have an ``itemId`` column.
    n_folds : int
        Number of folds per iteration.
    n_iters : int
        Number of repetitions; iteration ``i`` shuffles with ``seed + i``.
    seed : int
        Base random seed.
    verbose : int
        ``0`` prints nothing, ``>= 1`` prints the splitter and one score line
        per fold, ``>= 2`` additionally prints timestamps for each stage.
    split_by : str or None
        Column whose unique values are split into folds, so all rows sharing
        a value land on the same side. If falsy, rows are split by position.

    Returns
    -------
    float
        Mean of all fold scores across all iterations.
    """
    # NOTE(review): np, KFold and datetime are not imported in this cell;
    # presumably they come from header.ipynb - confirm.
    scores = []
    if split_by:
        ids = data[split_by].unique()
    else:
        ids = np.arange(data.shape[0])

    # `iteration` rather than `iter`, to avoid shadowing the builtin.
    for iteration in range(n_iters):
        kf = KFold(n_splits=n_folds, random_state=seed + iteration, shuffle=True)
        if verbose >= 1:
            print(kf)
        for fold, (train_index, test_index) in enumerate(kf.split(ids), start=1):
            if verbose >= 2:
                print('Prepare data: %s' % datetime.datetime.now())

            if split_by:
                # Group-wise split: select rows by membership of their group id.
                data_train = data[data[split_by].isin(ids[train_index])]
                data_test = data[data[split_by].isin(ids[test_index])]
            else:
                data_train = data.iloc[train_index, :]
                data_test = data.iloc[test_index, :]

            if verbose >= 2:
                print('Fit: %s' % datetime.datetime.now())
            model.fit(data_train, answers[answers.itemId.isin(data_train.itemId)])
            if verbose >= 2:
                print('Predict: %s' % datetime.datetime.now())

            pred = model.predict(data_test)
            score = get_score(answers[answers.itemId.isin(data_test.itemId)], pred)
            scores.append(score)

            # Honour `verbose`: verbose=0 now silences the per-fold report.
            if verbose >= 1:
                print('%2d -%2d : %1.4f, mean=%1.4f' % (iteration, fold, score, np.mean(scores)))
    return np.mean(scores)