In [6]:
def gini_index(groups, classes):
    """Return the size-weighted Gini impurity of a candidate split.

    Args:
        groups: Re-iterable collection (it is traversed twice) of groups,
            where each group is a sized sequence of rows and the last
            element of every row (``row[-1]``) is its class label.
        classes: Iterable of the class labels to score against.

    Returns:
        The sum over all non-empty groups of
        ``(1 - sum(p_c ** 2 for c in classes)) * len(group) / n``,
        where ``p_c`` is the fraction of rows in the group labelled ``c``
        and ``n`` is the total number of rows across all groups.
        Returns ``0`` when every group is empty.
    """
    # Total number of samples reaching this split point.
    n = sum(len(group) for group in groups)
    gini = 0
    for g in groups:
        size = len(g)
        # Empty groups contribute nothing; skipping them also avoids
        # dividing by zero below.
        if size == 0:
            continue
        # Extract the labels once per group instead of once per class.
        labels = [row[-1] for row in g]
        score = 0
        # Accumulate the squared proportion of each class in this group.
        for c in classes:
            p = labels.count(c) / float(size)
            score += p * p
        # Weight the group's impurity by its share of the samples.
        gini += (1 - score) * size / n
    return gini
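# Sanity checks for gini_index (uncomment to run); expected output: 0.5 for the evenly mixed split, 0.0 for the perfect split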
#print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
#print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))

In [3]:
# Split a dataset based on an attribute and an attribute value
def test_split(index, value, dataset):
    left, right = list(), list()
    for row in dataset:
        if row[index] < value:
            left.append(row)
        else:
            right.append(row)
    return left, right

In [4]:
# Select the best split point for a dataset
def get_split(dataset):
    """Exhaustively search for the split with the lowest Gini index.

    Every value of every feature column (all columns except the last,
    which holds the class label) is tried as a threshold. The first
    candidate with the strictly lowest Gini index wins.

    Args:
        dataset: Non-empty list of rows; ``row[-1]`` is the class label.

    Returns:
        A dict with keys ``'index'`` (column of the chosen split),
        ``'value'`` (threshold) and ``'groups'`` (the ``(left, right)``
        partition from ``test_split``). If no candidate is evaluated, the
        placeholders ``999``, ``999`` and ``None`` are returned instead.
    """
    labels = list(set(row[-1] for row in dataset))
    # Placeholder result; 999 also serves as an upper bound no Gini
    # index can reach, so the first evaluated candidate always replaces it.
    best = {'index': 999, 'value': 999, 'groups': None}
    lowest = 999
    n_features = len(dataset[0]) - 1
    for col in range(n_features):
        for candidate in dataset:
            partition = test_split(col, candidate[col], dataset)
            impurity = gini_index(partition, labels)
            # Strict comparison: ties keep the earliest candidate found.
            if impurity < lowest:
                lowest = impurity
                best = {'index': col, 'value': candidate[col], 'groups': partition}
    return best

In [7]:
# Small two-feature example: each row is [X1, X2, class label].
dataset = [
    [2.771244718, 1.784783929, 0],
    [1.728571309, 1.169761413, 0],
    [3.678319846, 2.81281357, 0],
    [3.961043357, 2.61995032, 0],
    [2.999208922, 2.209014212, 0],
    [7.497545867, 3.162953546, 1],
    [9.00220326, 3.339047188, 1],
    [7.444542326, 0.476683375, 1],
    [10.12493903, 3.234550982, 1],
    [6.642287351, 3.319983761, 1],
]
split = get_split(dataset)
# The column index is zero-based; add 1 so the report reads X1, X2, ...
print('Split: [X%d < %.3f]' % (split['index'] + 1, split['value']))

Split: [X1 < 6.642]
