# Decision Tree

In this notebook, we introduce one of the most popular supervised machine learning algorithms: the decision tree. It can be used for both regression and classification. Here we mainly discuss decision trees from the classification perspective.

## Introduction 


## Building Tree Model

## Splitting Data into Different Regions

## How to Choose Best Split?

## Decision Tree Algorithm

In [2]:
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

from abc import ABCMeta, abstractclassmethod

%matplotlib inline

$$H(S)=-\sum_{c \in C}p(c)\log_2\big(p(c)\big)$$

In [9]:
def entropy(x):
    """Return the Shannon entropy, in bits, of a discrete distribution.

    Evaluates ``-sum(p * log2(p))`` over the strictly positive entries of
    ``x``.

    Parameters
    ----------
    x : array-like
        Class probabilities ``p(c)``. Lists, tuples and ndarrays are all
        accepted. The values are used as given: they are neither validated
        nor normalised, so pass probabilities, not raw labels or counts.

    Returns
    -------
    numpy floating scalar
        The entropy; zero when no entry of ``x`` is positive.
    """
    # Coerce first so that boolean-mask indexing also works for plain
    # Python sequences, which do not support ``x > 0``.
    probs = np.asarray(x)
    # Drop zero (and invalid negative) entries: log2 is undefined there and
    # the term p * log2(p) tends to 0 as p -> 0 anyway.
    probs = probs[probs > 0]
    return -np.sum(probs * np.log2(probs))

$$I = H(S) - \sum_{i \in \{1, 2\}}\frac{|S^{i}|}{|S|}H(S^{i})$$

In [19]:
def _class_probabilities(labels):
    """Return the relative frequency of each distinct value in ``labels``."""
    _, counts = np.unique(labels, return_counts=True)
    return counts / counts.sum()


def expected_entropy(s_i, s_size):
    """Return the size-weighted entropy of one side of a split.

    Computes ``(|s_i| / s_size) * H(s_i)``, where ``H`` is the entropy of
    the empirical class distribution of ``s_i``.

    Parameters
    ----------
    s_i : array-like
        Class labels of the samples that fell on this side of the split.
    s_size : int
        Number of samples in the parent set, used as the weight denominator.

    Returns
    -------
    float
        The weighted entropy, or ``0.0`` when ``s_i`` is empty.
    """
    s_i = np.asarray(s_i)
    s_i_size = s_i.shape[0]
    if s_i_size == 0:
        # An empty side contributes nothing; returning early also avoids
        # dividing by zero when the parent itself is empty.
        return 0.0
    # ``entropy`` expects probabilities, so turn the raw labels into class
    # frequencies first instead of passing the label values straight through.
    return (s_i_size / s_size) * entropy(_class_probabilities(s_i))


def information_gain(x, cutoff_index):
    """Return the information gain of splitting ``x`` at ``cutoff_index``.

    The gain is ``H(x) - (w_left * H(left) + w_right * H(right))`` with
    ``left = x[:cutoff_index]`` and ``right = x[cutoff_index:]``, each
    weighted by its share of the samples.

    Parameters
    ----------
    x : array-like
        Class labels of the samples, in the order in which they are split
        (presumably sorted by the candidate feature -- confirm with caller).
    cutoff_index : int
        Position at which ``x`` is cut into a left and a right part.

    Returns
    -------
    float
        The information gain in bits; ``0.0`` for an empty ``x``.
    """
    x = np.asarray(x)
    x_size = x.shape[0]
    if x_size == 0:
        return 0.0
    left = expected_entropy(x[:cutoff_index], x_size)
    right = expected_entropy(x[cutoff_index:], x_size)
    return entropy(_class_probabilities(x)) - (left + right)

In [23]:
class Node:
    """Base tree node carrying an identifier and a free-form description."""

    def __init__(self, id=None, description=None):
        self._id, self._description = id, description

    @property
    def id(self):
        """Identifier stored on this node (``None`` unless one was given)."""
        return self._id

    @id.setter
    def id(self, new_id):
        self._id = new_id

    @property
    def description(self):
        """Description stored on this node (``None`` unless one was given)."""
        return self._description

    @description.setter
    def description(self, new_description):
        self._description = new_description
        
class Leaf(Node):
    """Terminal tree node holding the values stored at a leaf.

    Parameters
    ----------
    values
        Payload kept on the leaf (presumably per-class counts or
        probabilities -- confirm against the tree-building code).
    n_classes
        Number of classes; stored as given and not otherwise used here.
    id, description
        Forwarded unchanged to ``Node``.
    """

    def __init__(self, values, n_classes, id=None, description=None):
        # ``Node.__init__(id, description)`` was missing ``self``: it bound
        # ``id`` as the instance and failed on the first attribute
        # assignment. ``super()`` passes the instance implicitly.
        super().__init__(id, description)
        self._values = values
        # Previously accepted but silently discarded.
        self._n_classes = n_classes

    @property
    def values(self):
        """Payload stored on this leaf."""
        return self._values

    @values.setter
    def values(self, value):
        self._values = value

    @property
    def n_classes(self):
        """Number of classes given at construction."""
        return self._n_classes

    @n_classes.setter
    def n_classes(self, value):
        self._n_classes = value
        
class Internal(Node):
    """Decision node that routes samples to a left or right child.

    Parameters
    ----------
    dim
        The feature (dimension) this node tests.
    threshold
        Cut-off value compared against that feature.
    left_child, right_child
        Child nodes reached on either side of the test.
    node_id, description
        Forwarded to ``Node`` as its ``id`` and ``description``.
    """

    def __init__(self, dim, threshold, left_child, right_child, node_id=None, description=None):
        # The original called ``Node.__init__(id, description)``: it omitted
        # ``self`` and passed the builtin ``id`` function instead of the
        # ``node_id`` argument, so construction always failed.
        super().__init__(node_id, description)
        self._dim = dim
        self._threshold = threshold
        self._left_child = left_child
        self._right_child = right_child

    @property
    def dim(self):
        """Feature (dimension) tested by this node."""
        return self._dim

    @dim.setter
    def dim(self, value):
        self._dim = value

    @property
    def threshold(self):
        """Cut-off value used by this node's test."""
        return self._threshold

    @threshold.setter
    def threshold(self, value):
        # Was ``self._threshold = threshold``, an undefined name that raised
        # NameError on every assignment.
        self._threshold = value

    @property
    def left_child(self):
        """Child node on the left side of the split."""
        return self._left_child

    @left_child.setter
    def left_child(self, value):
        self._left_child = value

    @property
    def right_child(self):
        """Child node on the right side of the split."""
        return self._right_child

    @right_child.setter
    def right_child(self, value):
        self._right_child = value

In [None]:
class BaseTree(metaclass=ABCMeta):
    """Skeleton base class for tree models.

    The constructor records its hyper-parameters; ``fit`` and ``predict``
    are placeholders that do nothing and return ``None`` until a subclass
    overrides them.

    Parameters
    ----------
    max_depth : optional
        Stored as ``self.max_depth`` (presumably the maximum tree depth;
        nothing in this class uses it yet).
    n_min_leaf : optional
        Stored as ``self.n_min_leaf`` (presumably the minimum number of
        samples in a leaf; nothing in this class uses it yet).
    n_trials : optional
        Stored as ``self.n_trials``; its meaning is not defined here.
    """

    def __init__(self, max_depth=None, n_min_leaf=2, n_trials=None):
        # Keep the arguments instead of discarding them so subclasses can
        # read the configuration they were constructed with.
        self.max_depth = max_depth
        self.n_min_leaf = n_min_leaf
        self.n_trials = n_trials

    def fit(self, X_train, y_train):
        """Placeholder for training; does nothing and returns ``None``."""
        pass

    def predict(self, X_test):
        """Placeholder for prediction; does nothing and returns ``None``."""
        pass

    def prefict(self, X_test):
        """Misspelled legacy name for ``predict``, kept so old calls still work."""
        return self.predict(X_test)
    