In [5]:
import numpy as np

In [89]:
class BinaryTree():
    '''
    Train Data:
    X in shape (m, n)
    m = num_samples, n = num_features,
    Only one_hot values, no multiclass or regression
    Features Must Not be single valued: e.g 
    >>> X = np.array([[0],[0],[0],[0],[0],[0],[0]])
    >>> y = np.array([ 0 , 1 , 0 , 0 , 1 , 1 , 0 ])
    '''
    class Node():

        def __init__(self, depth=0, parent=None):
            self.depth = depth
            self.parent = parent
            self.h = 1

    class Decisive(Node):

        def __init__(self, feature, depth=0, truth=None, parent=None):
            self.feature = feature
            self.children = {}
            self.truth = truth
            super().__init__(depth=depth, parent=parent)
            


    class Root(Decisive):
        
        def __init__(self,feature):
            super().__init__(feature=feature, truth=None)

    class Decision(Decisive):
        
        def __init__(self, feature, parent, depth=0, truth=None):
            super().__init__(feature=feature, depth=depth, truth=truth, parent=parent)

    class Leaf(Node):
        
        def __init__(self, parent, confidence=1, depth=1):
            self.confidence = confidence
            super().__init__(depth=depth, parent=parent)
            

    def __init__(self, threshold=1, depth=3):
        self.threshold = np.clip(threshold, 0, 1)
        self.max_depth=np.maximum(1,depth)
        self.root  = None
        self.current_feature = None
        self.X = None
        self.y = None


    def H(self, x):
        if not x:
            return 0
        if x==1:
            return 0
        return -x*np.log2(x) - (1-x) * np.log2(1-x)
    
    def split(self, X, feature):
        '''
        Returns
        ======================
        feature=True, feature=False
        '''
        return X[X[:,feature]==self.current_feature.truth], X[X[:,feature]!=self.current_feature.truth]
        
    def information_gain(self, X, y, feature):
        '''
        Return 
        =================
        Information gain, whether to flip positive and negative
        '''
        if y.size == 0:
            raise KeyError('Y got size of zero')
        
        X = np.concatenate([X,y.reshape(-1,1)], axis=-1)

        tot = X.shape[0]
        
        positive, negative = self.split(X,feature)
        
        w1, w0 = positive.shape[0]/tot ,negative.shape[0]/tot
        
        p1, p0 =  (positive[:,-1]==1).mean(), (negative[:-1]==1).mean()
        
        return self.H(np.mean(y)) - (w1 * self.H(p1) + w0 * self.H(p0))

    def get_best(self, X :np.ndarray,y :np.ndarray) -> int:
        '''
        Get the best feature to split on
        
        Returns :
        _________________________________
        
        best_feature_column
        '''
        best = 0
        max = 0
        for i in range(X.shape[-1]):
            self.current_feature.truth =  int((np.sum(X[:,i]==y)) >= np.sum(X[:,i]!=y))
            x= self.information_gain(X,y,i)
            print('Information Gain of Feature', i, ' : ', x)
            if best < x:
                best = x
                max = i
        self.current_feature.truth =  int((np.sum(X[:,max]==y)) >= np.sum(X[:,max]!=y))
        return max


    def train(self, X, y, node : Node= None):

        if node is None:
            print('Initializing Root')
            node = self.Root(feature=None)
            self.root =  node
        self.current_feature = node
        print('Current data\n', X, '\n', y)
        print('Getting Best feature')
        best_feature = self.get_best(X, y)
        self.current_feature.feature = best_feature
        print('Retrieved best feature : ',best_feature)


        # end_conditions = False        #Calc p
        # if node is not None:
        #     child = int(len(node.parent.children) == 0)
        #     print('Poitive : ', child)
        #     p = y.mean() if child else 1 - y.mean()
        #     print('Confidence', p)
        #     if p >= self.threshold:
        #         return self.Leaf(node.parent, confidence=p, depth=node.depth)
        
        
        # print(flip)
        # print(best_feature)
        
    
        tmp = X[:,best_feature]==self.current_feature.truth
        tmp_bar = X[:,best_feature]!=self.current_feature.truth
        positive, negative = X[tmp], X[tmp_bar]
        pos_y, neg_y = y[tmp], y[tmp_bar]
        print('Positive split : \n', positive,'\n', pos_y)
        print('Negative split : \n', negative,'\n', neg_y)
   
        for i, (X,y) in enumerate(([positive, pos_y], [negative, neg_y])):
            print(f'Setting Child {i} of Depth' ,node.depth+1)
            node.children[i] = self.train(X=np.delete(X, best_feature, axis=-1), y=y,node=self.Decisive(feature=None,depth=node.depth+1, parent=node))
        return node
    
    def pretty_print(self, node=None):

        
        if node is None:
            node = self.root
        print('='+'======'*node.depth+'>', 'Depth :', node.depth,'    |||     Entropy : ', node.h, end='')
        if type(node) == self.Leaf:
            print('   |||    Confidence', node.confidence)
            return
        else:
            print('   |||    Feature', node.feature)
        for child in node.children.values():
            self.pretty_print(child)
        
    def predict(self, X, node=None):

        if node is None:
            node = self.root
         


In [90]:
features = np.random.randint(size =(10,3), low=0, high=2)
labels = np.array([1,1,1,1,1,0,0,0,0,0])
features = np.array([
    [0,0,0,0,0,0,1,1,1,1],
    [0,0,0,0,0,1,1,1,1,0],
    [0,0,0,0,0,0,1,1,1,1],
    [0,1,0,0,1,1,0,1,1,1],
    [1,1,0,0,1,1,1,0,1,1]]).T
features, labels

(array([[0, 0, 0, 0, 1],
        [0, 0, 0, 1, 1],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 1, 1],
        [0, 1, 0, 1, 1],
        [1, 1, 1, 0, 1],
        [1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1],
        [1, 0, 1, 1, 1]]),
 array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0]))

In [94]:
tree1 = BinaryTree()
node = tree1.train(X=features, y=labels)

Initializing Root
Current data
 [[0 0 0 0 1]
 [0 0 0 1 1]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 1 1]
 [0 1 0 1 1]
 [1 1 1 0 1]
 [1 1 1 1 0]
 [1 1 1 1 1]
 [1 0 1 1 1]] 
 [1 1 1 1 1 0 0 0 0 0]
Getting Best feature
Information Gain of Feature 0  :  0.26902447555119613
Information Gain of Feature 1  :  0.224355041087696
Information Gain of Feature 2  :  0.26902447555119613
Information Gain of Feature 3  :  0.09291839354354581
Information Gain of Feature 4  :  0.030758007896897643
Retrieved best feature :  0
Positive split : 
 [[0 0 0 0 1]
 [0 0 0 1 1]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 1 1]
 [0 1 0 1 1]] 
 [1 1 1 1 1 0]
Negative split : 
 [[1 1 1 0 1]
 [1 1 1 1 0]
 [1 1 1 1 1]
 [1 0 1 1 1]] 
 [0 0 0 0]
Setting Child 0 of Depth 1
Current data
 [[0 0 0 1]
 [0 0 1 1]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 1 1]
 [1 0 1 1]] 
 [1 1 1 1 1 0]
Getting Best feature
Information Gain of Feature 0  :  nan
Information Gain of Feature 1  :  nan
Information Gain of Feature 2  :  0.16454712442101982
Information Gain of Fe

  p1, p0 =  (positive[:,-1]==self.current_feature.truth).mean(), (negative[:-1]==self.current_feature.truth).mean()


IndexError: index 0 is out of bounds for axis 1 with size 0

In [96]:
features, labels

(array([[0, 0, 0, 0, 1],
        [0, 0, 0, 1, 1],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 1, 1],
        [0, 1, 0, 1, 1],
        [1, 1, 1, 0, 1],
        [1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1],
        [1, 0, 1, 1, 1]]),
 array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0]))

In [98]:
tree2 = BinaryTree()
node = BinaryTree.Root(feature=None)
tree2.root =  node
tree2.current_feature = node
tree2.get_best(features, labels)

Information Gain of Feature 0  :  0.26902447555119613
Information Gain of Feature 1  :  0.224355041087696
Information Gain of Feature 2  :  0.26902447555119613
Information Gain of Feature 3  :  0.09291839354354581
Information Gain of Feature 4  :  0.030758007896897643


0

In [58]:
tree1.pretty_print()

=> Depth : 0     |||     Entropy :  1   |||    Feature 0


In [24]:
tree1.predict(np.array([[1,1,1,1,0,1,1]]))