In [4]:
import numpy as np

In [370]:
class BinaryTree():

    class Root():
        def __init__(self,feature):
            self.depth = 0
            self.positive = None
            self.negative = None
            self.feature = feature
            self.nochild = False


    class Decision(Root):
        def __init__(self, feature=0, parent=None, depth=0):
            self.parent = parent
            self.depth = depth
            super().__init__(feature=feature)

    class Leaf():
        def __init__(self, parent, confidence=1, depth=0):
            self.parent = parent
            self.confidence = confidence
            self.depth = depth

    def __init__(self, threshold=1, depth=3):
        self.threshold = np.clip(threshold, 0, 1)
        self.max_depth=np.maximum(1,depth)
        self.root  = None
        self.current_feature = None

    def H(self, x):
        if not x:
            return 0
        if x==1:
            return 0
        return -x*np.log2(x) - (1-x) * np.log2(1-x)
    
    def split(self, data, feature):
        '''
        Returns
        ======================
        feature=True, feature=False
        '''
        return data[...,data[feature]==1], data[...,data[feature]==0]

    def p_w(self, positive, negative):
        '''
        Returns
        ======================
        positive weight, positive p, negative weight, negative p
        '''
        tot = positive.shape[-1] + negative.shape[-1]
        return positive.shape[-1]/tot , positive[-1].mean(), negative.shape[-1]/tot, negative[-1].mean()
    
    def information_gain(self, data, feature):
        '''
        Return 
        =================
        weight_1, prob_1, wight_0, prob_0
        '''
        w1, p1, w0, p0 = self.p_w(*self.split(data,feature))
        return self.H(np.mean(data[-1])) - (w1 * self.H(p1) + w0 * self.H(p0))

    def get_best(self, data):
        '''
        Get the best feature to split on.
        '''
        best = 0
        max = 0
        for i in range(data.shape[0]-1):
            x = self.information_gain(data,i)
            if best < x:
                best = x
                max = i
        return max


    def train(self, data, parent = Root(None), pos=None):
        
        
        message = None
        p=0
        print(data.shape)
        if data.shape[0] == 1 and data.shape[1] == 0:
            message = 'No more Features'
        else:
            prob = data[-1].mean()
            p = self.H(prob)        
            print('Current Node Confidence', p)
        if p >= self.threshold :
            if pos is None:
                self.root = self.Root(None)
                self.root.nochild = True
                print('No Children')
                return 0
            message = 'Threshold met at '
        elif parent.depth == self.max_depth:
            message = 'Deepest root met'
        
        if message is not None:
            if pos:
                self.current_feature.positive = self.Leaf(self.current_feature, confidence=p, depth=parent.depth+1)
            else:
                self.current_feature.negative = self.Leaf(self.current_feature, confidence=p, depth=parent.depth+1)
            print(message, pos,'Depth :', parent.depth+1)
            return 0

        current_feature = self.get_best(data)
        if self.root is None:
            self.root = self.Root(feature=current_feature)
            self.current_feature = self.root
        else:
            self.current_feature = self.Decision(current_feature, parent=parent, depth=parent.depth+1)
            if pos:
                parent.positive = self.current_feature
            else:
                parent.negative = self.current_feature
        

        positive, negative = self.split(data,current_feature)


        print(f' Positive:\n{positive}', f' Negative:\n{negative}', f'Current Depth = {self.current_feature.depth}', f'Feature : {self.current_feature.feature}', sep='\n')


        positive = np.delete(positive,np.s_[current_feature],0)
        negative = np.delete(negative,np.s_[current_feature],0)

        self.train( positive, parent=self.current_feature, pos=True)
        self.train( negative, parent=self.current_feature, pos=False)

    def predict(self, data, node=None):

         
        if node is None:
            data = np.concatenate([data, np.arange(data.shape[-1]).reshape(1,-1)], axis=0)
            node = self.root
        if type(node) == self.Leaf:
            print('End leaf met at depth', node.depth, data)
            return data
        elif (type(node) == self.Root and node.nochild):
            return data[:-1]
        positive, negative = self.split(data, node.feature)
        print(positive, negative)
        pos = []
        neg = []
        pos.append(self.predict(positive, node.positive))
        neg.append(self.predict(negative, node.negative))
        return pos,neg


In [371]:
features = np.random.randint(size =(3,10), low=0, high=2)
labels = np.array([1,1,1,1,1,0,0,0,0,0])
features = np.array([[0,0,0,0,0,1,1,1,1,1]])
data = np.concatenate([features, labels.reshape(1,-1)] , axis=0)
data

array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]])

In [373]:
tree1 = BinaryTree()
tree1.train(data)

(2, 10)
Current Node Confidence 1.0
No Children


0

In [367]:
tree1.predict(np.array([[1,1,1,1,0,1,1]]))

[[1 1 1 1 1 1]
 [0 1 2 3 5 6]] [[0]
 [4]]
End leaf met at depth 1 [[1 1 1 1 1 1]
 [0 1 2 3 5 6]]
End leaf met at depth 1 [[0]
 [4]]


([array([[1, 1, 1, 1, 1, 1],
         [0, 1, 2, 3, 5, 6]])],
 [array([[0],
         [4]])])

In [156]:
c1, c2 = list(tree1.root.__dict__.values())[1:-1]

In [161]:
c1.confidence

1.0

In [162]:
c2.confidence

0.0