# In [None]:
import numpy as np

class DecisionTree:
    """ID3-style decision tree classifier with multiway splits.

    Every internal node splits on one feature column and has one child per
    distinct value of that column seen during training. The feature is chosen
    by maximum information gain (entropy reduction).

    The fitted tree is stored in ``self.tree`` as nested dicts:

    * leaf:     ``{'class': label}``
    * internal: ``{'feature': column_index,
                   'children': {feature_value: subtree, ...},
                   'majority_class': label}``

    ``'majority_class'`` is the most frequent training label that reached the
    node; it is the prediction used when a sample carries a feature value
    that was never seen at that node during training.
    """

    def __init__(self):
        # Root node of the fitted tree; stays None until fit() is called.
        self.tree = None

    def fit(self, X, y):
        """Build the tree from training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: array-like of labels with one entry per row of ``X``. Labels
                may be of any dtype that ``np.unique`` can sort.

        Raises:
            ValueError: if ``X`` is not 2-D, has no rows, or ``y`` does not
                have the same number of entries as ``X`` has rows.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D (n_samples, n_features), got shape {}".format(X.shape)
            )
        if X.shape[0] == 0:
            raise ValueError("cannot fit a decision tree on zero samples")
        # Comparing shape prefixes also rejects a 0-d ``y`` without a TypeError.
        if y.shape[:1] != X.shape[:1]:
            raise ValueError(
                "X and y disagree on the number of samples: {} vs {}".format(
                    X.shape[0], y.shape[0] if y.ndim else 0
                )
            )
        self.tree = self._build_tree(X, y)

    @staticmethod
    def _majority_class(y):
        """Return the most frequent label in ``y`` (smallest label wins ties).

        Unlike ``np.argmax(np.bincount(y))`` this works for any sortable label
        dtype, not only non-negative integers.
        """
        labels, counts = np.unique(y, return_counts=True)
        return labels[np.argmax(counts)]

    def _calculate_entropy(self, y):
        """Return the Shannon entropy (base 2) of the label array ``y``.

        An empty ``y`` has entropy 0.
        """
        if len(y) == 0:
            return 0.0
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / counts.sum()
        # np.unique only reports labels that occur, so every probability is
        # strictly positive and log2 is always defined.
        return -np.sum(probabilities * np.log2(probabilities))

    def _calculate_information_gain(self, X, y, feature_idx):
        """Return the entropy reduction from splitting on column ``feature_idx``.

        The split is multiway: one partition per distinct value in the column.
        """
        column = X[:, feature_idx]
        total = len(y)
        entropy_children = 0.0
        for value in np.unique(column):
            mask = column == value
            weight = np.count_nonzero(mask) / total
            entropy_children += weight * self._calculate_entropy(y[mask])
        return self._calculate_entropy(y) - entropy_children

    def _find_best_split(self, X, y):
        """Return the index of the feature with the largest positive gain.

        Returns ``None`` when no feature yields a strictly positive gain (this
        includes the case of zero feature columns); the caller must then stop
        splitting.
        """
        best_gain = 0.0
        best_feature = None
        for feature_idx in range(X.shape[1]):
            gain = self._calculate_information_gain(X, y, feature_idx)
            if gain > best_gain:
                best_gain = gain
                best_feature = feature_idx
        return best_feature

    def _build_tree(self, X, y):
        """Recursively build and return the subtree for samples ``X``/``y``."""
        # Pure node: every sample carries the same label.
        if len(np.unique(y)) == 1:
            return {'class': y[0]}

        best_feature = self._find_best_split(X, y)
        majority = self._majority_class(y)
        if best_feature is None:
            # No feature separates the remaining labels (no columns, or rows
            # with identical features but different labels). Splitting further
            # would never terminate, so emit a majority-vote leaf.
            return {'class': majority}

        column = X[:, best_feature]
        node = {
            'feature': best_feature,
            'children': {},
            'majority_class': majority,
        }
        for value in np.unique(column):
            mask = column == value
            # NaN never compares equal to itself, so its partition is empty;
            # such samples fall back to 'majority_class' at prediction time.
            if not mask.any():
                continue
            node['children'][value] = self._build_tree(X[mask], y[mask])
        return node

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features) with the same
                column layout that was passed to ``fit``.

        Returns:
            ``np.ndarray`` with one predicted label per row.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.tree is None:
            raise RuntimeError("DecisionTree.predict() called before fit()")
        X = np.asarray(X)
        return np.array([self._traverse_tree(sample, self.tree) for sample in X])

    def _traverse_tree(self, sample, node):
        """Walk from ``node`` down to a prediction for a single ``sample``.

        If the sample's value for a node's split feature was not seen during
        training, the majority class recorded at that node is returned.
        """
        while 'class' not in node:
            child = node['children'].get(sample[node['feature']])
            if child is None:
                return node['majority_class']
            node = child
        return node['class']

# Demo: train on features/labels stored on disk, then print the tree's
# predictions for those same training samples.
if __name__ == "__main__":
    # Read the feature matrix and the label vector
    features = np.load('extracted_features.npy')
    labels = np.load('labels.npy')

    # Build the classifier from the loaded data
    model = DecisionTree()
    model.fit(features, labels)

    # Classify the training samples and show the result
    print(model.predict(features))
