In [15]:
import numpy as np
from collections import Counter

In [16]:
class DecisionTree:
    """Binary classification tree grown greedily by information gain.

    Internal nodes are stored as ``(feature_index, threshold, left, right)``
    tuples: a sample is routed to ``left`` when ``x[feature_index] < threshold``
    and to ``right`` otherwise. Leaves are stored as the bare class label.

    Parameters
    ----------
    max_depth : int or None, default=None
        Maximum depth of the tree. ``None`` means the depth is not limited.
    min_samples_split : int, default=2
        A node holding fewer samples than this is turned into a leaf.
    """

    def __init__(self, max_depth=None, min_samples_split=2):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.tree = None  # populated by fit()

    def fit(self, X, y):
        """Build the decision tree recursively from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Class labels; any hashable scalar type NumPy can sort
            (ints, floats, strings) is accepted.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, the data is empty, or ``X`` and ``y`` have
            different lengths.
        """
        # Accept plain lists as well as arrays: the masks and column slicing
        # used below require ndarray semantics.
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
        if X.shape[0] == 0:
            raise ValueError("cannot fit a tree on an empty dataset")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
            )

        self.tree = self._grow_tree(X, y, depth=0)

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in ``y`` (first seen wins ties)."""
        return Counter(y).most_common(1)[0][0]

    def _grow_tree(self, X, y, depth):
        """Recursively build the subtree for the samples ``(X, y)``.

        Returns either a leaf label or a
        ``(feature, threshold, left_subtree, right_subtree)`` tuple.
        """
        num_samples = X.shape[0]

        # Stopping criteria. max_depth=None means "unlimited"; comparing an int
        # against None would raise TypeError, so guard it explicitly.
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        too_few_samples = num_samples < self.min_samples_split
        is_pure = len(set(y)) == 1
        if depth_exhausted or too_few_samples or is_pure:
            return self._majority_label(y)

        best_feature, best_threshold = self._best_split(X, y)

        # No threshold separates the samples (e.g. all rows are identical).
        if best_feature is None:
            return self._majority_label(y)

        left_mask = X[:, best_feature] < best_threshold
        right_mask = ~left_mask

        left_subtree = self._grow_tree(X[left_mask], y[left_mask], depth + 1)
        right_subtree = self._grow_tree(X[right_mask], y[right_mask], depth + 1)

        return (best_feature, best_threshold, left_subtree, right_subtree)

    def _best_split(self, X, y):
        """Find the (feature, threshold) pair with the highest information gain.

        Every distinct value of every feature is tried as a threshold.
        Returns ``(None, None)`` when no candidate yields two non-empty sides.
        """
        best_gain = -1
        best_feature, best_threshold = None, None

        for feature in range(X.shape[1]):
            column = X[:, feature]
            for threshold in np.unique(column):
                left_mask = column < threshold
                right_mask = ~left_mask

                # A split that leaves one side empty is not a split at all.
                if not left_mask.any() or not right_mask.any():
                    continue

                gain = self._information_gain(y, y[left_mask], y[right_mask])

                # Strict '>' keeps the first candidate among equal gains.
                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature
                    best_threshold = threshold

        return best_feature, best_threshold

    def _information_gain(self, y, y_left, y_right):
        """Return parent entropy minus the size-weighted entropy of the children."""

        def entropy(labels):
            # np.unique handles any sortable label type, unlike np.bincount,
            # which is limited to non-negative integers. Every returned count
            # is >= 1, so log2 never sees zero.
            _, counts = np.unique(labels, return_counts=True)
            probs = counts / counts.sum()
            return -np.sum(probs * np.log2(probs))

        total = len(y)
        weighted_children = (
            len(y_left) / total * entropy(y_left)
            + len(y_right) / total * entropy(y_right)
        )
        return entropy(y) - weighted_children

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.tree is None:
            raise RuntimeError("DecisionTree.predict() called before fit()")
        X = np.asarray(X)
        return np.array([self._predict_one(x, self.tree) for x in X])

    def _predict_one(self, x, tree):
        """Walk ``tree`` from its root to a leaf for the single sample ``x``."""
        node = tree
        # Internal nodes are tuples; anything else is a leaf label.
        while isinstance(node, tuple):
            feature, threshold, left_subtree, right_subtree = node
            node = left_subtree if x[feature] < threshold else right_subtree
        return node

In [17]:
class RandomForest:
    """Bagged ensemble of ``DecisionTree`` classifiers combined by majority vote.

    Each tree is trained on a bootstrap sample (rows drawn with replacement)
    of the training data. Every tree considers all features at every split;
    there is no per-split feature subsampling.

    Parameters
    ----------
    n_trees : int, default=10
        Number of trees in the ensemble.
    max_depth : int or None, default=None
        Forwarded to every ``DecisionTree``.
    min_samples_split : int, default=2
        Forwarded to every ``DecisionTree``.
    """

    def __init__(self, n_trees=10, max_depth=None, min_samples_split=2):
        self.n_trees = n_trees
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.trees = []

    def fit(self, X, y):
        """Train ``n_trees`` decision trees, each on its own bootstrap sample.

        Any previously fitted trees are discarded, so calling ``fit`` again
        retrains the forest from scratch instead of growing it.

        Bootstrap indices are drawn from NumPy's global random state; call
        ``np.random.seed`` beforehand for reproducible forests.

        Raises
        ------
        ValueError
            If the data is empty or ``X`` and ``y`` have different lengths.
        """
        # Fancy indexing with an index array requires ndarrays, not lists.
        X = np.asarray(X)
        y = np.asarray(y)

        num_samples = len(X)
        if num_samples == 0:
            raise ValueError("cannot fit a forest on an empty dataset")
        if num_samples != len(y):
            raise ValueError(
                f"X and y have inconsistent lengths: {num_samples} != {len(y)}"
            )

        # Build into a fresh list and swap it in at the end, so a failure
        # midway never leaves a half-trained forest behind.
        fitted_trees = []
        for _ in range(self.n_trees):
            indices = np.random.choice(num_samples, num_samples, replace=True)
            tree = DecisionTree(
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
            )
            tree.fit(X[indices], y[indices])
            fitted_trees.append(tree)
        self.trees = fitted_trees

    def predict(self, X):
        """Predict class labels by majority vote across all trees.

        Raises
        ------
        RuntimeError
            If the forest holds no fitted trees.
        """
        if not self.trees:
            raise RuntimeError(
                "RandomForest.predict() called with no fitted trees; call fit() first"
            )
        X = np.asarray(X)

        # Shape (n_trees, n_samples); transposing gives one row of votes per sample.
        votes = np.array([tree.predict(X) for tree in self.trees])
        return np.array(
            [Counter(sample_votes).most_common(1)[0][0] for sample_votes in votes.T]
        )

    def evaluate(self, y_true, y_pred):
        """Compute classification accuracy.

        Returns
        -------
        dict
            ``{"accuracy": <fraction of positions where y_true == y_pred>}``.

        Raises
        ------
        ValueError
            If ``y_true`` and ``y_pred`` have different shapes.
        """
        # Without the conversion, comparing two plain lists yields one bool
        # rather than an element-wise comparison.
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        if y_true.shape != y_pred.shape:
            raise ValueError(
                f"y_true and y_pred have different shapes: "
                f"{y_true.shape} != {y_pred.shape}"
            )

        accuracy = float(np.mean(y_true == y_pred))
        return {"accuracy": accuracy}



In [18]:
# Example usage: train a small forest on two well-separated Gaussian blobs.
if __name__ == "__main__":
    # Seeding NumPy's global generator fixes both the synthetic data and the
    # bootstrap draws made inside RandomForest.fit.
    np.random.seed(42)

    # 50 points around (2, 2) and 50 points around (-2, -2).
    X1 = np.array([2, 2]) + np.random.randn(50, 2)
    X2 = np.array([-2, -2]) + np.random.randn(50, 2)
    X = np.concatenate([X1, X2], axis=0)

    # First blob is class 0, second blob is class 1.
    y = np.concatenate([np.zeros(50, dtype=int), np.ones(50, dtype=int)])

    model = RandomForest(max_depth=5, n_trees=10)
    model.fit(X, y)

    # NOTE: predictions are scored on the training data itself, so the
    # accuracy below is a sanity check, not a generalisation estimate.
    predictions = model.predict(X)
    metrics = model.evaluate(y, predictions)

    print("Predictions:", predictions)
    print("Metrics:", metrics)

Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Metrics: {'accuracy': 1.0}
