In [5]:
import numpy as np

class DecisionTree:
    def __init__(self, max_depth=None, min_samples_split=2, criterion='gini'):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.criterion = gini_impurity if criterion == 'gini' else entropy
        self.root = None

    def fit(self, X, y):
        self.root = self._grow_tree(X, y)

    def _grow_tree(self, X, y, depth=0):
        num_samples, num_features = X.shape
        if num_samples >= self.min_samples_split and (self.max_depth is None or depth < self.max_depth):
            best_feature, best_threshold = self._best_split(X, y)
            if best_feature is not None:
                left_idxs, right_idxs = self._split(X[:, best_feature], best_threshold)
                left = self._grow_tree(X[left_idxs, :], y[left_idxs], depth + 1)
                right = self._grow_tree(X[right_idxs, :], y[right_idxs], depth + 1)
                return Node(feature=best_feature, threshold=best_threshold, left=left, right=right)
        return Node(value=most_common_label(y))

    def _best_split(self, X, y):
        num_samples, num_features = X.shape
        if num_samples <= 1:
            return None, None

        best_gain = -1
        best_feature, best_threshold = None, None

        for feature in range(num_features):
            thresholds, classes = zip(*sorted(zip(X[:, feature], y)))
            num_left = [0] * len(set(y))
            num_right = np.bincount(classes)

            for i in range(1, num_samples):
                c = classes[i - 1]
                num_left[c] += 1
                num_right[c] -= 1

                gain = self._information_gain(y, num_left, num_right, i)

                if thresholds[i] == thresholds[i - 1]:
                    continue

                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature
                    best_threshold = (thresholds[i] + thresholds[i - 1]) / 2

        return best_feature, best_threshold

    def _information_gain(self, y, num_left, num_right, split_idx):
        num = len(y)
        p_left = split_idx / num
        p_right = 1 - p_left
        impurity = self.criterion(y)
        left_impurity = self.criterion(num_left)
        right_impurity = self.criterion(num_right)
        return impurity - (p_left * left_impurity + p_right * right_impurity)

    def _split(self, X_column, split_threshold):
        left_idxs = np.argwhere(X_column <= split_threshold).flatten()
        right_idxs = np.argwhere(X_column > split_threshold).flatten()
        return left_idxs, right_idxs

    def predict(self, X):
        return np.array([self._predict(inputs) for inputs in X])

    def _predict(self, inputs):
        node = self.root
        while node.value is None:
            if inputs[node.feature] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value
import numpy as np

class RandomForest:
    def __init__(self, n_estimators=100, max_depth=None, min_samples_split=2, criterion='gini'):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.criterion = criterion
        self.trees = []

    def fit(self, X, y):
        self.trees = []
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth, min_samples_split=self.min_samples_split, criterion=self.criterion)
            X_sample, y_sample = self._bootstrap_sample(X, y)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def _bootstrap_sample(self, X, y):
        n_samples = X.shape[0]
        indices = np.random.choice(n_samples, n_samples, replace=True)
        return X[indices], y[indices]

    def predict(self, X):
        tree_predictions = np.array([tree.predict(X) for tree in self.trees])
        return np.swapaxes(tree_predictions, 0, 1).mean(axis=1).round().astype(int)
    # Generate some example data
np.random.seed(0)
X = np.random.rand(100, 2)
y = (X[:, 0] + X[:, 1] > 1).astype(int)

# Initialize and train the random forest model
rf_model = RandomForest(n_estimators=10, max_depth=3)
rf_model.fit(X, y)

# Make predictions
predictions = rf_model.predict(X)

# Print the accuracy
accuracy = np.mean(predictions == y)
print(f"Accuracy: {accuracy:.2f}")



NameError: name 'gini_impurity' is not defined