In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Function to create bootstrap samples
def bootstrap_sample(X, y, rng=None):
    """Draw a bootstrap sample: ``n_samples`` rows chosen with replacement.

    The same row indices are applied to ``X`` and ``y`` so every sampled
    row keeps its own label.

    Parameters
    ----------
    X : array-like
        Feature data; the first axis indexes samples.
    y : array-like
        Labels, one per row of ``X``.
    rng : None, int or numpy.random.Generator, optional
        Source of randomness. ``None`` (the default) draws from the global
        ``np.random`` state, exactly as before, so ``np.random.seed`` still
        controls the result. An int seeds a fresh ``Generator``; an existing
        ``Generator`` is used as is.

    Returns
    -------
    tuple
        ``(X_sample, y_sample)``, each with as many rows as ``X``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.
    """
    # Only convert inputs that lack a ``shape`` (e.g. plain lists); objects
    # that already expose one are indexed as they are.
    if not hasattr(X, "shape"):
        X = np.asarray(X)
    if not hasattr(y, "shape"):
        y = np.asarray(y)

    n_samples = X.shape[0]
    if y.shape[0] != n_samples:
        # Without this check a longer ``y`` would be silently truncated to
        # its first ``n_samples`` entries by the index draw below.
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {n_samples} and {y.shape[0]}"
        )

    sampler = np.random if rng is None else np.random.default_rng(rng)
    indices = sampler.choice(n_samples, n_samples, replace=True)
    return X[indices], y[indices]

# Function to create a random forest using DecisionTreeClassifier
def random_forest(X, y, n_trees=10, max_depth=None, min_samples_split=2, max_features=None):
    """Fit ``n_trees`` decision trees, each on its own bootstrap sample.

    Parameters
    ----------
    X : array-like
        Training features, passed to ``bootstrap_sample``.
    y : array-like
        Training labels, passed to ``bootstrap_sample``.
    n_trees : int, default 10
        Number of trees to fit. ``0`` yields an empty list.
    max_depth : int or None, default None
        Forwarded to ``DecisionTreeClassifier(max_depth=...)``.
    min_samples_split : int or float, default 2
        Forwarded to ``DecisionTreeClassifier(min_samples_split=...)``.
    max_features : optional, default None
        Forwarded to ``DecisionTreeClassifier(max_features=...)``. The
        default matches the classifier's own default, so existing callers
        see no change. Passing a value such as ``"sqrt"`` should enable
        per-split feature subsampling (see the scikit-learn documentation
        for the accepted values).

    Returns
    -------
    list
        The fitted ``DecisionTreeClassifier`` instances, in fitting order.
    """
    forest = []
    for _ in range(n_trees):
        # Each tree sees a different resample of the training data.
        X_sample, y_sample = bootstrap_sample(X, y)
        tree = DecisionTreeClassifier(
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            max_features=max_features,
        )
        tree.fit(X_sample, y_sample)
        forest.append(tree)
    return forest

# Function to make predictions using the random forest
def predict_forest(forest, X):
    """Predict labels for ``X`` by majority vote over the trees in ``forest``.

    Parameters
    ----------
    forest : iterable
        Fitted estimators; each must expose ``predict(X)`` returning one
        label per sample.
    X : array-like
        Samples to classify; passed unchanged to every ``predict`` call.

    Returns
    -------
    numpy.ndarray
        One label per sample, in the dtype of the trees' predictions. Ties
        go to the smallest label, as they did with ``np.bincount(...).argmax()``.

    Raises
    ------
    ValueError
        If ``forest`` contains no trees.
    """
    trees = list(forest)
    if not trees:
        raise ValueError("predict_forest requires at least one fitted tree")

    # Row t holds tree t's predictions: shape (n_trees, n_samples).
    tree_predictions = np.array([tree.predict(X) for tree in trees])
    if tree_predictions.shape[1] == 0:
        return np.empty(0, dtype=tree_predictions.dtype)

    # Encode labels as 0..n_classes-1 so voting works for any label type
    # (negative ints, floats, strings), not only the non-negative integers
    # that np.bincount accepts.
    classes, encoded = np.unique(tree_predictions, return_inverse=True)
    encoded = encoded.reshape(tree_predictions.shape)

    # votes[c, i] = number of trees that predicted classes[c] for sample i.
    class_ids = np.arange(classes.size)[:, np.newaxis, np.newaxis]
    votes = (encoded[np.newaxis, :, :] == class_ids).sum(axis=1)

    # np.unique returns sorted classes and argmax returns the first maximum,
    # so a tie resolves to the smallest label.
    return classes[votes.argmax(axis=0)]

# Demo: synthetic binary classification data with a fixed 80/20 split.
X, y = make_classification(
    n_samples=100,
    n_features=5,
    n_informative=3,
    n_redundant=0,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit ten depth-limited trees on bootstrap resamples of the training split.
forest = random_forest(
    X_train,
    y_train,
    n_trees=10,
    max_depth=5,
    min_samples_split=2,
)

# Majority-vote prediction on the held-out split.
predictions = predict_forest(forest, X_test)
print("Predictions:", predictions)

# Accuracy = fraction of held-out samples whose predicted label matches.
accuracy = (predictions == y_test).mean()
print("Accuracy:", accuracy)


Predictions: [0 0 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 1 1 1]
Accuracy: 0.8
