In [2]:
import numpy as np

In [3]:


# Define the size of the original dataset
n = 10000  # You can change this to the size of your dataset

# Number of Monte Carlo simulations
num_simulations = 10000  # You can adjust this number

# Initialize a count variable to keep track of the data points included
count_included = 0

# Perform Monte Carlo simulations
for _ in range(num_simulations):
    # Generate a random bootstrapped sample by sampling with replacement
    bootstrap_sample = np.random.choice(range(n), size=n, replace=True)
    
    # Count the unique data points in the bootstrapped sample
    unique_data_points_in_sample = len(np.unique(bootstrap_sample))
    
    # Calculate the percentage of unique data points in the sample
    percentage_included = unique_data_points_in_sample / n
    
    # Add to the count if the percentage is close to 0.632 (63.2%)
    if 0.62 <= percentage_included <= 0.64:
        count_included += 1

# Calculate the estimated probability of including about 63% of the data
estimated_probability = count_included / num_simulations

print(f"Estimated probability of including 63% of data: {estimated_probability}")


Estimated probability of including 63% of data: 0.9955


In [8]:
n = 10
x = np.random.choice(range(n), size=n, replace=True)
print(f"with replace {x} by definition will have duplicates")
x = np.random.choice(range(n), size=n, replace=False)
print(f"without replace {x}")

with replace [1 4 0 0 9 5 8 1 5 5] by definition will have duplicates
without replace [2 0 4 1 3 6 8 9 5 7]


In [9]:
import numpy as np

class AdaBoost:
    def __init__(self, n_estimators=50):
        self.n_estimators = n_estimators
        self.alphas = []
        self.stumps = []
        
    def fit(self, X, y):
        m, n = X.shape
        w = np.ones(m) / m  # Initialize sample weights
        
        for _ in range(self.n_estimators):
            stump = self._fit_stump(X, y, w)
            stump_error = self._stump_error(stump, X, y, w)
            
            # Avoid division by zero
            alpha = 0.5 * np.log((1 - stump_error) / max(stump_error, 1e-10))
            
            # Update sample weights
            w *= np.exp(-alpha * y * stump.predict(X))
            w /= np.sum(w)
            
            # Save alpha and stump
            self.alphas.append(alpha)
            self.stumps.append(stump)
            
    def predict(self, X):
        preds = np.zeros(X.shape[0])
        for alpha, stump in zip(self.alphas, self.stumps):
            preds += alpha * stump.predict(X)
        return np.sign(preds)
    
    def _fit_stump(self, X, y, w):
        m, n = X.shape
        best_stump = None
        min_error = np.inf
        
        for feature in range(n):
            unique_values = np.unique(X[:, feature])
            for value in unique_values:
                for sign in [1, -1]:
                    stump = Stump(feature, value, sign)
                    error = self._stump_error(stump, X, y, w)
                    
                    if error < min_error:
                        min_error = error
                        best_stump = stump
        
        return best_stump
    
    def _stump_error(self, stump, X, y, w):
        return np.sum(w * (stump.predict(X) != y))

class Stump:
    def __init__(self, feature, value, sign):
        self.feature = feature
        self.value = value
        self.sign = sign
        
    def predict(self, X):
        return self.sign * (X[:, self.feature] > self.value)

# Example usage:
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.metrics import accuracy_score

    # Generate a synthetic dataset
    X, y = make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0, random_state=42)
    
    # Initialize and train AdaBoost
    adaboost = AdaBoost(n_estimators=50)
    adaboost.fit(X, y)
    
    # Make predictions
    y_pred = adaboost.predict(X)
    
    # Calculate accuracy
    accuracy = accuracy_score(y, y_pred)
    print(f"Accuracy: {accuracy:.2f}")


Accuracy: 0.96
