In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the dataset
data = pd.read_csv('sample_data/income.csv')

# Define features and target variable
X = data.drop(columns=['income_level'])
y = data['income_level']

# Split the dataset into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build AdaBoost model (default base estimator is a DecisionTreeClassifier with max_depth=1)
ada_model = AdaBoostClassifier(n_estimators=50, learning_rate=1.0, random_state=42)

# Train the model
ada_model.fit(X_train, y_train)

# Predict on test set
y_pred = ada_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Print results
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 83.27%
Confusion Matrix:
[[7003  411]
 [1223 1132]]


In [2]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split dataset into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define n_estimators values and learning rates for fine-tuning
n_estimators_values = [50, 100, 150]
learning_rates = [0.1, 0.5, 1.0]

# Results with DecisionTreeClassifier as base estimator
print("Results with DecisionTreeClassifier as base estimator:")
for n_estimators in n_estimators_values:
    for learning_rate in learning_rates:
        ada_boost_dt = AdaBoostClassifier(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            random_state=42
        )
        ada_boost_dt.fit(X_train, y_train)
        y_pred = ada_boost_dt.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"n_estimators={n_estimators}, learning_rate={learning_rate} -> Accuracy: {accuracy * 100:.2f}%")

# Results with LogisticRegression as base estimator
# Using Logistic Regression as a weak learner in AdaBoost is not directly supported, so we will skip this
print("\nLogistic Regression cannot be used as a base estimator directly in AdaBoost.")

Results with DecisionTreeClassifier as base estimator:
n_estimators=50, learning_rate=0.1 -> Accuracy: 100.00%
n_estimators=50, learning_rate=0.5 -> Accuracy: 96.67%
n_estimators=50, learning_rate=1.0 -> Accuracy: 93.33%
n_estimators=100, learning_rate=0.1 -> Accuracy: 100.00%
n_estimators=100, learning_rate=0.5 -> Accuracy: 100.00%
n_estimators=100, learning_rate=1.0 -> Accuracy: 93.33%
n_estimators=150, learning_rate=0.1 -> Accuracy: 100.00%
n_estimators=150, learning_rate=0.5 -> Accuracy: 96.67%
n_estimators=150, learning_rate=1.0 -> Accuracy: 93.33%

Logistic Regression cannot be used as a base estimator directly in AdaBoost.


In [3]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split into 80% train and 20% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter values
n_estimators_values = [50, 100, 150]
learning_rates = [0.1, 0.5, 1.0]

# Using DecisionTreeClassifier
print("Results with DecisionTreeClassifier as base estimator:")
for n in n_estimators_values:
    for lr in learning_rates:
        model = AdaBoostClassifier(
            estimator=DecisionTreeClassifier(max_depth=1),
            n_estimators=n,
            learning_rate=lr,
            random_state=42
        )
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        acc = accuracy_score(y_test, preds)
        print(f"n_estimators={n}, learning_rate={lr} -> Accuracy: {acc * 100:.2f}%")

# Using LogisticRegression
print("\nResults with LogisticRegression as base estimator:")
for n in n_estimators_values:
    for lr in learning_rates:
        model = AdaBoostClassifier(
            estimator=LogisticRegression(max_iter=1000, random_state=42),
            n_estimators=n,
            learning_rate=lr,
            random_state=42
        )
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        acc = accuracy_score(y_test, preds)
        print(f"n_estimators={n}, learning_rate={lr} -> Accuracy: {acc * 100:.2f}%")

Results with DecisionTreeClassifier as base estimator:
n_estimators=50, learning_rate=0.1 -> Accuracy: 100.00%
n_estimators=50, learning_rate=0.5 -> Accuracy: 96.67%
n_estimators=50, learning_rate=1.0 -> Accuracy: 93.33%
n_estimators=100, learning_rate=0.1 -> Accuracy: 100.00%
n_estimators=100, learning_rate=0.5 -> Accuracy: 100.00%
n_estimators=100, learning_rate=1.0 -> Accuracy: 93.33%
n_estimators=150, learning_rate=0.1 -> Accuracy: 100.00%
n_estimators=150, learning_rate=0.5 -> Accuracy: 96.67%
n_estimators=150, learning_rate=1.0 -> Accuracy: 93.33%

Results with LogisticRegression as base estimator:
n_estimators=50, learning_rate=0.1 -> Accuracy: 100.00%
n_estimators=50, learning_rate=0.5 -> Accuracy: 100.00%
n_estimators=50, learning_rate=1.0 -> Accuracy: 93.33%
n_estimators=100, learning_rate=0.1 -> Accuracy: 100.00%
n_estimators=100, learning_rate=0.5 -> Accuracy: 100.00%
n_estimators=100, learning_rate=1.0 -> Accuracy: 93.33%
n_estimators=150, learning_rate=0.1 -> Accuracy: 10