In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

In [3]:
# Create custom dataset
X, y = make_classification(n_samples=800, n_features=10, n_informative=5, n_redundant=0, random_state=42)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Forward Feature Selection

In [4]:
# Implement forward feature selection
selected_features = []
for i in range(X_train.shape[1]):
    best_accuracy = 0
    best_feature = None
    for j in range(X_train.shape[1]):
        if j not in selected_features:
            features = selected_features + [j]
            model = LogisticRegression()
            model.fit(X_train[:, features], y_train)
            accuracy = model.score(X_test[:, features], y_test)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_feature = j
    selected_features.append(best_feature)
    print("Selected Features (Forward):", selected_features, "Score:", accuracy)

Selected Features (Forward): [9] Score: 0.68125
Selected Features (Forward): [9, 0] Score: 0.7
Selected Features (Forward): [9, 0, 5] Score: 0.78125
Selected Features (Forward): [9, 0, 5, 6] Score: 0.84375
Selected Features (Forward): [9, 0, 5, 6, 2] Score: 0.85
Selected Features (Forward): [9, 0, 5, 6, 2, 1] Score: 0.8375
Selected Features (Forward): [9, 0, 5, 6, 2, 1, 7] Score: 0.8375
Selected Features (Forward): [9, 0, 5, 6, 2, 1, 7, 8] Score: 0.8625
Selected Features (Forward): [9, 0, 5, 6, 2, 1, 7, 8, 4] Score: 0.8625
Selected Features (Forward): [9, 0, 5, 6, 2, 1, 7, 8, 4, 3] Score: 0.84375


## Backward Feature Selection

In [5]:
# Implement backward feature elimination
selected_features = list(range(X_train.shape[1]))
for i in range(X_train.shape[1] - 1):
    worst_accuracy = 1
    worst_feature = None
    for j in selected_features:
        features = selected_features.copy()
        features.remove(j)
        model = LogisticRegression()
        model.fit(X_train[:, features], y_train)
        accuracy = model.score(X_test[:, features], y_test)
        if accuracy < worst_accuracy:
            worst_accuracy = accuracy
            worst_feature = j
    selected_features.remove(worst_feature)
    print("Selected Features (Backward):", selected_features, "Score:", accuracy)

Selected Features (Backward): [0, 1, 2, 3, 4, 5, 6, 7, 8] Score: 0.7125
Selected Features (Backward): [0, 1, 2, 3, 4, 5, 6, 7] Score: 0.65
Selected Features (Backward): [0, 1, 2, 3, 4, 6, 7] Score: 0.6375
Selected Features (Backward): [1, 2, 3, 4, 6, 7] Score: 0.59375
Selected Features (Backward): [1, 2, 3, 4, 6] Score: 0.45625
Selected Features (Backward): [2, 3, 4, 6] Score: 0.4625
Selected Features (Backward): [2, 3, 4] Score: 0.4625
Selected Features (Backward): [3, 4] Score: 0.5
Selected Features (Backward): [4] Score: 0.50625
