In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from collections import Counter

In [2]:
# Load the Iris dataset and unpack the feature matrix and label vector
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
# Two-stage stratified split: hold out 40% of the samples, then cut that
# hold-out in half to obtain the validation and test sets (60% / 20% / 20%).
split_seed = 42
X_train_pool, X_temp, y_train_pool, y_temp = train_test_split(
    X, y, stratify=y, test_size=0.4, random_state=split_seed
)
X_validation, X_test, y_validation, y_test = train_test_split(
    X_temp, y_temp, stratify=y_temp, test_size=0.5, random_state=split_seed
)

In [4]:
# Standardize features: the scaler learns its statistics from the pool
# training set only, and the same transform is then applied to every split.
scaler = StandardScaler().fit(X_train_pool)
X_train_pool, X_validation, X_test = [
    scaler.transform(split) for split in (X_train_pool, X_validation, X_test)
]

In [5]:
# Build the pool from three different model families
# (a decision tree, a k-nearest-neighbours model and a logistic regression)
tree_clf = DecisionTreeClassifier(random_state=42)
knn_clf = KNeighborsClassifier(n_neighbors=5)
logreg_clf = LogisticRegression(random_state=42, max_iter=1000)

pool = [tree_clf, knn_clf, logreg_clf]

In [6]:
# Train an independent copy of every classifier on the pool training set.
# clone() returns a fresh, unfitted estimator with the same hyper-parameters,
# so the prototypes in `pool` stay untouched and `trained_pool` does not alias
# them (estimator.fit() returns the estimator itself, not a copy).
trained_pool = [clone(clf).fit(X_train_pool, y_train_pool) for clf in pool]

# Fit a KNN on the validation set to define the region of competence.
# k is the number of validation neighbours that make up that region.
k = 7
region_selector = KNeighborsClassifier(n_neighbors=k).fit(X_validation, y_validation)

# Accumulators for the per-sample DCS / DES predictions
dcs_preds = []
des_preds = []

In [7]:
# For each test sample, perform DCS and DES.

# Start from empty lists so that re-running this cell does not append a second
# batch of predictions (which would make the lengths disagree with y_test).
dcs_preds = []
des_preds = []

# What each classifier predicts does not depend on which test sample is being
# processed, so compute it once up front instead of once per sample:
#   val_correct[i, j] -> classifier i is right on validation sample j
#   test_votes[i, j]  -> classifier i's prediction for test sample j
val_correct = np.array(
    [clf.predict(X_validation) == y_validation for clf in trained_pool]
)
test_votes = np.array([clf.predict(X_test) for clf in trained_pool])

# Region of competence: indices of the nearest validation samples, one row per
# test sample.
all_neighbors = region_selector.kneighbors(X_test, return_distance=False)

n_classifiers = len(trained_pool)
for sample_idx, neighbors in enumerate(all_neighbors):
    # Local competence as the *number* of correct predictions inside the
    # region. Every classifier is scored on the same neighbours, so counts rank
    # them exactly like accuracies while keeping all comparisons in integers.
    correct_counts = val_correct[:, neighbors].sum(axis=1)

    # DCS: pick the classifier with highest local accuracy
    # (argmax keeps the first classifier in pool order on ties).
    best_idx = int(np.argmax(correct_counts))
    dcs_preds.append(test_votes[best_idx, sample_idx])

    # DES: select classifiers with competence ≥ average competence.
    # c >= mean(c) is evaluated as c * n >= sum(c): a floating-point mean can
    # round slightly above every individual value when all competences are
    # equal, which would wrongly discard equally competent classifiers. With
    # integers the best classifier always qualifies, so the selection is never
    # empty and no fallback is required.
    selected_idx = np.flatnonzero(
        correct_counts * n_classifiers >= correct_counts.sum()
    )

    # Majority vote among selected; on a tied vote Counter keeps the label
    # that was cast first, i.e. by the earliest selected classifier in the pool.
    votes = test_votes[selected_idx, sample_idx].tolist()
    des_preds.append(Counter(votes).most_common(1)[0][0])

In [8]:
# Static ensemble: every classifier in the pool votes on every test sample and
# the most common label wins.
static_preds = [
    Counter(clf.predict([sample])[0] for clf in trained_pool).most_common(1)[0][0]
    for sample in X_test
]

# Evaluate: report the test accuracy of each strategy as a percentage
report = [
    ("Static Ensemble Accuracy: {:.2f}%", static_preds),
    ("Dynamic Classifier Selection Accuracy: {:.2f}%", dcs_preds),
    ("Dynamic Ensemble Selection Accuracy: {:.2f}%", des_preds),
]
for template, predictions in report:
    print(template.format(accuracy_score(y_test, predictions)*100))

Static Ensemble Accuracy: 93.33%
Dynamic Classifier Selection Accuracy: 90.00%
Dynamic Ensemble Selection Accuracy: 90.00%
