In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

n_features = 8

# RFE ranks features through the estimator it is given, so that estimator must
# expose coef_ or feature_importances_. Two rankers are prepared: a logistic
# regression for the standardized models and a random forest for the tree models.
ranker_for_linear = LogisticRegression(max_iter=5000, solver="liblinear")
ranker_for_tree = RandomForestClassifier(n_estimators=50, random_state=0)

# name -> (final classifier, standardize before RFE?, estimator RFE ranks with)
# Note: every non-tree model below (including both SVCs) is ranked by the
# logistic-regression ranker, not by the final classifier itself.
model_defs = {
    "Logistic": (
        LogisticRegression(max_iter=5000, solver="liblinear"),
        True,
        ranker_for_linear,
    ),
    "SVMl": (
        SVC(kernel="linear", random_state=0),
        True,
        ranker_for_linear,
    ),
    "SVMnl": (
        SVC(kernel="rbf", random_state=0),
        True,
        ranker_for_linear,
    ),
    "KNN": (
        KNeighborsClassifier(n_neighbors=5),
        True,
        ranker_for_linear,
    ),
    "Navie": (
        GaussianNB(),
        True,
        ranker_for_linear,
    ),
    "Decision": (
        DecisionTreeClassifier(criterion="entropy", random_state=0),
        False,
        ranker_for_tree,
    ),
    "Random": (
        RandomForestClassifier(n_estimators=100, random_state=0),
        False,
        ranker_for_tree,
    ),
}

results = []
for name, (final_clf, std_before, ranker) in model_defs.items():
    # Assemble [optional scaler] -> RFE -> final classifier.
    steps = [("std", StandardScaler())] if std_before else []
    steps += [
        ("rfe", RFE(estimator=ranker, n_features_to_select=n_features)),
        ("clf", final_clf),
    ]
    pipe = Pipeline(steps)

    # Fitting runs the feature elimination with 'ranker', then trains
    # final_clf on the columns RFE kept.
    pipe.fit(X_train, y_train)

    # Map the boolean support mask back to column names.
    # NOTE(review): assumes X has the same column order as X_train - confirm.
    selected_mask = pipe.named_steps["rfe"].get_support()
    features = X.columns[selected_mask].tolist()

    y_pred = pipe.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    print(f"\nModel: {name} | Acc: {acc:.4f} | Selected: {features}")
    results.append((name, acc))

# Summary table, best test accuracy first.
print("\nSummary:")
results_df = pd.DataFrame(results, columns=["model","test_acc"])
print(results_df.sort_values("test_acc", ascending=False))



Model: Logistic | Acc: 0.9600 | Selected: ['al', 'hrmo', 'pcv', 'rc', 'sg_b', 'sg_c', 'sg_d', 'dm_yes']

Model: SVMl | Acc: 0.9800 | Selected: ['al', 'hrmo', 'pcv', 'rc', 'sg_b', 'sg_c', 'sg_d', 'dm_yes']

Model: SVMnl | Acc: 0.9700 | Selected: ['al', 'hrmo', 'pcv', 'rc', 'sg_b', 'sg_c', 'sg_d', 'dm_yes']

Model: KNN | Acc: 0.9600 | Selected: ['al', 'hrmo', 'pcv', 'rc', 'sg_b', 'sg_c', 'sg_d', 'dm_yes']

Model: Navie | Acc: 0.9700 | Selected: ['al', 'hrmo', 'pcv', 'rc', 'sg_b', 'sg_c', 'sg_d', 'dm_yes']

Model: Decision | Acc: 0.9300 | Selected: ['al', 'bgr', 'sc', 'hrmo', 'pcv', 'rc', 'sg_d', 'htn_yes']

Model: Random | Acc: 0.9800 | Selected: ['al', 'bgr', 'sc', 'hrmo', 'pcv', 'rc', 'sg_d', 'htn_yes']

Summary:
      model  test_acc
1      SVMl      0.98
6    Random      0.98
2     SVMnl      0.97
4     Navie      0.97
0  Logistic      0.96
3       KNN      0.96
5  Decision      0.93
