In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# ---------- Data loading and preparation ----------
# Read the preprocessed table and one-hot encode it in a single expression;
# drop_first=True drops one dummy level per encoded category.
df = pd.get_dummies(pd.read_csv("prep.csv"), drop_first=True)

# Label column (change it if the positive class is encoded under another name).
target_col = "classification_yes"
y = df[target_col]
X = df.drop(target_col, axis=1)

# Hold out a quarter of the rows; the split is stratified on the label and seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=0, test_size=0.25
)

# ---------- Helper to build a pipeline ----------
# chi2 needs non-negative inputs -> MinMax BEFORE SelectKBest
# Some models like SVM/KNN/LogReg benefit from standardization after feature selection.
def make_pipeline(clf, k=6, standardize_after=True):
    """Assemble a MinMax -> SelectKBest(chi2) -> [StandardScaler] -> clf pipeline.

    Args:
        clf: Estimator placed as the final ``"clf"`` step.
        k: Number of features ``SelectKBest`` keeps (default 6).
        standardize_after: When true, a ``StandardScaler`` step named ``"std"``
            is inserted between feature selection and the classifier.

    Returns:
        The unfitted ``Pipeline``.
    """
    # MinMaxScaler goes first so that chi2 only ever sees non-negative values.
    head = [
        ("minmax", MinMaxScaler()),
        ("kbest", SelectKBest(score_func=chi2, k=k)),
    ]
    middle = [("std", StandardScaler())] if standardize_after else []
    return Pipeline(head + middle + [("clf", clf)])

# One pipeline per model, each keeping make_pipeline's default k.
# Entries are (label, estimator, standardize-after-selection); only the
# decision tree and the random forest skip the StandardScaler step.
pipelines = {
    label: make_pipeline(estimator, standardize_after=scale_after)
    for label, estimator, scale_after in (
        ("Logistic", LogisticRegression(max_iter=5000, solver="liblinear"), True),
        ("SVM (Linear)", SVC(kernel="linear", probability=False, random_state=0), True),
        ("SVM (RBF)", SVC(kernel="rbf", probability=False, random_state=0), True),
        ("KNN", KNeighborsClassifier(n_neighbors=5), True),
        ("Naive Bayes", GaussianNB(), True),
        ("Decision Tree", DecisionTreeClassifier(criterion="entropy", random_state=0), False),
        (
            "Random Forest",
            RandomForestClassifier(n_estimators=200, criterion="entropy", random_state=0),
            False,
        ),
    )
}

# ---------- Fit every pipeline and gather its test-set metrics ----------
rows, reports, cms = [], {}, {}

for name, pipe in pipelines.items():
    # Pipeline.fit returns the fitted pipeline, so prediction can be chained.
    y_pred = pipe.fit(X_train, y_train).predict(X_test)

    rows.append({"Model": name, "Accuracy": accuracy_score(y_test, y_pred)})
    reports[name] = classification_report(y_test, y_pred, zero_division=0)
    cms[name] = confusion_matrix(y_test, y_pred)

# Rank the models by accuracy, best first.
results = (
    pd.DataFrame(rows)
    .sort_values("Accuracy", ascending=False)
    .reset_index(drop=True)
)

print("\n=== Accuracy by Model (k=6, SelectKBest chi2) ===")
print(results)

# Detailed report and confusion matrix for the top-ranked model only.
best_model = results.loc[0, "Model"]
print(f"\n=== Classification Report: {best_model} ===\n{reports[best_model]}")
print(f"=== Confusion Matrix: {best_model} ===\n{cms[best_model]}")

# ---------- Optional: check which k works best ----------
# Try multiple k values and record the best accuracy per model
def benchmark_k(k_values=(3,5,10,15,20)):
    """Refit every model for several ``k`` values and tabulate test accuracy.

    For each ``k`` in ``k_values`` and each model name in the module-level
    ``pipelines`` dict, a fresh pipeline is built with ``make_pipeline``,
    fitted on ``X_train``/``y_train`` and scored on ``X_test``/``y_test``.

    Note:
        The accuracies are measured on the held-out test split, so choosing
        ``k`` from this table tunes on the test data. Treat the numbers as
        exploratory rather than as an unbiased estimate.

    Args:
        k_values: Iterable of ``k`` values handed to ``SelectKBest``.

    Returns:
        DataFrame with columns ``k``, ``Model`` and ``Accuracy``, sorted by
        model name (ascending) and then accuracy (descending). It is empty,
        with the same columns, when there is nothing to evaluate.

    Raises:
        KeyError: If ``pipelines`` holds a model name that has no factory here.
    """
    # Factories instead of instances: each fit gets a fresh estimator and only
    # the estimator that is actually needed gets constructed.
    factories = {
        "Logistic": lambda: LogisticRegression(max_iter=5000, solver="liblinear"),
        "SVM (Linear)": lambda: SVC(kernel="linear", random_state=0),
        "SVM (RBF)": lambda: SVC(kernel="rbf", random_state=0),
        "KNN": lambda: KNeighborsClassifier(n_neighbors=5),
        "Naive Bayes": lambda: GaussianNB(),
        "Decision Tree": lambda: DecisionTreeClassifier(criterion="entropy", random_state=0),
        "Random Forest": lambda: RandomForestClassifier(
            n_estimators=200, criterion="entropy", random_state=0
        ),
    }
    # Tree-based models are the only ones built without the StandardScaler step.
    unscaled_models = ("Decision Tree", "Random Forest")
    columns = ["k", "Model", "Accuracy"]

    out = []
    for k in k_values:
        for name in pipelines:
            pipe = make_pipeline(
                factories[name](),
                k=k,
                standardize_after=name not in unscaled_models,
            )
            pipe.fit(X_train, y_train)
            acc = accuracy_score(y_test, pipe.predict(X_test))
            out.append({"k": k, "Model": name, "Accuracy": acc})

    if not out:
        # Nothing was evaluated: return a typed-by-name empty table instead of
        # failing in sort_values on missing columns.
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(out, columns=columns).sort_values(
        ["Model", "Accuracy"], ascending=[True, False]
    )

# Example (uncomment to run):
# k_scan = benchmark_k()
# print("\n=== Accuracy vs k (top rows) ===")
# print(k_scan.groupby("Model").head(3))



=== Accuracy by Model (k=6, SelectKBest chi2) ===
           Model  Accuracy
0   SVM (Linear)      0.93
1      SVM (RBF)      0.93
2            KNN      0.93
3  Decision Tree      0.93
4    Naive Bayes      0.93
5  Random Forest      0.93
6       Logistic      0.92

=== Classification Report: SVM (Linear) ===
              precision    recall  f1-score   support

       False       0.86      0.97      0.91        38
        True       0.98      0.90      0.94        62

    accuracy                           0.93       100
   macro avg       0.92      0.94      0.93       100
weighted avg       0.94      0.93      0.93       100

=== Confusion Matrix: SVM (Linear) ===
[[37  1]
 [ 6 56]]


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# --- load the prepared CSV and one-hot encode it ---
df = pd.get_dummies(pd.read_csv("prep.csv"), drop_first=True)
X = df.drop("classification_yes", axis=1)
y = df["classification_yes"]

# 75/25 split, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.25, random_state=0
)

# Number of features SelectKBest keeps; mkpipe reads this module-level value.
k = 5

def mkpipe(clf, std_after=True, n_estimators=None):
    """Build a MinMax -> SelectKBest(chi2) -> [StandardScaler] -> clf pipeline.

    The number of selected features is read from the module-level ``k`` at
    call time.

    Args:
        clf: Estimator used as the final ``"clf"`` step.
        std_after: Insert a ``"std"`` StandardScaler step after selection.
        n_estimators: Accepted but never read by this function.

    Returns:
        The unfitted ``Pipeline``.
    """
    selection = [("mm", MinMaxScaler()), ("kbest", SelectKBest(chi2, k=k))]
    scaling = [("std", StandardScaler())] if std_after else []
    return Pipeline([*selection, *scaling, ("clf", clf)])

# Models under comparison as (label, estimator, standardize-after-selection);
# the labels double as column names of the results table.
pipes = {
    label: mkpipe(estimator, std_after=scale_after)
    for label, estimator, scale_after in (
        ("Logistic", LogisticRegression(max_iter=5000, solver="liblinear"), True),
        ("SVMl", SVC(kernel="linear", random_state=0), True),
        ("SVMnl", SVC(kernel="rbf", random_state=0), True),
        ("KNN", KNeighborsClassifier(n_neighbors=5), True),
        ("Navie", GaussianNB(), True),
        ("Decision", DecisionTreeClassifier(criterion="entropy", random_state=0), False),
        ("Random", RandomForestClassifier(n_estimators=10, criterion="entropy", random_state=0), False),
    )
}

# Fit each pipeline on the training split and score it on the held-out split.
accs = {}
for name, pipe in pipes.items():
    fitted = pipe.fit(X_train, y_train)
    accs[name] = accuracy_score(y_test, fitted.predict(X_test))

# Single-row table with the columns in a fixed order.
result = pd.DataFrame([accs], index=["ChiSquare"]).loc[
    :, ["Logistic","SVMl","SVMnl","KNN","Navie","Decision","Random"]
]
print(result.round(2))


           Logistic  SVMl  SVMnl   KNN  Navie  Decision  Random
ChiSquare      0.93  0.94   0.93  0.92   0.94      0.94    0.94


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# --- read prep.csv, then one-hot encode (one dummy level dropped per category) ---
df = pd.read_csv("prep.csv")
df = pd.get_dummies(df, drop_first=True)
X, y = df.drop(columns="classification_yes"), df["classification_yes"]

# stratify=y keeps the class proportions of y in both the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, stratify=y, test_size=0.25
)

# Feature count used by SelectKBest inside mkpipe (module-level, read at call time).
k = 7

def mkpipe(clf, std_after=True, n_estimators=None):
    """Return an unfitted pipeline: MinMaxScaler, chi2 SelectKBest, optional StandardScaler, clf.

    Args:
        clf: Final estimator, registered under the step name ``"clf"``.
        std_after: If true, a ``"std"`` StandardScaler step sits between the
            selector and the estimator.
        n_estimators: Unused; present in the signature only.

    Returns:
        ``Pipeline`` whose selector keeps the module-level ``k`` features.
    """
    steps = [
        ("mm", MinMaxScaler()),
        ("kbest", SelectKBest(chi2, k=k)),
        ("clf", clf),
    ]
    if std_after:
        # Slot the scaler in right before the classifier.
        steps.insert(2, ("std", StandardScaler()))
    return Pipeline(steps)

# Estimators that get a StandardScaler after feature selection ...
scaled_models = {
    "Logistic": LogisticRegression(max_iter=5000, solver="liblinear"),
    "SVMl": SVC(kernel="linear", random_state=0),
    "SVMnl": SVC(kernel="rbf", random_state=0),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "Navie": GaussianNB(),
}
# ... and the tree-based ones that are built without it.
unscaled_models = {
    "Decision": DecisionTreeClassifier(criterion="entropy", random_state=0),
    "Random": RandomForestClassifier(n_estimators=10, criterion="entropy", random_state=0),
}

pipes = {label: mkpipe(model) for label, model in scaled_models.items()}
pipes.update(
    {label: mkpipe(model, std_after=False) for label, model in unscaled_models.items()}
)

# Train on the training split, record accuracy on the test split.
accs = {}
for name, pipe in pipes.items():
    pipe.fit(X_train, y_train)
    predictions = pipe.predict(X_test)
    accs[name] = accuracy_score(y_test, predictions)

# One-row summary with an explicit column order.
table = pd.DataFrame([accs], index=["ChiSquare"])
result = table[["Logistic","SVMl","SVMnl","KNN","Navie","Decision","Random"]]
print(result.round(2))


           Logistic  SVMl  SVMnl   KNN  Navie  Decision  Random
ChiSquare      0.95  0.96   0.96  0.96   0.96      0.96    0.96


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# --- load prep.csv and one-hot encode it in one chained expression ---
df = pd.read_csv("prep.csv").pipe(pd.get_dummies, drop_first=True)
y = df["classification_yes"]
X = df.drop(columns=["classification_yes"])

# Seeded 75/25 split stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=0,
)

# Module-level feature count that mkpipe passes to SelectKBest.
k = 8

def mkpipe(clf, std_after=True, n_estimators=None):
    """Create the scaling/selection pipeline that ends in ``clf``.

    Steps, in order: ``"mm"`` (MinMaxScaler), ``"kbest"`` (SelectKBest with
    chi2, keeping the module-level ``k`` features), optionally ``"std"``
    (StandardScaler), and ``"clf"``.

    Args:
        clf: Estimator for the last step.
        std_after: Whether to include the ``"std"`` step.
        n_estimators: Ignored by this function.

    Returns:
        The unfitted ``Pipeline``.
    """
    scaler_step = ("mm", MinMaxScaler())
    selector_step = ("kbest", SelectKBest(chi2, k=k))
    final_step = ("clf", clf)

    if not std_after:
        return Pipeline([scaler_step, selector_step, final_step])
    return Pipeline([scaler_step, selector_step, ("std", StandardScaler()), final_step])

# Candidate models; the decision tree and random forest skip standardization.
pipes = {
    "Logistic": mkpipe(
        LogisticRegression(max_iter=5000, solver="liblinear"),
    ),
    "SVMl": mkpipe(
        SVC(kernel="linear", random_state=0),
    ),
    "SVMnl": mkpipe(
        SVC(kernel="rbf", random_state=0),
    ),
    "KNN": mkpipe(
        KNeighborsClassifier(n_neighbors=5),
    ),
    "Navie": mkpipe(
        GaussianNB(),
    ),
    "Decision": mkpipe(
        DecisionTreeClassifier(criterion="entropy", random_state=0),
        std_after=False,
    ),
    "Random": mkpipe(
        RandomForestClassifier(n_estimators=10, criterion="entropy", random_state=0),
        std_after=False,
    ),
}

# Test-set accuracy per model, after fitting on the training split.
accs = {}
for name, pipe in pipes.items():
    y_hat = pipe.fit(X_train, y_train).predict(X_test)
    accs[name] = accuracy_score(y_test, y_hat)

# Lay the accuracies out as a single "ChiSquare" row in a fixed column order.
result = pd.DataFrame(
    [accs],
    index=["ChiSquare"],
)[["Logistic","SVMl","SVMnl","KNN","Navie","Decision","Random"]]
print(result.round(2))


           Logistic  SVMl  SVMnl   KNN  Navie  Decision  Random
ChiSquare      0.96  0.96   0.96  0.96   0.96      0.96    0.96


In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# --- load the prepared data and one-hot encode it ---
df = pd.get_dummies(pd.read_csv("prep.csv"), drop_first=True)
y = df.loc[:, "classification_yes"]
X = df.drop(columns="classification_yes")

# Stratified, seeded split that keeps 25% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=0
)

# How many top-scoring features SelectKBest keeps (read by mkpipe).
k = 8

def mkpipe(clf, std_after=True):
    """Chain MinMaxScaler, chi2 SelectKBest, an optional StandardScaler and ``clf``.

    Args:
        clf: Estimator that becomes the final ``"clf"`` step.
        std_after: Add a ``"std"`` StandardScaler step after feature selection.

    Returns:
        The unfitted ``Pipeline``; the selector keeps the module-level ``k``
        features.
    """
    steps = [("mm", MinMaxScaler())]
    steps += [("kbest", SelectKBest(chi2, k=k))]
    steps += [("std", StandardScaler())] if std_after else []
    steps += [("clf", clf)]
    return Pipeline(steps)

# --- example: fit one logistic-regression pipeline to inspect the selector ---
pipe = mkpipe(LogisticRegression(max_iter=5000, solver="liblinear"))
pipe.fit(X_train, y_train)

# Map the selector's boolean support mask back onto the column names.
selector = pipe.named_steps["kbest"]
selected_mask = selector.get_support()
selected_features = X.columns[selected_mask]

print("Top features selected by Chi²:")
print(selected_features.tolist())

# --- evaluate all models ---
# Entries are (label, estimator, standardize-after-selection).
pipes = {
    label: mkpipe(estimator, std_after=scale_after)
    for label, estimator, scale_after in (
        ("Logistic", LogisticRegression(max_iter=5000, solver="liblinear"), True),
        ("SVMl", SVC(kernel="linear", random_state=0), True),
        ("SVMnl", SVC(kernel="rbf", random_state=0), True),
        ("KNN", KNeighborsClassifier(n_neighbors=5), True),
        ("Naive", GaussianNB(), True),
        ("Decision", DecisionTreeClassifier(criterion="entropy", random_state=0), False),
        ("Random", RandomForestClassifier(n_estimators=10, criterion="entropy", random_state=0), False),
    )
}

# Note: the loop rebinds `pipe`, replacing the example pipeline fitted above.
accs = {}
for name, pipe in pipes.items():
    accs[name] = accuracy_score(y_test, pipe.fit(X_train, y_train).predict(X_test))

result = pd.DataFrame([accs], index=["ChiSquare"]).loc[
    :, ["Logistic","SVMl","SVMnl","KNN","Naive","Decision","Random"]
]
print("\nModel Accuracies:")
print(result.round(2))


Top features selected by Chi²:
['al', 'sg_b', 'sg_c', 'sg_d', 'htn_yes', 'dm_yes', 'pe_yes', 'ane_yes']

Model Accuracies:
           Logistic  SVMl  SVMnl   KNN  Naive  Decision  Random
ChiSquare      0.96  0.96   0.96  0.96   0.96      0.96    0.96


In [10]:
# Show the one-hot encoded frame; the rendered output lists only the first and last rows.
df

Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.000000,76.459948,3.0,0.0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,False,False,False,False,False,False,True,True,False,True
1,3.000000,76.459948,2.0,0.0,148.112676,22.000000,0.700000,137.528754,4.627244,10.700000,...,True,False,False,False,False,False,True,False,False,True
2,4.000000,76.459948,1.0,0.0,99.000000,23.000000,0.600000,138.000000,4.400000,12.000000,...,True,False,False,False,False,False,True,False,False,True
3,5.000000,76.459948,1.0,0.0,148.112676,16.000000,0.700000,138.000000,3.200000,8.100000,...,True,False,False,False,False,False,True,False,True,True
4,5.000000,50.000000,0.0,0.0,148.112676,25.000000,0.600000,137.528754,4.627244,11.800000,...,True,False,False,False,False,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,0.0,0.0,219.000000,36.000000,1.300000,139.000000,3.700000,12.500000,...,True,False,False,False,False,False,True,False,False,True
395,51.492308,70.000000,0.0,2.0,220.000000,68.000000,2.800000,137.528754,4.627244,8.700000,...,True,False,False,True,True,False,True,False,True,True
396,51.492308,70.000000,3.0,0.0,110.000000,115.000000,6.000000,134.000000,2.700000,9.100000,...,True,False,False,True,True,False,False,False,False,True
397,51.492308,90.000000,0.0,0.0,207.000000,80.000000,6.800000,142.000000,5.500000,8.500000,...,True,False,False,True,True,False,True,False,True,True


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# --- load prep.csv and one-hot encode it ---
df = pd.get_dummies(pd.read_csv("prep.csv"), drop_first=True)
X = df.drop(columns="classification_yes")
y = df["classification_yes"]

# Seeded 75/25 split, stratified on the label; the k search below uses only the training part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.25, stratify=y
)



# def mkpipe(clf, std_after=True, n_estimators=None):
#     steps = [("mm", MinMaxScaler()), ("kbest", SelectKBest(chi2, k=k))]
#     if std_after:
#         steps.append(("std", StandardScaler()))
#     steps.append(("clf", clf))
#     return Pipeline(steps)

# Candidate models as (estimator, standardize-after-selection) pairs.
pipes = {
    "Logistic": (LogisticRegression(max_iter=5000, solver="liblinear"), True),
    "SVMl": (SVC(kernel="linear", random_state=0), True),
    "SVMnl": (SVC(kernel="rbf", random_state=0), True),
    "KNN": (KNeighborsClassifier(n_neighbors=5), True),
    "Navie": (GaussianNB(), True),
    "Decision": (DecisionTreeClassifier(criterion="entropy", random_state=0), False),
    "Random": (
        RandomForestClassifier(n_estimators=10, criterion="entropy", random_state=0),
        False,
    ),
}

# Candidate k values: 1 up to min(30, number of training columns).
max_k = min(30, X_train.shape[1])
k_list = list(range(1, max_k + 1))
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

results = []
for name, (clf, std_after) in pipes.items():
    best_k, best_score = None, -np.inf
    for k in k_list:
        scaling = [("std", StandardScaler())] if std_after else []
        pipe = Pipeline(
            [
                ("mm", MinMaxScaler()),
                ("kbest", SelectKBest(chi2, k=k)),
                *scaling,
                ("clf", clf),
            ]
        )

        # Mean accuracy over the folds, computed on the training split only.
        mean_score = cross_val_score(
            pipe, X_train, y_train, cv=cv, scoring="accuracy", n_jobs=-1
        ).mean()
        results.append({"model": name, "k": k, "cv_mean_acc": mean_score})

        # Strict ">" keeps the smallest k among equally good scores.
        if mean_score > best_score:
            best_k, best_score = k, mean_score

    print(f"{name:10s} -> best_k={best_k}, cv_acc={best_score:.4f}")

# Collect every (model, k) score in one frame for analysis.
res_df = pd.DataFrame(results)
# Row with the highest CV mean accuracy for each model.
best_row_labels = res_df.groupby("model")["cv_mean_acc"].idxmax()
best_per_model = res_df.loc[best_row_labels].sort_values("model")
print("\nBest k per model (by CV mean accuracy):")
print(best_per_model[["model","k","cv_mean_acc"]].reset_index(drop=True))

Logistic   -> best_k=18, cv_acc=0.9900
SVMl       -> best_k=19, cv_acc=0.9933
SVMnl      -> best_k=17, cv_acc=0.9933
KNN        -> best_k=17, cv_acc=0.9866
Navie      -> best_k=15, cv_acc=0.9900
Decision   -> best_k=7, cv_acc=0.9799
Random     -> best_k=20, cv_acc=0.9899

Best k per model (by CV mean accuracy):
      model   k  cv_mean_acc
0  Decision   7     0.979944
1       KNN  17     0.986610
2  Logistic  18     0.990000
3     Navie  15     0.990000
4    Random  20     0.989944
5      SVMl  19     0.993277
6     SVMnl  17     0.993333


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# --- load prep.csv and one-hot encode it ---
df = pd.get_dummies(pd.read_csv("prep.csv"), drop_first=True)
X = df.drop("classification_yes", axis=1)
y = df["classification_yes"]

# Same seeded, stratified 75/25 split parameters as the cross-validation cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.25, random_state=0
)

# k chosen for each model, copied from the cross-validation results.
best_k_per_model = dict(
    Logistic=18,
    SVMl=19,
    SVMnl=17,
    KNN=17,
    Navie=15,
    Decision=7,
    Random=20,
)

# Each entry: (estimator, whether to standardize after SelectKBest).
# NOTE(review): the forest here uses n_estimators=100, while the k-search cell
# in this notebook builds it with n_estimators=10 - confirm k=20 still applies.
model_defs = {
    "Logistic": (LogisticRegression(max_iter=5000, solver="liblinear"), True),
    "SVMl": (SVC(kernel="linear", probability=False, random_state=0), True),
    "SVMnl": (SVC(kernel="rbf", probability=False, random_state=0), True),
    "KNN": (KNeighborsClassifier(n_neighbors=5), True),
    "Navie": (GaussianNB(), True),
    "Decision": (DecisionTreeClassifier(criterion="entropy", random_state=0), False),
    "Random": (
        RandomForestClassifier(n_estimators=100, criterion="entropy", random_state=0),
        False,
    ),
}

def make_pipeline_with_k(clf, k, std_after=True):
    """Build a MinMax -> SelectKBest(chi2, k) -> [StandardScaler] -> clf pipeline.

    Args:
        clf: Estimator for the final ``"clf"`` step.
        k: Number of features ``SelectKBest`` keeps.
        std_after: Add a ``"std"`` StandardScaler step after feature selection.

    Returns:
        The unfitted ``Pipeline``.
    """
    # Insertion order of the dict is the order of the pipeline steps.
    named_steps = {
        "mm": MinMaxScaler(),  # keeps chi2 inputs non-negative
        "kbest": SelectKBest(score_func=chi2, k=k),
    }
    if std_after:
        named_steps["std"] = StandardScaler()
    named_steps["clf"] = clf
    return Pipeline(list(named_steps.items()))

# Fit each model with its chosen k, then print the selected features and test metrics.
results = []
for name, (clf, std_after) in model_defs.items():
    k = best_k_per_model.get(name)

    # Fitting the whole pipeline on the training split also fits SelectKBest there.
    pipe = make_pipeline_with_k(clf, k=k, std_after=std_after)
    pipe.fit(X_train, y_train)

    # Score on the held-out test split.
    y_pred = pipe.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred, zero_division=0)

    # Translate the selector's support mask into column names.
    selected_mask = pipe.named_steps["kbest"].get_support()
    selected_features = list(X.columns[selected_mask])

    for line in (
        "\n" + "="*60,
        f"Model: {name} (k={k})",
        f"Test Accuracy: {acc:.4f}",
    ):
        print(line)
    print("Selected features:", selected_features)
    print("Confusion Matrix:\n", cm)
    print("Classification Report:\n", report)

    results.append({"model": name, "k": k, "test_acc": acc})

# Summary table, best test accuracy first.
summary = (
    pd.DataFrame(results)
    .sort_values("test_acc", ascending=False)
    .reset_index(drop=True)
)
print("\nSummary of test accuracies:")
print(summary)



Model: Logistic (k=18)
Test Accuracy: 0.9700
Selected features: ['al', 'su', 'sc', 'hrmo', 'pcv', 'rc', 'sg_b', 'sg_c', 'sg_d', 'pc_normal', 'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes', 'appet_yes', 'pe_yes', 'ane_yes']
Confusion Matrix:
 [[37  1]
 [ 2 60]]
Classification Report:
               precision    recall  f1-score   support

       False       0.95      0.97      0.96        38
        True       0.98      0.97      0.98        62

    accuracy                           0.97       100
   macro avg       0.97      0.97      0.97       100
weighted avg       0.97      0.97      0.97       100


Model: SVMl (k=19)
Test Accuracy: 0.9800
Selected features: ['al', 'su', 'bgr', 'sc', 'hrmo', 'pcv', 'rc', 'sg_b', 'sg_c', 'sg_d', 'pc_normal', 'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes', 'appet_yes', 'pe_yes', 'ane_yes']
Confusion Matrix:
 [[37  1]
 [ 1 61]]
Classification Report:
               precision    recall  f1-score   support

       False     