In [12]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import xgboost as xgb

# Load datasets
datasets = {}
for i in range(1, 8):
    datasets[f'Dataset_{i}'] = pd.read_csv(f'E:/R(5)/Datasets/Dataset_{i}.csv')

# Classifiers
classifiers = {
    'RF': RandomForestClassifier(),
    'XgB': xgb.XGBClassifier(),
    'GB': GradientBoostingClassifier(),
    'AdaB': AdaBoostClassifier(),
    'NB': GaussianNB(),
    'SVM': SVC(),
    'DT': DecisionTreeClassifier(),
    'LR': LogisticRegression(),
    'KNN': KNeighborsClassifier()
}

# Stratified K-Fold
skf = KFold(n_splits=10)

# DataFrame for storing error rates
error_rates = pd.DataFrame(columns=classifiers.keys())

# Calculate error rates
for name, dataset in datasets.items():
    X = dataset.drop('label', axis=1)
    y = dataset['label']
    error_rates_row = {}
    for clf_name, clf in classifiers.items():
        scores = cross_val_score(clf, X, y, cv=skf, scoring='accuracy')
        error_rate = 1 - scores.mean()
        error_rates_row[clf_name] = error_rate
    error_rates = error_rates.append(error_rates_row, ignore_index=True)

error_rates.index = datasets.keys()

Traceback (most recent call last):
  File "C:\ProgramData\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 136, in __call__
    score = scorer._score(
            ^^^^^^^^^^^^^^
  File "C:\ProgramData\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 353, in _score
    y_pred = method_caller(estimator, "predict", X)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\ProgramData\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 86, in _cached_call
    result, _ = _get_response_values(
                ^^^^^^^^^^^^^^^^^^^^^
  File "C:\ProgramData\anaconda3\Lib\site-packages\sklearn\utils\_response.py", line 85, in _get_response_values
    y_pred = prediction_method(X)
             ^^^^^^^^^^^^^^^^^^^^
  File "C:\ProgramData\anaconda3\Lib\site-packages\sklearn\neighbors\_classification.py", line 246, in predict
    if self._fit_method == "brute" and ArgKminClassMode.is_usable_for(
                                       ^^^^^^^^^^^^^^^^^^^^^^

In [13]:
pd.DataFrame(error_rates)

Unnamed: 0,RF,XgB,GB,AdaB,NB,SVM,DT,LR,KNN
Dataset_1,0.015,0.0225,0.01,0.01,0.035,0.0225,0.035,0.03,
Dataset_2,0.0125,0.0125,0.0075,0.0075,0.035,0.02,0.02,0.03,
Dataset_3,0.0725,0.0775,0.0725,0.0725,0.0725,0.0725,0.0725,0.075,0.375
Dataset_4,0.0125,0.015,0.01,0.015,0.0575,0.0275,0.0325,0.03,0.0275
Dataset_5,0.015,0.0175,0.01,0.0075,0.045,0.0225,0.02,0.0225,0.0225
Dataset_6,0.18,0.18,0.18,0.18,0.18,0.18,0.18,0.18,0.375
Dataset_7,0.0325,0.04,0.0375,0.0425,0.0375,0.06,0.0475,0.0775,0.0425


In [14]:
# Calculate mean error rate for each classifier and select the best seven
mean_error_rates = error_rates.mean()
best_seven_classifiers = mean_error_rates.nsmallest(7).index.tolist()

print("Best seven classifiers:", best_seven_classifiers)

Best seven classifiers: ['GB', 'AdaB', 'RF', 'XgB', 'SVM', 'DT', 'LR']
