In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_validate
from sklearn.model_selection import LeaveOneOut
import pandas as pd
import numpy as np
from numpy import mean, std

df = pd.read_csv("https://raw.githubusercontent.com/waleed-alfaifi/ml-project/main/Datasets/BankNote_Authentication.csv")
X, y = df.values[:, :-1], df.values[:, -1]

loo = LeaveOneOut()

metrics = ["accuracy", "balanced_accuracy","recall", "precision", "f1"]

# Use Logistic Regression for classification
logistic_model = LogisticRegression(random_state=0)
logistic_scores = cross_validate(logistic_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for Logistic Regression: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_accuracy"]), std(logistic_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_balanced_accuracy"]), std(logistic_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(logistic_scores["test_recall"]), std(logistic_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(logistic_scores["test_precision"]), std(logistic_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(logistic_scores["test_f1"]), std(logistic_scores["test_f1"])))

# Use Naive Bayes for classification
NB_model = GaussianNB()
NB_scores = cross_validate(NB_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for Naive Bayes: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_accuracy"]), std(NB_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_balanced_accuracy"]), std(NB_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(NB_scores["test_recall"]), std(NB_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(NB_scores["test_precision"]), std(NB_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(NB_scores["test_f1"]), std(NB_scores["test_f1"])))

# Use SVM for classification
SVM_model = SVC()
SVM_scores = cross_validate(SVM_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for SVM: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_accuracy"]), std(SVM_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_balanced_accuracy"]), std(SVM_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(SVM_scores["test_recall"]), std(SVM_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(SVM_scores["test_precision"]), std(SVM_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(SVM_scores["test_f1"]), std(SVM_scores["test_f1"])))

# Use KNN for classification
KNN_model = KNeighborsClassifier(n_neighbors=3)
KNN_scores = cross_validate(KNN_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for KNN: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_accuracy"]), std(KNN_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_balanced_accuracy"]), std(KNN_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(KNN_scores["test_recall"]), std(KNN_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(KNN_scores["test_precision"]), std(KNN_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(KNN_scores["test_f1"]), std(KNN_scores["test_f1"])))

# Use Decision Tree for classification
DT_model = DecisionTreeClassifier()
DT_scores = cross_validate(DT_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for Decision Tree: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_accuracy"]), std(DT_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_balanced_accuracy"]), std(DT_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(DT_scores["test_recall"]), std(DT_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(DT_scores["test_precision"]), std(DT_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(DT_scores["test_f1"]), std(DT_scores["test_f1"])))

# Ensemble of all models
ensemble_model = VotingClassifier(estimators=[('LR', logistic_model), ('NB', NB_model), ('SVM', SVM_model), ("KNN", KNN_model), ('DT', DT_model)], voting='hard')
ensemble_scores = cross_validate(ensemble_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for Ensemble Model (Hard Voting): ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_accuracy"]), std(ensemble_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_balanced_accuracy"]), std(ensemble_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(ensemble_scores["test_recall"]), std(ensemble_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(ensemble_scores["test_precision"]), std(ensemble_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(ensemble_scores["test_f1"]), std(ensemble_scores["test_f1"])))

Mean scores and standard deviation for Logistic Regression: 
 1. Accuracy: 0.990 (0.100)
 2. Balanced Accuracy: 0.990 (0.100)
 3. Recall: 0.442 (0.497)
 4. Precision: 0.442 (0.497)
 5. F1: 0.442 (0.497)
Mean scores and standard deviation for Naive Bayes: 
 1. Accuracy: 0.840 (0.367)
 2. Balanced Accuracy: 0.840 (0.367)
 3. Recall: 0.352 (0.478)
 4. Precision: 0.352 (0.478)
 5. F1: 0.352 (0.478)
Mean scores and standard deviation for SVM: 
 1. Accuracy: 0.996 (0.060)
 2. Balanced Accuracy: 0.996 (0.060)
 3. Recall: 0.445 (0.497)
 4. Precision: 0.445 (0.497)
 5. F1: 0.445 (0.497)
Mean scores and standard deviation for KNN: 
 1. Accuracy: 0.999 (0.027)
 2. Balanced Accuracy: 0.999 (0.027)
 3. Recall: 0.445 (0.497)
 4. Precision: 0.445 (0.497)
 5. F1: 0.445 (0.497)
Mean scores and standard deviation for Decision Tree: 
 1. Accuracy: 0.985 (0.120)
 2. Balanced Accuracy: 0.985 (0.120)
 3. Recall: 0.438 (0.496)
 4. Precision: 0.438 (0.496)
 5. F1: 0.438 (0.496)
Mean scores and standard deviat

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import accuracy_score
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn import preprocessing
import pandas as pd
import numpy as np
from numpy import mean, std

label_encoder = preprocessing.LabelEncoder()

loo = LeaveOneOut()
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

metrics = ["accuracy", "balanced_accuracy","recall", "precision", "f1"]

df = pd.read_csv("https://raw.githubusercontent.com/waleed-alfaifi/ml-project/main/Datasets/Breast%20Cancer%20Wisconsin%20(Diagnostic)%20Data%20Set.data", header=None)

df[1] = label_encoder.fit_transform(df[1]) # encode classes

X, y = df.values[:, 2:], df.values[:, 1]

# Use Logistic Regression for classification
logistic_model = LogisticRegression(random_state=0)
logistic_scores = cross_validate(logistic_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Logistic Regression: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_accuracy"]), std(logistic_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_balanced_accuracy"]), std(logistic_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(logistic_scores["test_recall"]), std(logistic_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(logistic_scores["test_precision"]), std(logistic_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(logistic_scores["test_f1"]), std(logistic_scores["test_f1"])))

# Use Naive Bayes for classification
NB_model = GaussianNB()
NB_scores = cross_validate(NB_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Naive Bayes: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_accuracy"]), std(NB_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_balanced_accuracy"]), std(NB_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(NB_scores["test_recall"]), std(NB_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(NB_scores["test_precision"]), std(NB_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(NB_scores["test_f1"]), std(NB_scores["test_f1"])))

# Use SVM for classification
SVM_model = SVC()
SVM_scores = cross_validate(SVM_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for SVM: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_accuracy"]), std(SVM_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_balanced_accuracy"]), std(SVM_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(SVM_scores["test_recall"]), std(SVM_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(SVM_scores["test_precision"]), std(SVM_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(SVM_scores["test_f1"]), std(SVM_scores["test_f1"])))

# Use KNN for classification
KNN_model = KNeighborsClassifier(n_neighbors=3)
KNN_scores = cross_validate(KNN_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for KNN: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_accuracy"]), std(KNN_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_balanced_accuracy"]), std(KNN_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(KNN_scores["test_recall"]), std(KNN_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(KNN_scores["test_precision"]), std(KNN_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(KNN_scores["test_f1"]), std(KNN_scores["test_f1"])))

# Use Decision Tree for classification
DT_model = DecisionTreeClassifier()
DT_scores = cross_validate(DT_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Decision Tree: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_accuracy"]), std(DT_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_balanced_accuracy"]), std(DT_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(DT_scores["test_recall"]), std(DT_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(DT_scores["test_precision"]), std(DT_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(DT_scores["test_f1"]), std(DT_scores["test_f1"])))


# Ensemble of all models
ensemble_model = VotingClassifier(estimators=[('LR', logistic_model), ('NB', NB_model), ('SVM', SVM_model), ("KNN", KNN_model), ('DT', DT_model)], voting='hard')
ensemble_scores = cross_validate(ensemble_model, X, y, scoring=metrics, cv=cv, n_jobs=-1) 

print("Mean scores and standard deviation for Ensemble Model (Hard Voting): ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_accuracy"]), std(ensemble_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_balanced_accuracy"]), std(ensemble_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(ensemble_scores["test_recall"]), std(ensemble_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(ensemble_scores["test_precision"]), std(ensemble_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(ensemble_scores["test_f1"]), std(ensemble_scores["test_f1"])))

Mean scores and standard deviation for Logistic Regression: 
 1. Accuracy: 0.943 (0.029)
 2. Balanced Accuracy: 0.936 (0.034)
 3. Recall: 0.906 (0.066)
 4. Precision: 0.943 (0.052)
 5. F1: 0.922 (0.041)
Mean scores and standard deviation for Naive Bayes: 
 1. Accuracy: 0.939 (0.030)
 2. Balanced Accuracy: 0.928 (0.034)
 3. Recall: 0.887 (0.061)
 4. Precision: 0.947 (0.048)
 5. F1: 0.914 (0.042)
Mean scores and standard deviation for SVM: 
 1. Accuracy: 0.915 (0.037)
 2. Balanced Accuracy: 0.893 (0.050)
 3. Recall: 0.805 (0.103)
 4. Precision: 0.964 (0.046)
 5. F1: 0.872 (0.066)
Mean scores and standard deviation for KNN: 
 1. Accuracy: 0.931 (0.024)
 2. Balanced Accuracy: 0.922 (0.029)
 3. Recall: 0.884 (0.059)
 4. Precision: 0.932 (0.047)
 5. F1: 0.905 (0.035)
Mean scores and standard deviation for Decision Tree: 
 1. Accuracy: 0.927 (0.032)
 2. Balanced Accuracy: 0.923 (0.036)
 3. Recall: 0.907 (0.056)
 4. Precision: 0.900 (0.048)
 5. F1: 0.903 (0.044)
Mean scores and standard deviat

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn import preprocessing
import pandas as pd
import numpy as np
from numpy import mean, std

label_encoder = preprocessing.LabelEncoder()

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

metrics = ["accuracy", "balanced_accuracy", "recall", "precision", "f1"]

df = pd.read_csv("https://raw.githubusercontent.com/waleed-alfaifi/ml-project/main/Datasets/spambase.data", header=None)

X, y = df.values[:, :-1], df.values[:, -1]

# Use Logistic Regression for classification
logistic_model = LogisticRegression(random_state=0)
logistic_scores = cross_validate(logistic_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Logistic Regression: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_accuracy"]), std(logistic_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_balanced_accuracy"]), std(logistic_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(logistic_scores["test_recall"]), std(logistic_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(logistic_scores["test_precision"]), std(logistic_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(logistic_scores["test_f1"]), std(logistic_scores["test_f1"])))

# Use Naive Bayes for classification
NB_model = GaussianNB()
NB_scores = cross_validate(NB_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Naive Bayes: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_accuracy"]), std(NB_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_balanced_accuracy"]), std(NB_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(NB_scores["test_recall"]), std(NB_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(NB_scores["test_precision"]), std(NB_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(NB_scores["test_f1"]), std(NB_scores["test_f1"])))

# Use SVM for classification
SVM_model = SVC()
SVM_scores = cross_validate(SVM_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for SVM: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_accuracy"]), std(SVM_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_balanced_accuracy"]), std(SVM_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(SVM_scores["test_recall"]), std(SVM_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(SVM_scores["test_precision"]), std(SVM_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(SVM_scores["test_f1"]), std(SVM_scores["test_f1"])))

# Use KNN for classification
KNN_model = KNeighborsClassifier(n_neighbors=3)
KNN_scores = cross_validate(KNN_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for KNN: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_accuracy"]), std(KNN_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_balanced_accuracy"]), std(KNN_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(KNN_scores["test_recall"]), std(KNN_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(KNN_scores["test_precision"]), std(KNN_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(KNN_scores["test_f1"]), std(KNN_scores["test_f1"])))

# Use Decision Tree for classification
DT_model = DecisionTreeClassifier()
DT_scores = cross_validate(DT_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Decision Tree: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_accuracy"]), std(DT_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_balanced_accuracy"]), std(DT_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(DT_scores["test_recall"]), std(DT_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(DT_scores["test_precision"]), std(DT_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(DT_scores["test_f1"]), std(DT_scores["test_f1"])))


# Ensemble of all models
ensemble_model = VotingClassifier(estimators=[('LR', logistic_model), ('NB', NB_model), ('SVM', SVM_model), ("KNN", KNN_model), ('DT', DT_model)], voting='hard')
ensemble_scores = cross_validate(ensemble_model, X, y, scoring=metrics, cv=cv, n_jobs=-1) 

print("Mean scores and standard deviation for Ensemble Model (Hard Voting): ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_accuracy"]), std(ensemble_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_balanced_accuracy"]), std(ensemble_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(ensemble_scores["test_recall"]), std(ensemble_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(ensemble_scores["test_precision"]), std(ensemble_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(ensemble_scores["test_f1"]), std(ensemble_scores["test_f1"])))

Mean scores and standard deviation for Logistic Regression: 
 1. Accuracy: 0.920 (0.014)
 2. Balanced Accuracy: 0.915 (0.015)
 3. Recall: 0.892 (0.023)
 4. Precision: 0.905 (0.023)
 5. F1: 0.898 (0.018)
Mean scores and standard deviation for Naive Bayes: 
 1. Accuracy: 0.821 (0.017)
 2. Balanced Accuracy: 0.845 (0.016)
 3. Recall: 0.958 (0.015)
 4. Precision: 0.700 (0.021)
 5. F1: 0.809 (0.016)
Mean scores and standard deviation for SVM: 
 1. Accuracy: 0.717 (0.022)
 2. Balanced Accuracy: 0.671 (0.025)
 3. Recall: 0.453 (0.048)
 4. Precision: 0.726 (0.044)
 5. F1: 0.556 (0.043)
Mean scores and standard deviation for KNN: 
 1. Accuracy: 0.813 (0.017)
 2. Balanced Accuracy: 0.803 (0.017)
 3. Recall: 0.757 (0.025)
 4. Precision: 0.766 (0.029)
 5. F1: 0.761 (0.021)
Mean scores and standard deviation for Decision Tree: 
 1. Accuracy: 0.913 (0.012)
 2. Balanced Accuracy: 0.910 (0.014)
 3. Recall: 0.896 (0.027)
 4. Precision: 0.885 (0.019)
 5. F1: 0.890 (0.016)
Mean scores and standard deviat

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import accuracy_score
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import RepeatedStratifiedKFold
import pandas as pd
import numpy as np
from numpy import mean, std

df = pd.read_csv("https://raw.githubusercontent.com/waleed-alfaifi/ml-project/main/Datasets/seeds_dataset.csv", header=None)

X, y = df.values[:, :-1], df.values[:, -1]

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

metrics = ["accuracy", "balanced_accuracy", "recall_weighted", "precision_weighted", "f1_weighted"]

# Use Logistic Regression for classification
logistic_model = LogisticRegression(random_state=0, multi_class="multinomial")
logistic_scores = cross_validate(logistic_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Logistic Regression: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_accuracy"]), std(logistic_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_balanced_accuracy"]), std(logistic_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(logistic_scores["test_recall_weighted"]), std(logistic_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(logistic_scores["test_precision_weighted"]), std(logistic_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(logistic_scores["test_f1_weighted"]), std(logistic_scores["test_f1_weighted"])))

# Use Naive Bayes for classification
NB_model = GaussianNB()
NB_scores = cross_validate(NB_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Naive Bayes: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_accuracy"]), std(NB_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_balanced_accuracy"]), std(NB_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(NB_scores["test_recall_weighted"]), std(NB_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(NB_scores["test_precision_weighted"]), std(NB_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(NB_scores["test_f1_weighted"]), std(NB_scores["test_f1_weighted"])))

# Use SVM for classification
SVM_model = SVC()
SVM_scores = cross_validate(SVM_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for SVM: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_accuracy"]), std(SVM_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_balanced_accuracy"]), std(SVM_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(SVM_scores["test_recall_weighted"]), std(SVM_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(SVM_scores["test_precision_weighted"]), std(SVM_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(SVM_scores["test_f1_weighted"]), std(SVM_scores["test_f1_weighted"])))

# Use KNN for classification
KNN_model = KNeighborsClassifier(n_neighbors=3)
KNN_scores = cross_validate(KNN_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for KNN: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_accuracy"]), std(KNN_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_balanced_accuracy"]), std(KNN_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(KNN_scores["test_recall_weighted"]), std(KNN_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(KNN_scores["test_precision_weighted"]), std(KNN_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(KNN_scores["test_f1_weighted"]), std(KNN_scores["test_f1_weighted"])))

# Use Decision Tree for classification
DT_model = DecisionTreeClassifier()
DT_scores = cross_validate(DT_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Decision Tree: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_accuracy"]), std(DT_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_balanced_accuracy"]), std(DT_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(DT_scores["test_recall_weighted"]), std(DT_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(DT_scores["test_precision_weighted"]), std(DT_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(DT_scores["test_f1_weighted"]), std(DT_scores["test_f1_weighted"])))


# Ensemble of all models
ensemble_model = VotingClassifier(estimators=[('LR', logistic_model), ('NB', NB_model), ('SVM', SVM_model), ("KNN", KNN_model), ('DT', DT_model)], voting='hard')
ensemble_scores = cross_validate(ensemble_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Ensemble Model (Hard Voting): ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_accuracy"]), std(ensemble_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_balanced_accuracy"]), std(ensemble_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(ensemble_scores["test_recall_weighted"]), std(ensemble_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(ensemble_scores["test_precision_weighted"]), std(ensemble_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(ensemble_scores["test_f1_weighted"]), std(ensemble_scores["test_f1_weighted"])))

Mean scores and standard deviation for Logistic Regression: 
 1. Accuracy: 0.921 (0.068)
 2. Balanced Accuracy: 0.921 (0.068)
 3. Recall Weighted: 0.921 (0.068)
 4. Precision Weighted: 0.926 (0.068)
 5. F1 Weighted: 0.919 (0.073)
Mean scores and standard deviation for Naive Bayes: 
 1. Accuracy: 0.902 (0.063)
 2. Balanced Accuracy: 0.902 (0.063)
 3. Recall Weighted: 0.902 (0.063)
 4. Precision Weighted: 0.909 (0.062)
 5. F1 Weighted: 0.900 (0.067)
Mean scores and standard deviation for SVM: 
 1. Accuracy: 0.908 (0.056)
 2. Balanced Accuracy: 0.908 (0.056)
 3. Recall Weighted: 0.908 (0.056)
 4. Precision Weighted: 0.914 (0.056)
 5. F1 Weighted: 0.906 (0.059)
Mean scores and standard deviation for KNN: 
 1. Accuracy: 0.887 (0.064)
 2. Balanced Accuracy: 0.887 (0.064)
 3. Recall Weighted: 0.887 (0.064)
 4. Precision Weighted: 0.894 (0.065)
 5. F1 Weighted: 0.885 (0.068)
Mean scores and standard deviation for Decision Tree: 
 1. Accuracy: 0.916 (0.045)
 2. Balanced Accuracy: 0.916 (0.045)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold
import pandas as pd
import numpy as np
from numpy import mean, std

X, y = load_iris(return_X_y=True)

cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)

metrics = ["accuracy", "balanced_accuracy", "recall_weighted", "precision_weighted", "f1_weighted"]

# Use Logistic Regression for classification
logistic_model = LogisticRegression(random_state=0, multi_class="multinomial")
logistic_scores = cross_validate(logistic_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Logistic Regression: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_accuracy"]), std(logistic_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_balanced_accuracy"]), std(logistic_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(logistic_scores["test_recall_weighted"]), std(logistic_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(logistic_scores["test_precision_weighted"]), std(logistic_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(logistic_scores["test_f1_weighted"]), std(logistic_scores["test_f1_weighted"])))

# Use Naive Bayes for classification
NB_model = GaussianNB()
NB_scores = cross_validate(NB_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Naive Bayes: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_accuracy"]), std(NB_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_balanced_accuracy"]), std(NB_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(NB_scores["test_recall_weighted"]), std(NB_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(NB_scores["test_precision_weighted"]), std(NB_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(NB_scores["test_f1_weighted"]), std(NB_scores["test_f1_weighted"])))

# Use SVM for classification
SVM_model = SVC()
SVM_scores = cross_validate(SVM_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for SVM: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_accuracy"]), std(SVM_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_balanced_accuracy"]), std(SVM_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(SVM_scores["test_recall_weighted"]), std(SVM_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(SVM_scores["test_precision_weighted"]), std(SVM_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(SVM_scores["test_f1_weighted"]), std(SVM_scores["test_f1_weighted"])))

# Use KNN for classification
KNN_model = KNeighborsClassifier(n_neighbors=3)
KNN_scores = cross_validate(KNN_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for KNN: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_accuracy"]), std(KNN_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_balanced_accuracy"]), std(KNN_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(KNN_scores["test_recall_weighted"]), std(KNN_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(KNN_scores["test_precision_weighted"]), std(KNN_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(KNN_scores["test_f1_weighted"]), std(KNN_scores["test_f1_weighted"])))

# Use Decision Tree for classification
DT_model = DecisionTreeClassifier()
DT_scores = cross_validate(DT_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Decision Tree: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_accuracy"]), std(DT_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_balanced_accuracy"]), std(DT_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(DT_scores["test_recall_weighted"]), std(DT_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(DT_scores["test_precision_weighted"]), std(DT_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(DT_scores["test_f1_weighted"]), std(DT_scores["test_f1_weighted"])))


# Ensemble of all models
ensemble_model = VotingClassifier(estimators=[('LR', logistic_model), ('NB', NB_model), ('SVM', SVM_model), ("KNN", KNN_model), ('DT', DT_model)], voting='hard')
ensemble_scores = cross_validate(ensemble_model, X, y, scoring=metrics, cv=cv, n_jobs=-1)

print("Mean scores and standard deviation for Ensemble Model (Hard Voting): ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_accuracy"]), std(ensemble_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_balanced_accuracy"]), std(ensemble_scores["test_balanced_accuracy"])))
print(' 3. Recall Weighted: %.3f (%.3f)' % (mean(ensemble_scores["test_recall_weighted"]), std(ensemble_scores["test_recall_weighted"])))
print(' 4. Precision Weighted: %.3f (%.3f)' % (mean(ensemble_scores["test_precision_weighted"]), std(ensemble_scores["test_precision_weighted"])))
print(' 5. F1 Weighted: %.3f (%.3f)' % (mean(ensemble_scores["test_f1_weighted"]), std(ensemble_scores["test_f1_weighted"])))

Mean scores and standard deviation for Logistic Regression: 
 1. Accuracy: 0.967 (0.034)
 2. Balanced Accuracy: 0.967 (0.034)
 3. Recall Weighted: 0.967 (0.034)
 4. Precision Weighted: 0.968 (0.034)
 5. F1 Weighted: 0.967 (0.034)
Mean scores and standard deviation for Naive Bayes: 
 1. Accuracy: 0.956 (0.038)
 2. Balanced Accuracy: 0.956 (0.038)
 3. Recall Weighted: 0.956 (0.038)
 4. Precision Weighted: 0.958 (0.038)
 5. F1 Weighted: 0.955 (0.038)
Mean scores and standard deviation for SVM: 
 1. Accuracy: 0.967 (0.030)
 2. Balanced Accuracy: 0.967 (0.030)
 3. Recall Weighted: 0.967 (0.030)
 4. Precision Weighted: 0.968 (0.029)
 5. F1 Weighted: 0.967 (0.030)
Mean scores and standard deviation for KNN: 
 1. Accuracy: 0.962 (0.032)
 2. Balanced Accuracy: 0.962 (0.032)
 3. Recall Weighted: 0.962 (0.032)
 4. Precision Weighted: 0.964 (0.032)
 5. F1 Weighted: 0.962 (0.032)
Mean scores and standard deviation for Decision Tree: 
 1. Accuracy: 0.947 (0.040)
 2. Balanced Accuracy: 0.947 (0.040)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_validate
from sklearn.model_selection import LeaveOneOut
import pandas as pd
import numpy as np
from numpy import mean, std

df = pd.read_csv("https://raw.githubusercontent.com/waleed-alfaifi/ml-project/main/Datasets/marks.txt")
X, y = df.values[:, :-1], df.values[:, -1]

loo = LeaveOneOut()

metrics = ["accuracy", "balanced_accuracy","recall", "precision", "f1"]

# Use Logistic Regression for classification
logistic_model = LogisticRegression(random_state=0)
logistic_scores = cross_validate(logistic_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for Logistic Regression: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_accuracy"]), std(logistic_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(logistic_scores["test_balanced_accuracy"]), std(logistic_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(logistic_scores["test_recall"]), std(logistic_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(logistic_scores["test_precision"]), std(logistic_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(logistic_scores["test_f1"]), std(logistic_scores["test_f1"])))

# Use Naive Bayes for classification
NB_model = GaussianNB()
NB_scores = cross_validate(NB_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for Naive Bayes: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_accuracy"]), std(NB_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(NB_scores["test_balanced_accuracy"]), std(NB_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(NB_scores["test_recall"]), std(NB_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(NB_scores["test_precision"]), std(NB_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(NB_scores["test_f1"]), std(NB_scores["test_f1"])))

# Use SVM for classification
SVM_model = SVC()
SVM_scores = cross_validate(SVM_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for SVM: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_accuracy"]), std(SVM_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(SVM_scores["test_balanced_accuracy"]), std(SVM_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(SVM_scores["test_recall"]), std(SVM_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(SVM_scores["test_precision"]), std(SVM_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(SVM_scores["test_f1"]), std(SVM_scores["test_f1"])))

# Use KNN for classification
KNN_model = KNeighborsClassifier(n_neighbors=3)
KNN_scores = cross_validate(KNN_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for KNN: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_accuracy"]), std(KNN_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(KNN_scores["test_balanced_accuracy"]), std(KNN_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(KNN_scores["test_recall"]), std(KNN_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(KNN_scores["test_precision"]), std(KNN_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(KNN_scores["test_f1"]), std(KNN_scores["test_f1"])))

# Use Decision Tree for classification
DT_model = DecisionTreeClassifier()
DT_scores = cross_validate(DT_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for Decision Tree: ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_accuracy"]), std(DT_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(DT_scores["test_balanced_accuracy"]), std(DT_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(DT_scores["test_recall"]), std(DT_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(DT_scores["test_precision"]), std(DT_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(DT_scores["test_f1"]), std(DT_scores["test_f1"])))

# Ensemble of all models
ensemble_model = VotingClassifier(estimators=[('LR', logistic_model), ('NB', NB_model), ('SVM', SVM_model), ("KNN", KNN_model), ('DT', DT_model)], voting='hard')
ensemble_scores = cross_validate(ensemble_model, X, y, scoring=metrics, cv=loo, n_jobs=-1)

print("Mean scores and standard deviation for Ensemble Model (Hard Voting): ")
print(' 1. Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_accuracy"]), std(ensemble_scores["test_accuracy"])))
print(' 2. Balanced Accuracy: %.3f (%.3f)' % (mean(ensemble_scores["test_balanced_accuracy"]), std(ensemble_scores["test_balanced_accuracy"])))
print(' 3. Recall: %.3f (%.3f)' % (mean(ensemble_scores["test_recall"]), std(ensemble_scores["test_recall"])))
print(' 4. Precision: %.3f (%.3f)' % (mean(ensemble_scores["test_precision"]), std(ensemble_scores["test_precision"])))
print(' 5. F1: %.3f (%.3f)' % (mean(ensemble_scores["test_f1"]), std(ensemble_scores["test_f1"])))

Mean scores and standard deviation for Logistic Regression: 
 1. Accuracy: 0.890 (0.313)
 2. Balanced Accuracy: 0.890 (0.313)
 3. Recall: 0.550 (0.497)
 4. Precision: 0.550 (0.497)
 5. F1: 0.550 (0.497)
Mean scores and standard deviation for Naive Bayes: 
 1. Accuracy: 0.890 (0.313)
 2. Balanced Accuracy: 0.890 (0.313)
 3. Recall: 0.550 (0.497)
 4. Precision: 0.550 (0.497)
 5. F1: 0.550 (0.497)
Mean scores and standard deviation for SVM: 
 1. Accuracy: 0.900 (0.300)
 2. Balanced Accuracy: 0.900 (0.300)
 3. Recall: 0.550 (0.497)
 4. Precision: 0.550 (0.497)
 5. F1: 0.550 (0.497)
Mean scores and standard deviation for KNN: 
 1. Accuracy: 0.880 (0.325)
 2. Balanced Accuracy: 0.880 (0.325)
 3. Recall: 0.550 (0.497)
 4. Precision: 0.550 (0.497)
 5. F1: 0.550 (0.497)
Mean scores and standard deviation for Decision Tree: 
 1. Accuracy: 0.890 (0.313)
 2. Balanced Accuracy: 0.890 (0.313)
 3. Recall: 0.540 (0.498)
 4. Precision: 0.540 (0.498)
 5. F1: 0.540 (0.498)
Mean scores and standard deviat