In [2]:
import pandas as pd
from sklearn.datasets import load_digits

# Load the digits dataset that ships with scikit-learn.
digits = load_digits()

# One frame holding the columns of digits.data plus the labels in a
# 'target' column appended at the end.
df = pd.DataFrame(digits.data).assign(target = digits.target)

# Split the frame into the label column and everything else (the model inputs).
target = df['target']
features = df.drop(columns = 'target')

In [3]:
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [4]:
# Candidate classifiers to compare, keyed by the display name that is printed
# next to each model's cross-validation scores.
#
# LinearSVC and DecisionTreeClassifier both accept a random_state that controls
# the pseudo-random choices they make while fitting. They are seeded here so
# that re-running the notebook reproduces the same scores; 3000 is the same
# seed value the notebook passes to KFold.
estimators = {
    'k-nearest neighbors' : KNeighborsClassifier(),
    # max_iter is set far above the library default; presumably this is to let
    # the solver converge on the unscaled features -- TODO confirm.
    'Support Vector Machine' : LinearSVC(max_iter = 1000000, random_state = 3000),
    'Gaussian Naive Bayes' : GaussianNB(),
    'Decision Tree' : DecisionTreeClassifier(random_state = 3000)}

In [5]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Score every candidate classifier with the same 10-fold cross-validation.
#
# The splitter does not depend on the estimator, so it is built once instead of
# once per iteration. Because it is seeded with an integer, each call to
# cross_val_score shuffles the samples into the same ten folds, which means all
# estimators are evaluated on identical splits.
kfold = KFold(n_splits = 10, random_state = 3000, shuffle = True)

for estimator_name, estimator_object in estimators.items():

    # One score per fold; no `scoring` argument is given, so the estimator's
    # own default scorer is used.
    scores = cross_val_score(estimator = estimator_object, X = features, y = target, cv = kfold)
    print(scores)

    # Summary: mean and spread of the ten fold scores, shown as percentages.
    # The two spaces after the comma and the trailing blank line are kept so
    # the printed report is unchanged.
    print(f'{estimator_name}:\n\tMean Accuracy = {scores.mean():.2%},  Standard Deviation = {scores.std():.2%}\n')

[0.98333333 0.97777778 0.98888889 0.98888889 0.98333333 0.98888889
 0.98888889 0.98882682 0.98324022 0.98882682]
k-nearest neighbors:
	Mean Accuracy = 98.61%,  Standard Deviation = 0.37%

[0.96111111 0.93888889 0.91666667 0.95555556 0.95555556 0.95555556
 0.95       0.97206704 0.93296089 0.94413408]
Support Vector Machine:
	Mean Accuracy = 94.82%,  Standard Deviation = 1.49%

[0.85       0.83888889 0.82222222 0.88333333 0.81666667 0.83333333
 0.82777778 0.83240223 0.8603352  0.8603352 ]
Gaussian Naive Bayes:
	Mean Accuracy = 84.25%,  Standard Deviation = 1.96%

[0.84444444 0.82777778 0.85555556 0.85       0.87222222 0.9
 0.83888889 0.87150838 0.8547486  0.84916201]
Decision Tree:
	Mean Accuracy = 85.64%,  Standard Deviation = 1.94%



In [None]:
'''
Other metrics to evaluate the performance of classification algorithms
- Precision
- Recall
- F1 Score
'''

In [None]:
'''
Confusion Matrix:

         Predicted Class
    Positive       | Negative
A   True Positive  | False Negative     (actual positive)
C                  |
T   ------------------------------------
U   False Positive | True Negative      (actual negative)
A                  |
L

'''

In [None]:
'''
Recall vs Precision

Optimizing the Recall:
- Minimizes the chance of not detecting a positive case
- Cost: a high number of false positives

Optimizing the Precision:
- Minimizes the number of False Positives
- Cost: a high number of false negatives

F1 Score: combines recall and precision into a single number,
the harmonic mean of the two
 
F1 = 2 x (PRE x REC) / (PRE + REC)

Recall = Correctly Predicted Positive Cases / All Actual Positive Cases = TP / (TP + FN)
Precision = Correctly Predicted Positive Cases / All Positive Predictions = TP / (TP + FP)

Make F1 score as high as possible (the closer to 1 the better)
'''