## Import libraries and packages

In [1]:
# some useful mysklearn package import statements and reloads
import importlib

import mysklearn.myutils
importlib.reload(mysklearn.myutils)
import mysklearn.myutils as myutils

# MyPyTable: reload so edits to mysklearn/mypytable.py are picked up without restarting the kernel
import mysklearn.mypytable
importlib.reload(mysklearn.mypytable)
from mysklearn.mypytable import MyPyTable 

# classifiers: reload so edits to mysklearn/myclassifiers.py are picked up without restarting the kernel
import mysklearn.myclassifiers
importlib.reload(mysklearn.myclassifiers)
from mysklearn.myclassifiers import MyKNeighborsClassifier, MyDummyClassifier, MyNaiveBayesClassifier, MyDecisionTreeClassifier

import mysklearn.myevaluation
importlib.reload(mysklearn.myevaluation)
import mysklearn.myevaluation as myevaluation

## Data Import and Setup

In [2]:
from os import path

# Seed passed as random_state to the cross-validation split
RANDOM_STATE = 6

# Read the all-star dataset from the input_data directory into a MyPyTable
filename = path.join('input_data', 'AllStarData.csv')
basketball_data = MyPyTable().load_from_file(filename)

# One instance of each classifier under comparison
knn_classifier = MyKNeighborsClassifier(n_neighbors=10)
dummy_classifier = MyDummyClassifier()
nb_classifier = MyNaiveBayesClassifier()
decision_tree = MyDecisionTreeClassifier()

# Classifiers that actually get evaluated; decision_tree is instantiated
# above but currently left out of this list.
classifiers = [
    knn_classifier,
    dummy_classifier,
    nb_classifier,
    # decision_tree,
]

## Predict Using Effective Field Goal Percentage (eFG%)

In [3]:
# Feature column (eFG%) and class-label column (All-star) from the loaded table
efg_data = basketball_data.get_column('eFG%')
allstars = basketball_data.get_column('All-star')

# 10-fold split; each fold is a list of row indices into the columns above
train_sets, test_sets = myevaluation.stratified_kfold_cross_validation(
    efg_data,
    allstars,
    n_splits=10,
    random_state=RANDOM_STATE,
)

# Ground-truth labels of every test instance, concatenated in fold order so
# they line up with the concatenated predictions collected below.
test_answers = []
for test in test_sets:
    test_answers.extend(allstars[index] for index in test)
test_length = len(test_answers)

# One [all the predictions, accuracy] entry per classifier, in `classifiers` order
classifier_results = []
for classifier in classifiers:
    all_predictions = []
    total_true = 0
    for train, test in zip(train_sets, test_sets):
        # convert the fold's indices into samples; each sample is a
        # one-element feature list holding the eFG% value
        x_train = [[efg_data[index]] for index in train]
        y_train = [allstars[index] for index in train]
        x_test = [[efg_data[index]] for index in test]
        y_test = [allstars[index] for index in test]

        classifier.fit(x_train, y_train)
        prediction = classifier.predict(x_test)

        # normalize=False: accumulate the raw score per fold (presumably the
        # count of correct predictions) and normalise once after all folds
        num_true = myevaluation.accuracy_score(y_test, prediction, normalize=False)
        all_predictions.extend(prediction)
        total_true += num_true
    result_set = [all_predictions, total_true / test_length]
    classifier_results.append(result_set)

In [4]:
# Display names for the classifiers, listed in the same order as `classifiers`.
all_classifier_names = ['KNN', 'Dummy', 'Naive Bayes', 'Decision Tree']
# Keep only as many names as there are result sets so labels and results stay
# aligned when a trailing classifier (e.g. the decision tree) is not evaluated.
classifier_names = all_classifier_names[:len(classifier_results)]
# Class labels used as the row/column headers of the printed report
headers = ['no', 'yes']
myevaluation.print_classifier_results(classifier_names, classifier_results, test_answers, headers)

KNN--------------------------
Summary:
	Accuracy..: 1.0
	Error Rate: 0.0 

Precision, Recall, F1:
      precision    recall    f1    support
--  -----------  --------  ----  ---------
no            1         1     1        515 

Confusion Matrix:
       no    yes    Total    Recognition (%)
---  ----  -----  -------  -----------------
no    515      0      515                100
yes     0      0        0                  0


Dummy--------------------------
Summary:
	Accuracy..: 1.0
	Error Rate: 0.0 

Precision, Recall, F1:
      precision    recall    f1    support
--  -----------  --------  ----  ---------
no            1         1     1        515 

Confusion Matrix:
       no    yes    Total    Recognition (%)
---  ----  -----  -------  -----------------
no    515      0      515                100
yes     0      0        0                  0


Naive Bayes--------------------------
Summary:
	Accuracy..: 1.0
	Error Rate: 0.0 

Precision, Recall, F1:
      precision    recall    f1   