## Import libraries and packages

In [52]:
# some useful mysklearn package import statements and reloads
import importlib

import mysklearn.myutils
importlib.reload(mysklearn.myutils)
import mysklearn.myutils as myutils

# uncomment once you paste your mypytable.py into mysklearn package
import mysklearn.mypytable
importlib.reload(mysklearn.mypytable)
from mysklearn.mypytable import MyPyTable 

# uncomment once you paste your myclassifiers.py into mysklearn package
import mysklearn.myclassifiers
importlib.reload(mysklearn.myclassifiers)
from mysklearn.myclassifiers import MyKNeighborsClassifier, MyDummyClassifier, MyNaiveBayesClassifier, MyDecisionTreeClassifier

import mysklearn.myevaluation
importlib.reload(mysklearn.myevaluation)
import mysklearn.myevaluation as myevaluation

from os import path
import utils

# Data Import and Setup

In [53]:
filename = path.join('input_data', 'AllStarData.csv')
basketball_data = MyPyTable().load_from_file(filename)
# set up classifiers
knn_classifier = MyKNeighborsClassifier(n_neighbors=10)
dummy_classifier = MyDummyClassifier()
nb_classifier = MyNaiveBayesClassifier()
decision_tree = MyDecisionTreeClassifier()
classifiers = [knn_classifier, dummy_classifier, nb_classifier, decision_tree]
# Other vars
RANDOM_STATE = 6

## Predict Using Effective Field Goal Percentage (eFG%)

In [54]:
# retrieve data from table
efg_data = basketball_data.get_column('eFG%')
allstars = basketball_data.get_column('All-star')
# build train and test sets
train_sets, test_sets = myevaluation.stratified_kfold_cross_validation(efg_data, allstars, n_splits=10,random_state=RANDOM_STATE)
test_length = 0
test_answers = []
for test in test_sets: 
    test_answers += [allstars[index] for index in test]
    test_length += len(test)
# run tests and record results
classifier_results = []
for classifier in classifiers:
    result_set = [[], 0] # [all the predictions, total_number_true] 
    for train, test in zip(train_sets, test_sets):
        # convert the indices to actual samples
        x_train = [efg_data[index] for index in train]
        y_train = [allstars[index] for index in train]
        x_test = [efg_data[index] for index in test]
        y_test = [allstars[index] for index in test]
        # discretize the data
        train_cutoffs = utils.compute_equal_width_cutoffs(x_train, 10)
        train_output = [f'X<{train_cutoffs[0]}']
        for i, cut in enumerate(train_cutoffs):
            if i == len(train_cutoffs)-1:
                break
            train_output.append(f'{cut}<=X<{train_cutoffs[i+1]}')
        train_output.append(f'X>{train_cutoffs[-1]}')
        x_train = [[myutils.discretize_ratings_custom(val, train_cutoffs, train_output)] for val in x_train]
        x_test = [[myutils.discretize_ratings_custom(val, train_cutoffs, train_output)] for val in x_test]
        # fit and predict
        classifier.fit(x_train, y_train)
        prediction = classifier.predict(x_test)
        num_true = myevaluation.accuracy_score(y_test, prediction, normalize=False)
        # update counters
        result_set[0] += prediction
        result_set[1] += num_true
    # finalize results
    result_set[1] /= test_length
    classifier_results.append(result_set)

## Output Results

In [55]:
classifier_names = ['KNN', 'Dummy', 'Naive Bayes', 'Decision Tree']
headers = ['no', 'yes']
myevaluation.print_classifier_results(classifier_names, classifier_results, test_answers, headers)

KNN--------------------------
Summary:
	Accuracy..: 0.744
	Error Rate: 0.256 

Precision, Recall, F1:
       precision    recall         f1    support
---  -----------  --------  ---------  ---------
yes     0.333333  0.03125   0.0571429        128
no      0.753968  0.979381  0.852018         388 

Confusion Matrix:
       no    yes    Total    Recognition (%)
---  ----  -----  -------  -----------------
no    380      8      388             97.938
yes   124      4      128              3.125


Dummy--------------------------
Summary:
	Accuracy..: 0.752
	Error Rate: 0.248 

Precision, Recall, F1:
       precision    recall        f1    support
---  -----------  --------  --------  ---------
yes     0                0  0               128
no      0.751938         1  0.858407        388 

Confusion Matrix:
       no    yes    Total    Recognition (%)
---  ----  -----  -------  -----------------
no    388      0      388                100
yes   128      0      128                  0


Na

## More Traits

In [56]:
# build train and test sets
train_sets, test_sets = myevaluation.stratified_kfold_cross_validation(basketball_data.data, allstars, n_splits=10,random_state=RANDOM_STATE)
test_length = 0
test_answers = []
for test in test_sets: 
    test_answers += [allstars[index] for index in test]
    test_length += len(test)
# run tests and record results
classifier_results = []
for classifier in classifiers:
    result_set = [[], 0] # [all the predictions, total_number_true] 
    for train, test in zip(train_sets, test_sets):
        # convert the indices to actual samples
        x_train = [basketball_data.data[index] for index in train]
        y_train = [allstars[index] for index in train]
        x_test = [basketball_data.data[index] for index in test]
        y_test = [allstars[index] for index in test]
        # discretize the data
        train_cutoffs = utils.compute_equal_width_cutoffs(x_train, 10)
        train_output = [f'X<{train_cutoffs[0]}']
        for i, cut in enumerate(train_cutoffs):
            if i == len(train_cutoffs)-1:
                break
            train_output.append(f'{cut}<=X<{train_cutoffs[i+1]}')
        train_output.append(f'X>{train_cutoffs[-1]}')
        x_train = [[myutils.discretize_ratings_custom(val, train_cutoffs, train_output)] for val in x_train]
        x_test = [[myutils.discretize_ratings_custom(val, train_cutoffs, train_output)] for val in x_test]
        # fit and predict
        classifier.fit(x_train, y_train)
        prediction = classifier.predict(x_test)
        num_true = myevaluation.accuracy_score(y_test, prediction, normalize=False)
        # update counters
        result_set[0] += prediction
        result_set[1] += num_true
    # finalize results
    result_set[1] /= test_length
    classifier_results.append(result_set)

TypeError: unsupported operand type(s) for -: 'list' and 'list'

In [None]:
classifier_names = ['KNN', 'Dummy', 'Naive Bayes', 'Decision Tree']
headers = ['no', 'yes']
myevaluation.print_classifier_results(classifier_names, classifier_results, test_answers, headers)