In [6]:
from sklearn.utils import shuffle
from sklearn import neighbors, metrics
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import Perceptron
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from tabulate import tabulate

import time

In [7]:
bone_marrow_transplant_children = fetch_ucirepo(id=565)

In [8]:
diabetesRepo = fetch_ucirepo(id=296)

  df = pd.read_csv(data_url)


# Prepare datasets

In [9]:
randomState = 22020
labelEncoder = LabelEncoder()
boneData, boneTarget = shuffle(bone_marrow_transplant_children.data.features, bone_marrow_transplant_children.data.targets, random_state=randomState)
diabetesData, diabetesTarget = shuffle(diabetesRepo.data.features, diabetesRepo.data.targets, random_state=randomState)

numericBoneData = boneData.select_dtypes(include='number')
numericBoneTarget = boneTarget.select_dtypes(include='number')

numericDiabetesData = diabetesData.select_dtypes(include='number')
numericDiabetesTarget = labelEncoder.fit_transform(diabetesTarget)

numericBoneData = numericBoneData.fillna(0)
numericBoneTarget = numericBoneTarget.fillna(0)

xBoneTrain, xBoneTest, yBoneTrain, yBoneTest = train_test_split(numericBoneData, numericBoneTarget, test_size=0.33, random_state=(randomState+1))
xDiabetesTrain, xDiabetesTest, yDiabetesTrain, yDiabetesTest = train_test_split(numericDiabetesData, numericDiabetesTarget, test_size=0.33, random_state=(randomState+1))

  y = column_or_1d(y, warn=True)


## Helper functions

In [10]:
columns = ["Model", "Accuracy", "Precision", "Recall", "Training Time", "Testing Time"]
def printTable(cols, data):
    print(tabulate(data, headers=cols, tablefmt="grid"))

def getMeasures(classifier, X_train, X_test, y_train, y_test):
    startTimeTrain = time.time()
    classifier.fit(X_train, y_train)
    endTimeTrain = time.time()
    trainingTime = endTimeTrain - startTimeTrain

    startTimeTest = time.time()
    yTestPredicted = classifier.predict(X_test)
    endTimeTest = time.time()
    testingTime = endTimeTest - startTimeTest

    accuracy = metrics.accuracy_score(y_test, yTestPredicted)
    precision = metrics.precision_score(y_test, yTestPredicted, average="micro")
    recall = metrics.recall_score(y_test, yTestPredicted, average='macro')
    return (accuracy, precision, recall, trainingTime, testingTime)

def collectIterationData(classifierName, classifier, X_train, X_test, y_train, y_test):
    accuracy, precision, recall, trainingTime, testingTime = getMeasures(classifier, X_train, X_test, y_train, y_test)
    return [classifierName, str(accuracy), str(precision), str(recall), str(trainingTime) + "s", str(testingTime) + "s"]

## KNN

In [11]:
noNeighbors = [20, 40, 80]

def calcKNN(X_train, X_test, y_train, y_test):
    data = []
    for neighbor in noNeighbors:
        classifierName = "KNN (" + str(neighbor) + " Neighbors)"
        classifier = neighbors.KNeighborsClassifier(neighbor)

        dataIteration = collectIterationData(classifierName, classifier, X_train, X_test, y_train, y_test)
        data.append(dataIteration)
    return data


## Perceptron

In [12]:
# With the help of https://www.geeksforgeeks.org/sklearn-perceptron/, but barely.

learningRate = 0.1
iterations = 50

def calcPerceptron(X_train, X_test, y_train, y_test):
    data = []

    classifierName = "Perceptron"
    classifier = Perceptron(max_iter=iterations, eta0=learningRate, random_state=randomState)
    dataIteration = collectIterationData(classifierName, classifier, X_train, X_test, y_train, y_test)

    data.append(dataIteration)
    
    return data



## Naive Bayes (Gaussian)

In [13]:
def calcGaussianNaiveBayes(X_train, X_test, y_train, y_test):
    data = []

    classifierName = "Naive Bayes (Gaussian)"
    classifier = GaussianNB()
    dataIteration = collectIterationData(classifierName, classifier, X_train, X_test, y_train, y_test)
    
    data.append(dataIteration)

    return data

## Naive Bayes (Bernoulli)

In [14]:
def calcBernoulliNaiveBayes(X_train, X_test, y_train, y_test):
    data = []

    classifierName = "Naive Bayes (Bernoulli)"
    classifier = BernoulliNB()
    dataIteration = collectIterationData(classifierName, classifier, X_train, X_test, y_train, y_test)
    
    data.append(dataIteration)

    return data

## Bone Marrow dataset

In [15]:
boneMarrowTable = []

boneMarrowTable += calcKNN(xBoneTrain, xBoneTest, yBoneTrain, yBoneTest)
boneMarrowTable += calcPerceptron(xBoneTrain, xBoneTest, yBoneTrain, yBoneTest)
boneMarrowTable += calcGaussianNaiveBayes(xBoneTrain, xBoneTest, yBoneTrain, yBoneTest)
boneMarrowTable += calcBernoulliNaiveBayes(xBoneTrain, xBoneTest, yBoneTrain, yBoneTest)

printTable(columns, boneMarrowTable)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  y = column_or_1d(y, warn=True)


+-------------------------+------------+-------------+----------+------------------------+------------------------+
| Model                   |   Accuracy |   Precision |   Recall | Training Time          | Testing Time           |
| KNN (20 Neighbors)      |   0.83871  |    0.83871  | 0.840212 | 0.00527501106262207s   | 0.13753604888916016s   |
+-------------------------+------------+-------------+----------+------------------------+------------------------+
| KNN (40 Neighbors)      |   0.919355 |    0.919355 | 0.920106 | 0.0030858516693115234s | 0.004518985748291016s  |
+-------------------------+------------+-------------+----------+------------------------+------------------------+
| KNN (80 Neighbors)      |   0.677419 |    0.677419 | 0.62963  | 0.0007240772247314453s | 0.020986080169677734s  |
+-------------------------+------------+-------------+----------+------------------------+------------------------+
| Perceptron              |   0.645161 |    0.645161 | 0.592593 | 0.0061

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


## Diabetes dataset

In [16]:
diabetesTable = []

diabetesTable += calcKNN(xDiabetesTrain, xDiabetesTest, yDiabetesTrain, yDiabetesTest)
diabetesTable += calcPerceptron(xDiabetesTrain, xDiabetesTest, yDiabetesTrain, yDiabetesTest)
diabetesTable += calcGaussianNaiveBayes(xDiabetesTrain, xDiabetesTest, yDiabetesTrain, yDiabetesTest)
diabetesTable += calcBernoulliNaiveBayes(xDiabetesTrain, xDiabetesTest, yDiabetesTrain, yDiabetesTest)

printTable(columns, diabetesTable)

+-------------------------+------------+-------------+----------+-----------------------+------------------------+
| Model                   |   Accuracy |   Precision |   Recall | Training Time         | Testing Time           |
| KNN (20 Neighbors)      |   0.550755 |    0.550755 | 0.382688 | 0.03202390670776367s  | 2.3535330295562744s    |
+-------------------------+------------+-------------+----------+-----------------------+------------------------+
| KNN (40 Neighbors)      |   0.558824 |    0.558824 | 0.376338 | 0.024196863174438477s | 2.769627094268799s     |
+-------------------------+------------+-------------+----------+-----------------------+------------------------+
| KNN (80 Neighbors)      |   0.556234 |    0.556234 | 0.367199 | 0.024379968643188477s | 3.5231099128723145s    |
+-------------------------+------------+-------------+----------+-----------------------+------------------------+
| Perceptron              |   0.548164 |    0.548164 | 0.351074 | 0.081253051757