In [67]:
from math import sqrt
from random import seed
from csv import reader
from random import randrange
from math import pi
from math import exp
from ast import If

In [68]:
def loadCsv(filename):
    """Read a CSV file and return its non-empty rows.

    Args:
        filename: Path of the CSV file to open for reading.

    Returns:
        A list of rows in file order; each row is the list of string fields
        produced by ``csv.reader``. Rows that parse to an empty list (blank
        lines) are skipped.
    """
    # newline='' hands newline handling to the csv module, as its
    # documentation asks for. Without it, universal-newline translation
    # rewrites line breaks that are embedded inside quoted fields.
    with open(filename, 'r', newline='') as file:
        return [row for row in reader(file) if row]

In [69]:
# Randomly partition the rows into k equally sized folds for cross-validation
def crossvsplit(bayesdata, dataFolds):
    """Split the dataset into ``dataFolds`` random folds of equal size.

    Rows are drawn without replacement from a shallow copy, so the list
    passed in is left untouched. Every fold holds
    ``int(len(bayesdata) / dataFolds)`` rows; rows left over after the last
    fold is filled are not placed in any fold.

    Args:
        bayesdata: Sequence of rows.
        dataFolds: Number of folds to build.

    Returns:
        A list of ``dataFolds`` lists of rows.
    """
    pool = list(bayesdata)
    foldSize = int(len(bayesdata) / dataFolds)
    folds = []
    for _ in range(dataFolds):
        # One randrange draw per row, each removing the picked row from the pool
        picked = [pool.pop(randrange(len(pool))) for _ in range(foldSize)]
        folds.append(picked)
    return folds

In [70]:
# Convert one column of every row from a string to a float, in place
def strFloat(bayesdata, column):
    """Replace ``row[column]`` with its float value in every row.

    The existing value is stripped of surrounding whitespace before being
    passed to ``float``. Rows are modified in place; nothing is returned.

    Args:
        bayesdata: Iterable of mutable rows whose ``column`` entry is a string.
        column: Index of the column to convert.
    """
    for record in bayesdata:
        text = record[column]
        record[column] = float(text.strip())


# Encode the values of one column as integer codes, in place
def strInt(bayesdata, column):
    """Replace each distinct value in ``column`` with an integer code.

    Codes are handed out in order of first appearance (0 for the first
    distinct value seen, 1 for the next, and so on). This keeps the mapping
    identical from run to run; numbering the members of a ``set`` instead
    makes the codes depend on hash ordering, which for strings varies
    between interpreter sessions.

    Args:
        bayesdata: List of mutable rows; it is traversed twice.
        column: Index of the column to encode.

    Returns:
        The dict mapping each original value to its integer code.
    """
    lookup = dict()
    # First pass: build the full mapping before touching any row
    for row in bayesdata:
        label = row[column]
        if label not in lookup:
            lookup[label] = len(lookup)
    # Second pass: overwrite the column with the assigned codes
    for row in bayesdata:
        row[column] = lookup[row[column]]
    return lookup


In [71]:
def evalAlgo(bayesdata, algorithm, dataFolds, *args):
    """Score an algorithm with k-fold cross-validation.

    The data is split with ``crossvsplit``. Each fold in turn is used as the
    test set while the rows of all remaining folds form the training set.

    Args:
        bayesdata: Rows whose last element is the class label.
        algorithm: Callable invoked as ``algorithm(trainSet, testSet, *args)``;
            its return value is compared position by position with the
            held-out labels.
        dataFolds: Number of folds.
        *args: Extra positional arguments forwarded to ``algorithm``.

    Returns:
        A list with one ``accuracyValue`` result per fold.
    """
    folds = crossvsplit(bayesdata, dataFolds)
    scores = []
    for heldOut in folds:
        # Training data: every row from the folds other than the held-out one
        others = list(folds)
        others.remove(heldOut)
        trainSet = [row for part in others for row in part]

        # Test rows are copies with the label blanked out, so the algorithm
        # cannot read the answer and the fold's own rows keep their labels
        testSet = []
        for row in heldOut:
            masked = list(row)
            masked[-1] = None
            testSet.append(masked)

        predicted = algorithm(trainSet, testSet, *args)
        actual = [row[-1] for row in heldOut]
        scores.append(accuracyValue(actual, predicted))
    return scores

In [72]:
def accuracyValue(actual, predicted):
    """Return the percentage of positions where prediction equals truth.

    Args:
        actual: Sequence of true labels; its length sets how many positions
            are compared and is the denominator.
        predicted: Sequence of predicted labels, indexed at the same
            positions as ``actual``.

    Returns:
        A float between 0.0 and 100.0.
    """
    total = len(actual)
    hits = sum(1 for idx in range(total) if actual[idx] == predicted[idx])
    return hits / float(total) * 100.0

def seperateByclass(bayesdata):
    """Group rows by their class label (the last element of each row).

    Args:
        bayesdata: Sequence of rows.

    Returns:
        A dict mapping each class value to the list of rows carrying it, in
        their original order. The rows themselves are not copied.
    """
    groups = {}
    for record in bayesdata:
        groups.setdefault(record[-1], []).append(record)
    return groups

In [73]:
def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Args:
        numbers: Sized collection of numeric values.
    """
    total = sum(numbers)
    count = float(len(numbers))
    return total / count


# Sample standard deviation (n - 1 in the denominator)
def stDev(numbers):
    """Return the sample standard deviation of ``numbers``.

    The sum of squared deviations from ``mean(numbers)`` is divided by
    ``len(numbers) - 1`` before the square root is taken.

    Args:
        numbers: Sized collection of numeric values.
    """
    centre = mean(numbers)
    squaredError = sum((value - centre) ** 2 for value in numbers)
    return sqrt(squaredError / float(len(numbers) - 1))

In [74]:

def summarizeBayes(bayesdata):
    """Compute per-column statistics for a set of rows.

    Rows are transposed with ``zip(*bayesdata)`` and a
    ``(mean, stDev, count)`` tuple is computed for every column. The entry
    for the last column is then discarded.

    Args:
        bayesdata: Rows of numeric values.

    Returns:
        A list of ``(mean, stDev, count)`` tuples, one for each column
        except the last.
    """
    stats = []
    for column in zip(*bayesdata):
        stats.append((mean(column), stDev(column), len(column)))
    # Drop the summary of the final column
    del stats[-1]
    return stats


def summarizeClass(bayesdata):
    """Compute column statistics separately for each class.

    Args:
        bayesdata: Rows whose last element is the class label.

    Returns:
        A dict mapping each class value to the ``summarizeBayes`` result
        for the rows of that class.
    """
    grouped = seperateByclass(bayesdata)
    return {label: summarizeBayes(members) for label, members in grouped.items()}

In [75]:
# Gaussian probability density of x for a given mean and standard deviation
def probaCalc(x, mean, stDev, minStDev=1e-9):
    """Evaluate the normal probability density function at ``x``.

    Args:
        x: Value at which to evaluate the density.
        mean: Mean of the distribution.
        stDev: Standard deviation of the distribution.
        minStDev: Stand-in used when ``stDev`` is exactly zero. Any non-zero
            ``stDev`` is used unchanged, so existing results are unaffected.

    Returns:
        The density as a float. With a zero ``stDev`` this is 0.0 when ``x``
        differs from ``mean`` (the exponential underflows) and a large
        finite value when they are equal.
    """
    # A zero spread would otherwise raise ZeroDivisionError in both the
    # exponent and the normalising factor below.
    spread = stDev if stDev != 0 else minStDev
    exponent = exp(-((x - mean) ** 2 / (2 * spread ** 2)))
    return (1 / (sqrt(2 * pi) * spread)) * exponent


def calcClsProb(summaries, row):
    """Compute an unnormalised score for every class given one row.

    Each class starts from its share of all rows, using the count stored in
    its first column summary. That share is then multiplied by
    ``probaCalc`` of each feature value under that class's mean and
    standard deviation.

    Args:
        summaries: Dict mapping class value to a list of
            ``(mean, stDev, count)`` tuples, one per feature.
        row: Sequence of feature values, indexed in step with the summaries.

    Returns:
        A dict mapping each class value to its score.
    """
    totalRows = sum(summaries[label][0][2] for label in summaries)
    scores = {}
    for label, featureStats in summaries.items():
        # Class prior: rows of this class over rows of all classes
        score = summaries[label][0][2] / float(totalRows)
        for idx, (mu, sigma, _) in enumerate(featureStats):
            score *= probaCalc(row[idx], mu, sigma)
        scores[label] = score
    return scores

In [76]:
def predict(summaries, row):
    """Return the class with the highest score for ``row``.

    Scores come from ``calcClsProb``. When scores tie, the class
    encountered first is kept.

    Args:
        summaries: Per-class feature summaries, passed to ``calcClsProb``.
        row: Feature values of the sample to classify.

    Returns:
        The winning class value, or ``None`` when there are no classes.
    """
    scores = calcClsProb(summaries, row)
    bestLabel = None
    bestScore = -1
    for label, score in scores.items():
        # Skip any candidate that does not strictly beat the current best
        if bestLabel is not None and not (score > bestScore):
            continue
        bestScore = score
        bestLabel = label
    return bestLabel

# Gaussian Naive Bayes: fit per-class statistics on train, classify test
def naiveBayes(train, test):
    """Train on ``train`` and return one predicted class per row of ``test``.

    Args:
        train: Rows whose last element is the class label.
        test: Rows to classify.

    Returns:
        A list of predicted class values, in the order of ``test``.
    """
    model = summarizeClass(train)
    return [predict(model, sample) for sample in test]

In [77]:
# Fix the random state so the fold assignment, and therefore the reported
# accuracy, is the same on every Restart & Run All.
seed(1)

# Load the dataset: every column but the last is a numeric feature, the
# last column is the class label.
filename = 'heart.csv'
bayesdata = loadCsv(filename)
for i in range(len(bayesdata[0])-1):
    strFloat(bayesdata, i)
strInt(bayesdata, len(bayesdata[0])-1)

# Evaluate Naive Bayes with k-fold cross-validation and report mean accuracy
dataFolds = 4
scores = evalAlgo(bayesdata, naiveBayes, dataFolds)
print("_____________________________________________________________________________________________")
print("")
print("The following is the accuracy of the predections on heart dataset")
print('Accuracy: %.2f%%' % (sum(scores)/float(len(scores))))
print("_____________________________________________________________________________________________")

_____________________________________________________________________________________________

The following is the accuracy of the predections on heart dataset
Accuracy: 82.52%
_____________________________________________________________________________________________
