In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import sklearn

In [2]:
data = pd.read_csv('health_data.csv')
data.head()

Unnamed: 0,age,restbps,chol,category
0,26,109,243,0
1,27,106,156,0
2,28,107,225,0
3,27,105,277,0
4,30,96,221,0


In [3]:
X = data.to_numpy()[:, :3]
Y = data.to_numpy()[:, 3]
X_00 = data[data['category'] == 0].to_numpy()[:, :3]
X_11 = data[data['category'] == 1].to_numpy()[:, :3]

In [4]:
X_0 = np.array(X_00[:280, :3])
X_1 = np.array(X_11[:210, :3])

In [5]:
#mean function
def mean(data):
    return sum(data)/float(len(data))

In [6]:
#variance function
def std(data):
    mu  = mean(data)
    return math.sqrt(sum([(x - mu)**2 for x in data])/float(len(data) - 1))

In [7]:
#stat of class 0(Maximum Likelihood Estimate)
stat_X0 = [(mean(col),  std(col)) for col in zip(*X_0)]
stat_X0

[(27.335714285714285, 5.325411743767366),
 (107.38928571428572, 6.372141387939814),
 (230.425, 31.591729385891465)]

In [8]:
#stat of class 1(Maximum Likelihood Estimate)
stat_X1 = [(mean(col),  std(col)) for col in zip(*X_1)]
stat_X1

[(59.11904761904762, 7.417818908961221),
 (145.0857142857143, 10.359669948848621),
 (319.04761904761904, 46.57953653664896)]

In [9]:
stat = [stat_X0, stat_X1]

In [10]:
#Probability calculation
def Normal_dist(x, mean, std):
    return (1 / (math.sqrt(2 * math.pi) * std)) *math.exp(-((x-mean)**2 / (2 * std**2 )))

In [11]:
#prior calculation
P = [float(len(X_0))/ float(len(X_0) + len(X_1)) ,float(len(X_1))/ float(len(X_0) + len(X_1)) ]
P

[0.5714285714285714, 0.42857142857142855]

In [12]:
#calculation of class conditional density
def class_conditional_probability(data, stat):
    probabilities = np.ones(2, dtype = np.float64)
    for i in range(0,2):
        for j in range(len(stat[i])):
            mean, stdev = stat[i][j]
            #print( Normal_dist(data[i], mean, stdev))
            probabilities[i] *= Normal_dist(data[j], mean, stdev)
    return probabilities

In [29]:
#calculation of posterior
def posterior(data, stat, P):
    prob = np.zeros(2, dtype = np.float64)
    probabilities = class_conditional_probability(data, stat)
    print(probabilities)
    for i in range(2):
        prob[i] = probabilities[i]*P[i]
    return prob

In [33]:
def prediction(data, stat,P, Th):
    Probabilities = posterior(data, stat, P)
    Probabilities = Probabilities/sum(Probabilities)
    #print(Probabilities)
    if Probabilities[1] > Th:
        return 1
    else:
        return 0

In [34]:
X_test_0 = np.array(X_00[280 :, : 3])
X_test_1 = np.array(X_11[210:,:3])

In [42]:
#computing accuracy on training data.
C1 = np.zeros((2,2)) #[[]]
for i in range((X_0.shape[0])):
    y_pred = prediction(X_0[i], stat, P, 0.7)
    #print(y_pred)
    C1[y_pred, 0] += 1
for i in range((X_1.shape[0])):
    y_pred = prediction(X_1[i], stat, P, 0.7)
    #print(y_pred)
    C1[y_pred, 1] += 1

[5.13544024e-05 5.08969525e-13]
[3.59891687e-06 2.66679600e-15]
[5.77997091e-05 4.04705718e-13]
[1.85847619e-05 5.61739031e-13]
[1.01188707e-05 1.16295153e-14]
[4.35680172e-05 3.38744960e-14]
[3.71461497e-06 1.99353539e-16]
[2.88311560e-05 1.83744163e-14]
[9.81022399e-06 1.74455372e-15]
[2.96542843e-06 1.01150830e-17]
[8.60667321e-06 2.70331111e-16]
[2.75985886e-05 1.87525797e-13]
[5.87785432e-05 3.83151953e-13]
[5.59751993e-05 1.17720265e-13]
[4.6888438e-05 6.4498084e-14]
[2.86529873e-05 3.24078511e-14]
[2.84061481e-05 1.23646964e-13]
[5.42569795e-06 6.24498735e-17]
[6.32449038e-06 2.33923943e-16]
[1.07407794e-05 5.08789548e-11]
[2.14147928e-05 2.37521683e-15]
[4.61784100e-06 8.28867664e-12]
[5.77113516e-05 2.04542281e-13]
[3.95811220e-05 9.21318015e-15]
[1.7845900e-05 2.3376766e-14]
[5.33764822e-06 4.94108662e-12]
[3.12239934e-05 3.59457657e-14]
[1.57084980e-05 7.86508891e-15]
[4.14520184e-05 1.43200559e-14]
[3.29260664e-05 3.65078330e-14]
[1.48517273e-05 1.23326525e-15]
[4.59860944e

In [43]:
C1

array([[280.,   0.],
       [  0., 210.]])

In [44]:
acc1 = (C1[0,0] + C1[1,1])/np.sum(C1)
print(acc1)

1.0


In [38]:
#computing accuracy on test data.
C = np.zeros((2,2)) #[[]]
for i in range((X_test_0.shape[0])):
    y_pred = prediction(X_test_0[i], stat, P, 0.6)
    C[y_pred, 0] += 1
for i in range((X_test_1.shape[0])):
    y_pred = prediction(X_test_1[i], stat, P, 0.6)
    C[y_pred, 1] += 1

[4.01360368e-06 5.60526593e-11]
[6.13415800e-06 6.09853156e-10]
[5.72199046e-07 3.65131288e-09]
[1.05880505e-05 2.23676351e-10]
[6.93063391e-07 1.47353348e-09]
[2.55180764e-05 3.79546351e-11]
[3.64380537e-06 1.16146279e-11]
[7.42790045e-07 3.54706021e-09]
[8.27919287e-06 2.40998150e-10]
[3.37067562e-06 2.60940421e-10]
[1.72710348e-05 4.68092620e-12]
[9.32994874e-06 8.34895109e-11]
[1.51651747e-05 1.29657585e-10]
[2.30766410e-06 2.34593086e-09]
[1.42000728e-05 5.55551353e-11]
[7.46505805e-06 3.92501328e-10]
[5.79296447e-06 3.21449843e-11]
[1.40276859e-05 3.08450180e-11]
[1.75552078e-05 2.70183286e-11]
[1.77203134e-05 7.86011078e-11]
[2.65694470e-14 1.93770883e-06]
[1.79425931e-15 3.96612687e-07]
[2.65967145e-11 5.74091439e-07]
[5.81777683e-14 2.97751963e-06]
[5.83602665e-09 1.60280930e-07]
[1.44569317e-10 8.89319088e-08]
[1.90484013e-10 6.11352158e-07]
[8.67248744e-11 1.49638237e-08]
[1.47814784e-13 2.41649046e-06]
[1.81195754e-14 2.45468361e-06]
[7.98252885e-09 8.50757784e-08]
[8.97000

In [39]:
C

array([[38., 18.],
       [82., 72.]])

In [None]:
acc = (C[0,0] + C[1,1])/np.sum(C)
print(acc)

In [None]:
pos_predictivity = (C[0,0])/(C[0,0] + C[0,1])
pos_predictivity

In [None]:
neg_predictivity = (C[1,1])/(C[1,0] + C[1,1])
neg_predictivity

In [None]:
Recall = (C[0,0])/(C[0,0] + C[1,0])
Recall

In [None]:
Specificity = (C[1,1])/(C[0,1] + C[1,1])
Specificity