In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import sklearn

In [2]:
data = pd.read_csv('health_data.csv')
data.head()

Unnamed: 0,age,restbps,chol,category
0,26,109,243,0
1,27,106,156,0
2,28,107,225,0
3,27,105,277,0
4,30,96,221,0


In [3]:
X = data.to_numpy()[:, :3]
Y = data.to_numpy()[:, 3]
X_00 = data[data['category'] == 0].to_numpy()[:, :3]
X_11 = data[data['category'] == 1].to_numpy()[:, :3]

In [4]:
X_0 = np.array(X_00[:280, :3])
X_1 = np.array(X_11[:210, :3])

In [5]:
#mean function
def mean(data):
    return sum(data)/float(len(data))

In [6]:
#variance function
def std(data):
    mu  = mean(data)
    return math.sqrt(sum([(x - mu)**2 for x in data])/float(len(data) - 1))

In [7]:
#stat of class 0(Maximum Likelihood Estimate)
stat_X0 = [(mean(col),  std(col)) for col in zip(*X_0)]
stat_X0

[(27.335714285714285, 5.325411743767366),
 (107.38928571428572, 6.372141387939814),
 (230.425, 31.591729385891465)]

In [8]:
#stat of class 1(Maximum Likelihood Estimate)
stat_X1 = [(mean(col),  std(col)) for col in zip(*X_1)]
stat_X1

[(59.11904761904762, 7.417818908961221),
 (145.0857142857143, 10.359669948848621),
 (319.04761904761904, 46.57953653664896)]

In [9]:
stat = [stat_X0, stat_X1]

In [10]:
#Probability calculation
def Normal_dist(x, mean, std):
    return (1 / (math.sqrt(2 * math.pi) * std)) *math.exp(-((x-mean)**2 / (2 * std**2 )))

In [11]:
#prior calculation
P = [float(len(X_0))/ float(len(data)) ,float(len(X_1))/ float(len(X)) ]
P

[0.4, 0.3]

In [12]:
#calculation of class conditional density
def class_conditional_probability(data, stat):
    probabilities = np.ones(2, dtype = np.float64)
    for i in range(0,2):
        for j in range(len(stat[i])):
            mean, stdev = stat[i][j]
            #print( Normal_dist(data[i], mean, stdev))
            probabilities[i] *= Normal_dist(data[j], mean, stdev)
    return probabilities

In [13]:
#calculation of posterior
def posterior(data, stat, P):
    prob = np.zeros(2, dtype = np.float64)
    probabilities = class_conditional_probability(data, stat)
    for i in range(2):
        prob[i] = probabilities[i]*P[i]
    return prob

In [14]:
def prediction(data, stat,P, Th):
    Probabilities = posterior(data, stat, P)
    Probabilities = Probabilities/sum(Probabilities)
    if Probabilities[1] > Th:
        return 1
    else:
        return 0

In [15]:
X_test_0 = np.array(X_00[280 :, : 3])
X_test_1 = np.array(X_11[210:,:3])

In [24]:
#computing accuracy on training data.
C1 = np.zeros((2,2)) #[[]]
for i in range((X_0.shape[0])):
    y_pred = prediction(X_0[i], stat, P, 0.5)
    C1[y_pred, 0] += 1
for i in range((X_1.shape[0])):
    y_pred = prediction(X_1[i], stat, P, 0.5)
    C1[y_pred, 1] += 1

In [25]:
acc1 = (C1[0,0] + C1[1,1])/np.sum(C1)
print(acc1)

1.0


In [19]:
#computing accuracy on test data.
C = np.zeros((2,2)) #[[]]
for i in range((X_test_0.shape[0])):
    y_pred = prediction(X_test_0[i], stat, P, 0.5)
    C[y_pred, 0] += 1
for i in range((X_test_1.shape[0])):
    y_pred = prediction(X_test_1[i], stat, P, 0.5)
    C[y_pred, 1] += 1

In [20]:
C

array([[36., 16.],
       [84., 74.]])

In [21]:
acc = (C[0,0] + C[1,1])/np.sum(C)
print(acc)

0.5238095238095238
