In [31]:
import numpy as np
import math

In [32]:
with open("MicroChip.txt") as f:
    line=f.readlines()
    f.close()
raw_data=[]
for i in line:
    t=i.strip()
    raw_data.append(t)

In [33]:
dataset=[]
for i in raw_data:
    dataset.append(i.split(','))
dataset=np.array(dataset,dtype=np.longdouble)

In [34]:
dataset=dataset.T
dataset.shape

(3, 118)

In [35]:
min(dataset[0]),max(dataset[0])

(-0.83007, 1.0709)

In [36]:
min(dataset[1]),max(dataset[1])

(-0.76974, 1.1089)

In [37]:
def processed(dataset):
    f1=np.array(dataset[0])
    f2=np.array(dataset[1])
    f3 = np.square(f1)
    f4 = np.square(f2)
    f5 = f1*f2
    f1 = (f1-np.mean(f1))/(np.max(f1)-np.min(f1))
    f2 = (f2-np.mean(f2))/(np.max(f2)-np.min(f2))
    f3 = (f3-np.mean(f3))/(np.max(f3)-np.min(f3))
    f4 = (f4-np.mean(f4))/(np.max(f4)-np.min(f4))
    f5 = (f5-np.mean(f5))/(np.max(f5)-np.min(f5))
    pct_data = int(0.7*(len(dataset[0])))
    dataset_train=np.array([f1[:pct_data],f2[:pct_data],f3[:pct_data],f4[:pct_data],f5[:pct_data]])
    y_train = np.array(dataset[2][:pct_data])
    dataset_test=np.array([f1[pct_data:],f2[pct_data:],f3[pct_data:],f4[pct_data:],f5[pct_data:]])
    y_test = np.array(dataset[2][pct_data:])
    return dataset_train.T,y_train.T,dataset_test.T,y_test.T

# Gaussian Discriminant Algorithm
## step1:
### probablity distribution normal distribution
$$ P(X|Y;\mu;\sigma)= \frac{1}{((2\pi)^n\sigma)^\frac{1}{2}} \exp(\frac{-1}{2}(X-\mu)^T \sigma^{-1}(X-\mu)$$
## step2:
### LikelyHood estimate of m samples
$$ L(\mu;\sigma;x^i) = L(\mu;\sigma;x^1)*L(\mu;\sigma;x^2)*L(\mu;\sigma;x^3)*....*L(\mu;\sigma;x^m) $$
## step3:
### Taking Log and partial differentiating with respect to mu,sigma we get values of mu and sigma
$$ \mu = \frac{\sum_{i=1}^{i=m}X^i}{m} $$
$$ \sigma = \frac{\sum_{i=1}{i=m}(x^i-\mu_{y^i})(x^i-\mu_{y^i})^T}{m} $$
## step4:Probability for prediction
$$ P(y^i,\phi) = \phi(y^i)(1-\phi(y))^{1-y^i} $$
## step5:
### using step1 and taking Log and partial differentiating with respect to phi we get values of phi
$$ \phi^- = \frac{\sum_{i=1}^{i=m}(Y^i=1)}{m} $$


In [38]:
def cal_phi_bar(y_train):
    m=len(y_train)
    count=0
    for i in y_train:
        if i==1:
            count+=1
    return count/m

In [39]:
def cal_mu(dataset_train,y_train):
    n=len(dataset_train[0])
    mu0=np.zeros(n)
    mu1=np.zeros(n)
    zero=0
    one=1
    for i in range(len(dataset_train)):
        if(y_train[i]==0):
            for j in range(n):
                mu0[j]+=dataset_train[i][j]
            zero+=1
        else:
            for j in range(n):
                mu1[j]+=dataset_train[i][j]
            one+=1
    mu0=mu0/zero
    mu1=mu1/one
    return mu0,mu1

In [40]:
def cal_sigma(X_train,Y_train):
    m = len(X_train)
    n = len(X_train[0])
    sigma = np.zeros((n, n))
    for i in range(m):
        if Y_train[i] == 1:
            mu = mu1
        else:
            mu = mu0
        mu = mu.reshape((n, 1))
        xi = X_train[i].reshape((n, 1))
        sigma += np.dot(xi-mu, np.transpose(xi-mu))
    return sigma / m

In [41]:
def cal_p_x_given_y(x,mu,sigma):
    n = len(x)
    pi = 3.14
    mu = (np.array(mu)).reshape((n, 1))
    x = (np.array(x)).reshape((n, 1))
    return ((1 / (((2 * pi) ** (n / 2)) * math.sqrt(np.linalg.det(sigma)))) * np.exp(-0.5 * np.dot(np.transpose(x - mu), np.dot(np.linalg.inv(sigma), (x - mu)))))

In [42]:
def cal_py(y,phi):
    if(y==1):
        return phi
    return 1-phi

In [43]:
def accuracy(dataset_test,y_test,mu0,mu1,sigma,phi):
    n=len(y_test)
    correct=0
    for i in range(n):
        p_x_0_y_0 = cal_p_x_given_y(dataset_test[i],mu0,sigma)*cal_py(0,phi)
        p_x_0_y_1 = cal_p_x_given_y(dataset_test[i],mu1,sigma)*cal_py(1,phi)
        if(p_x_0_y_0>=p_x_0_y_1):
            if(y_test[i]==0):
                correct+=1
        else:
            if(y_test[i]==1):
                correct+=1
    return (correct/n)*100 

In [44]:
X=dataset.T
np.random.shuffle(X)
dataset_train,y_train,dataset_test,y_test = processed(X.T)
mu0,mu1=cal_mu(dataset_train,y_train)
sigma=getsigma(dataset_train,y_train)
#dataset_train,dataset_train.shape,y_train,y_train.shape,dataset_test,dataset_test.shape,y_test,y_test.shape,mu0,mu0.shape,mu1,mu1.shape,sigma,sigma.shape

In [45]:
phi  = cal_phi_bar(y_train)
print("Accuracy is: ",accuracy(dataset_test,y_test,mu0,mu1,sigma,phi))

Accuracy is:  88.88888888888889
