# Use Bayes' theorem to compute the performance of vowel classification

In [7]:
from mat4py import loadmat
import numpy as np
# Load the vowel data set; data['vowels'][i]['training'] holds the feature rows
# of the i-th vowel class.
data = loadmat('data.mat')
vowel_map={0:'a',1:'e',2:'i',3:'u'}
whole_set=[]    # every sample, classes concatenated in class order
labels=[]       # vowel letter for each row of whole_set
mus=[]          # per-class mean vectors
cs=[]           # per-class covariance matrices
prior_prob=[]   # per-class sample counts (turned into priors by a later cell)
for i in range(len(vowel_map)):
    class_rows = data['vowels'][i]['training']
    n_samples = len(class_rows)
    prior_prob.append(n_samples)
    labels.extend([vowel_map[i]] * n_samples)
    whole_set.extend(list(row) for row in class_rows)

    # Convert to an array once and derive both statistics from it.
    samples = np.array(class_rows)
    mus.append(samples.mean(axis=0))
    # rowvar=False: rows are observations, columns are features. For two
    # feature columns this is the same matrix as np.cov(col0, col1), but it
    # stays consistent with the mean vector for any number of features.
    cs.append(np.cov(samples, rowvar=False))

In [8]:
# Turn the per-class sample counts into relative frequencies (class priors),
# updating the existing list in place.
total=sum(prior_prob)
prior_prob[:] = [count/total for count in prior_prob]
    

In [9]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits import mplot3d
from sklearn import linear_model

%matplotlib inline
# 'seaborn-white' was deprecated in matplotlib 3.6 (renamed to
# 'seaborn-v0_8-white') and later removed; use whichever name is available.
plt.style.use('seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white')

In [10]:
def multivariate_gaussian_pdf(X,MU,SIGMA):
    '''Return the density of a multivariate Gaussian distribution at X.

    Parameters
    ----------
    X : array-like with p elements
        Point at which the density is evaluated.
    MU : array-like with p elements
        Mean vector of the distribution.
    SIGMA : array-like of shape (p, p)
        Covariance matrix of the distribution.

    Returns
    -------
    float
        Value of the probability density function at X.

    Raises
    ------
    numpy.linalg.LinAlgError
        If SIGMA is singular.
    '''
    # Accept lists as well as arrays and work with column vectors.
    SIGMA = np.asarray(SIGMA, dtype=float)
    diff = (np.asarray(X, dtype=float).reshape(-1,1)
            - np.asarray(MU, dtype=float).reshape(-1,1))
    p = SIGMA.shape[0]

    # Solve SIGMA @ z = diff instead of forming the explicit inverse; this is
    # the numerically preferred way to evaluate diff.T @ inv(SIGMA) @ diff.
    # .item() extracts the scalar from the 1x1 result: calling float() on an
    # array with ndim > 0 is deprecated since NumPy 1.25.
    mahalanobis_sq = (diff.T @ np.linalg.solve(SIGMA, diff)).item()
    normaliser = np.sqrt((2 * np.pi)**p * np.linalg.det(SIGMA))

    return float(np.exp(-0.5 * mahalanobis_sq) / normaliser)

### Split 1: Train 67%, Test 33%

In [11]:
from sklearn.model_selection import train_test_split
# NOTE(review): mus/cs were estimated from every sample in whole_set, so this
# test split overlaps the data the class statistics were fitted on and the
# scores below are optimistic. As far as this notebook shows, X_train/y_train
# are never used for fitting.
# A fixed random_state makes the split, and therefore the metrics, reproducible.
X_train, X_test, y_train, y_test = train_test_split(whole_set, labels, train_size=0.67, random_state=0)
X_test=np.array(X_test)

#### Assuming the prior probability is the same for all classes, i.e., 1/4

In [12]:
# Equal priors: assign each test sample to the class whose Gaussian gives the
# highest likelihood. The class count comes from mus/cs rather than a
# hard-coded 4, and the argmax is taken once all classes have been scored.
y_pred=[]
for x in X_test:
    likelihoods = [multivariate_gaussian_pdf(x, class_mu, class_cov)
                   for class_mu, class_cov in zip(mus, cs)]
    y_pred.append(vowel_map[int(np.argmax(likelihoods))])

## Evaluating the model

#### Accuracy

In [13]:
import sklearn.metrics
# Fraction of test samples whose predicted vowel equals the true label.
acc = sklearn.metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(acc)

0.9786363636363636


#### Report: precision, recall, F1 score

In [14]:
# Pass the class order explicitly via labels=. target_names on its own is
# matched positionally against the sorted labels found in y_test/y_pred, which
# would mislabel rows (or raise) if a class were ever missing from the split.
vowel_list=list(vowel_map.values())
cr=sklearn.metrics.classification_report(y_test, y_pred, labels=vowel_list, target_names=vowel_list)
print(cr)


              precision    recall  f1-score   support

           a       0.94      0.95      0.95       978
           e       1.00      0.99      0.99      2613
           i       0.95      0.97      0.96      1002
           u       0.99      0.98      0.99      2007

    accuracy                           0.98      6600
   macro avg       0.97      0.97      0.97      6600
weighted avg       0.98      0.98      0.98      6600



#### Assuming prior probability of the kth class = number of samples in class k / total number of samples

In [15]:
# Empirical priors: weight each class likelihood by its prior and pick the
# class with the largest (unnormalised) posterior. The class count comes from
# mus/cs/prior_prob rather than a hard-coded 4, and the argmax is taken once
# all classes have been scored.
y_pred=[]
for x in X_test:
    posteriors = [multivariate_gaussian_pdf(x, class_mu, class_cov) * class_prior
                  for class_mu, class_cov, class_prior in zip(mus, cs, prior_prob)]
    y_pred.append(vowel_map[int(np.argmax(posteriors))])

## Evaluating the model

#### Accuracy

In [16]:
# Fraction of test samples whose predicted vowel equals the true label.
acc = sklearn.metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(acc)

0.9807575757575757


#### Report: precision, recall, F1 score

In [17]:
# Pass the class order explicitly via labels=. target_names on its own is
# matched positionally against the sorted labels found in y_test/y_pred, which
# would mislabel rows (or raise) if a class were ever missing from the split.
vowel_list=list(vowel_map.values())
cr=sklearn.metrics.classification_report(y_test, y_pred, labels=vowel_list, target_names=vowel_list)
print(cr)

              precision    recall  f1-score   support

           a       0.95      0.95      0.95       978
           e       1.00      0.99      0.99      2613
           i       0.96      0.96      0.96      1002
           u       0.99      0.99      0.99      2007

    accuracy                           0.98      6600
   macro avg       0.97      0.97      0.97      6600
weighted avg       0.98      0.98      0.98      6600



### Split 2: Train 80%, Test 20%

In [18]:
# NOTE(review): mus/cs were estimated from every sample in whole_set, so this
# test split overlaps the data the class statistics were fitted on and the
# scores below are optimistic. As far as this notebook shows, X_train/y_train
# are never used for fitting.
# A fixed random_state makes the split, and therefore the metrics, reproducible.
X_train, X_test, y_train, y_test = train_test_split(whole_set, labels, train_size=0.8, random_state=0)
X_test=np.array(X_test)

#### Assuming the prior probability is the same for all classes, i.e., 1/4

In [19]:
# Equal priors: assign each test sample to the class whose Gaussian gives the
# highest likelihood. The class count comes from mus/cs rather than a
# hard-coded 4, and the argmax is taken once all classes have been scored.
y_pred=[]
for x in X_test:
    likelihoods = [multivariate_gaussian_pdf(x, class_mu, class_cov)
                   for class_mu, class_cov in zip(mus, cs)]
    y_pred.append(vowel_map[int(np.argmax(likelihoods))])

## Evaluating the model

#### Accuracy

In [20]:
# Fraction of test samples whose predicted vowel equals the true label.
acc = sklearn.metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(acc)

0.9805


#### Report: precision, recall, F1 score

In [21]:
# Build the class list here (as the other report cells do) instead of relying
# on a variable left over from an earlier cell, and pass it via labels= so the
# row order does not depend on positional matching of target_names against the
# sorted labels found in y_test/y_pred.
vowel_list=list(vowel_map.values())
cr=sklearn.metrics.classification_report(y_test, y_pred, labels=vowel_list, target_names=vowel_list)
print(cr)

              precision    recall  f1-score   support

           a       0.95      0.95      0.95       614
           e       1.00      0.99      1.00      1609
           i       0.94      0.98      0.96       562
           u       0.99      0.98      0.99      1215

    accuracy                           0.98      4000
   macro avg       0.97      0.98      0.97      4000
weighted avg       0.98      0.98      0.98      4000



#### Assuming prior probability of the kth class = number of samples in class k / total number of samples

In [22]:
# Empirical priors: weight each class likelihood by its prior and pick the
# class with the largest (unnormalised) posterior. The class count comes from
# mus/cs/prior_prob rather than a hard-coded 4, and the argmax is taken once
# all classes have been scored.
y_pred=[]
for x in X_test:
    posteriors = [multivariate_gaussian_pdf(x, class_mu, class_cov) * class_prior
                  for class_mu, class_cov, class_prior in zip(mus, cs, prior_prob)]
    y_pred.append(vowel_map[int(np.argmax(posteriors))])

### Evaluating the model

#### Accuracy

In [23]:
# Fraction of test samples whose predicted vowel equals the true label.
acc = sklearn.metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(acc)

0.9795


#### Report: precision, recall, F1 score

In [24]:
# Pass the class order explicitly via labels=. target_names on its own is
# matched positionally against the sorted labels found in y_test/y_pred, which
# would mislabel rows (or raise) if a class were ever missing from the split.
vowel_list=list(vowel_map.values())
cr=sklearn.metrics.classification_report(y_test, y_pred, labels=vowel_list, target_names=vowel_list)
print(cr)

              precision    recall  f1-score   support

           a       0.96      0.94      0.95       614
           e       1.00      0.99      0.99      1609
           i       0.95      0.96      0.96       562
           u       0.98      0.99      0.98      1215

    accuracy                           0.98      4000
   macro avg       0.97      0.97      0.97      4000
weighted avg       0.98      0.98      0.98      4000

