In [48]:
"""
MA23M026
 VINOD KUMAR
 Data Analysis and Visualization
"""

import numpy as np

# Function to generate artificial data
def getArtificialData(mean, cov, nx, nt):
    # Generate nx data points from a multivariate normal distribution with given mean and covariance
    x = np.random.multivariate_normal(mean, cov, nx)
    # Generate nt test points from a multivariate normal distribution with given mean and covariance
    t = np.random.multivariate_normal(mean, cov, nt)
    # Calculate the mean of the data points
    mu = np.mean(x, axis=0)
    # Calculate the covariance matrix of the data points
    Sgm = np.cov(x, rowvar=False)
    return x, t, mu, Sgm

# Function to evaluate the multivariate Gaussian distribution
def evaluateMultiVarGauss(t, mu, Sgm):
    D = len(mu)  # Dimensionality of the data
    detSgm = np.linalg.det(Sgm)  # Determinant of the covariance matrix
    invSgm = np.linalg.inv(Sgm)  # Inverse of the covariance matrix
    x_minus_mu = t - mu  # Difference between each test point and the mean
    exponent = -0.5 * np.sum(np.dot(x_minus_mu, invSgm) * x_minus_mu, axis=1)  # Exponential term of the Gaussian function
    coef = 1 / np.sqrt((2*np.pi)**D * detSgm)  # Coefficient of the Gaussian function
    return coef * np.exp(exponent)  # Evaluate the Gaussian function for each test point

# Function to calculate and print test set results
def calculateTestSet(t, pXY, yEx):

    num_points = len(t)  # Number of test points
    misclassified_points = 0  # Counter for misclassified points

    for i in range(num_points):
        predicted_class = np.argmax(pXY[i])  # Predicted class for the test point
        actual_class = yEx[i]  # Actual class of the test point

        print(f"Test Point {i + 1}:")
        print(f"Probabilities: {pXY[i]}")
        print(f"Actual Label: {actual_class}, Computed Label: {predicted_class}")
        print("-------------------------")

        if predicted_class != actual_class:
            misclassified_points += 1  # Increment misclassified points counter

    # Print total number of misclassified points
    print(f"Number of Misclassified Points: {misclassified_points}")

"""
MA23M026
 VINOD KUMAR
 Data Analysis and Visualization
"""

# Generate artificial data
mean1 = [0, 0]
cov1 = [[2, 1], [1, 50]]
mean2 = [7, 5]
cov2 = [[3, 1], [1, 3]]
mean3 = [-5, 5]
cov3 = [[5, 2], [2, 3]]
nx1 = 40
nt1 = 10
nx2 = 80
nt2 = 20
nx3 = 20
nt3 = 5

# Get artificial data for each class
x1, t1, mu1, Sgm1 = getArtificialData(mean1, cov1, nx1, nt1)
x2, t2, mu2, Sgm2 = getArtificialData(mean2, cov2, nx2, nt2)
x3, t3, mu3, Sgm3 = getArtificialData(mean3, cov3, nx3, nt3)

# Merge test points from all classes
t = np.concatenate((t1, t2, t3))

# Evaluate Gaussian distribution on test points for each class
pXY1 = evaluateMultiVarGauss(t, mu1, Sgm1)
pXY2 = evaluateMultiVarGauss(t, mu2, Sgm2)
pXY3 = evaluateMultiVarGauss(t, mu3, Sgm3)

# Prior probabilities for each class
prior1 = nx1 / (nx1 + nx2 + nx3)
prior2 = nx2 / (nx1 + nx2 + nx3)
prior3 = nx3 / (nx1 + nx2 + nx3)

# Posterior probabilities using Bayes' rule
posterior1 = pXY1 * prior1
posterior2 = pXY2 * prior2
posterior3 = pXY3 * prior3

# Combine posterior probabilities
pXY = np.column_stack((posterior1, posterior2, posterior3))

# Exact labels of the test points
yEx = np.concatenate((np.zeros(len(t1)), np.ones(len(t2)), np.full(len(t3), 2)))

# Calculate and print test set results
calculateTestSet(t, pXY, yEx)

Test Point 1:
Probabilities: [2.35789210e-03 7.94353557e-11 3.23128250e-15]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 2:
Probabilities: [2.27757321e-03 2.44055158e-18 2.79258244e-30]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 3:
Probabilities: [9.40544539e-06 6.84421132e-20 2.22507460e-14]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 4:
Probabilities: [1.23315374e-03 3.79020209e-09 9.15399117e-04]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 5:
Probabilities: [2.14798837e-03 4.44270558e-07 9.43165108e-05]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 6:
Probabilities: [4.67312979e-03 7.22330355e-09 1.17999211e-15]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 7:
Probabilities: [4.94783349e-04 6.66698231e-11 1.30336318e-11]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 8:
Probabilities

In [None]:
"""
MA23M026
 VINOD KUMAR
 Data Analysis and Visualization
"""
# Merge test points from all classes into a single array
t = np.concatenate((t1, t2, t3))

# Calculate the total number of data points
n = x1.shape[0] + x2.shape[0] + x3.shape[0]

# Calculate the tied covariance matrix using the weighted sum of individual class covariances
tied_cov = (Sgm1 * x1.shape[0] + Sgm2 * x2.shape[0] + Sgm3 * x3.shape[0]) / n

# Evaluate Gaussian distribution on test points for all classes using tied covariance
pXY1 = evaluateMultiVarGauss(t, mu1, tied_cov)
pXY2 = evaluateMultiVarGauss(t, mu2, tied_cov)
pXY3 = evaluateMultiVarGauss(t, mu3, tied_cov)

"""
MA23M026
 VINOD KUMAR
 Data Analysis and Visualization
"""

# Calculate prior probabilities for each class
prior1 = nx1 / (nx1 + nx2 + nx3)
prior2 = nx2 / (nx1 + nx2 + nx3)
prior3 = nx3 / (nx1 + nx2 + nx3)

# Calculate posterior probabilities using Bayes' rule
posterior1 = pXY1 * prior1
posterior2 = pXY2 * prior2
posterior3 = pXY3 * prior3

# Combine posterior probabilities into a single array
pXY = np.column_stack((posterior1, posterior2, posterior3))

# Exact labels of the test points
yEx = np.concatenate((np.zeros(len(t1)), np.ones(len(t2)), np.full(len(t3), 2)))

# Calculate and print test set results
calculateTestSet(t, pXY, yEx)

Test Point 1:
Probabilities: [6.07760338e-03 4.56893815e-06 2.31996788e-06]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 2:
Probabilities: [2.21116519e-03 3.16042366e-06 7.46724134e-08]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 3:
Probabilities: [5.45722802e-05 2.46520290e-07 1.77100788e-10]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 4:
Probabilities: [5.16877328e-05 3.97504716e-08 7.02365342e-10]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 5:
Probabilities: [2.13384236e-03 6.68832794e-10 3.31709588e-04]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 6:
Probabilities: [4.39362708e-03 1.59720477e-08 7.14937961e-05]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 7:
Probabilities: [5.67554368e-04 1.09186395e-09 3.55184017e-06]
Actual Label: 0.0, Computed Label: 0
-------------------------
Test Point 8:
Probabilities

In [None]:
"""
MA23M026
 VINOD KUMAR
 Data Analysis and Visualization
"""

from sklearn.datasets import load_iris

# Function to extract Iris data
def getIrisData():
    # Load Iris dataset
    iris = load_iris()
    data = iris.data  # Extracting petal/sepal dimensions
    target = iris.target  # Extracting corresponding labels

    # Extracting first 40 entries in each class as data points
    data_points = [data[target == i][:40] for i in range(3)]

    # Extracting next 10 entries in each class as test points
    test_points = [data[target == i][40:50] for i in range(3)]

    # Combine data points and test points
    data = np.concatenate(data_points)
    test = np.concatenate(test_points)

    # Generate labels for data points and test points
    labels_data = np.repeat(np.arange(3), 40)
    labels_test = np.repeat(np.arange(3), 10)

    return data, test, labels_data, labels_test

"""
MA23M026
 VINOD KUMAR
 Data Analysis and Visualization
"""

# Get Iris data
data, test, labels_data, labels_test = getIrisData()

# Calculate mu and Sgm using functions
mu = [np.mean(data[labels_data == i], axis=0) for i in range(3)]
Sgm = [np.cov(data[labels_data == i], rowvar=False) for i in range(3)]

# Evaluate Gaussian distribution on test points for all classes
pXY1 = evaluateMultiVarGauss(test, mu[0], Sgm[0])
pXY2 = evaluateMultiVarGauss(test, mu[1], Sgm[1])
pXY3 = evaluateMultiVarGauss(test, mu[2], Sgm[2])

# Prior probabilities
prior1 = 40 / 150
prior2 = 40 / 150
prior3 = 40 / 150

# Posterior probabilities using Bayes' rule
posterior1 = pXY1 * prior1
posterior2 = pXY2 * prior2
posterior3 = pXY3 * prior3

# Combine posterior probabilities
pXY = np.column_stack((posterior1, posterior2, posterior3))

# Exact labels of the test points
yEx = labels_test

# Calculate and print test set results
calculateTestSet(test, pXY, yEx)

Test Point 1:
Probabilities: [2.08382718e+00 1.57040616e-23 2.49479351e-36]
Actual Label: 0, Computed Label: 0
-------------------------
Test Point 2:
Probabilities: [7.81671480e-04 4.62752318e-13 1.27828757e-26]
Actual Label: 0, Computed Label: 0
-------------------------
Test Point 3:
Probabilities: [6.54984921e-01 4.55106655e-20 5.15911159e-30]
Actual Label: 0, Computed Label: 0
-------------------------
Test Point 4:
Probabilities: [1.28313230e-03 9.56055993e-19 1.77294435e-29]
Actual Label: 0, Computed Label: 0
-------------------------
Test Point 5:
Probabilities: [2.63325416e-02 1.68497892e-21 2.23938057e-30]
Actual Label: 0, Computed Label: 0
-------------------------
Test Point 6:
Probabilities: [9.21799637e-01 5.37135200e-17 6.97153170e-30]
Actual Label: 0, Computed Label: 0
-------------------------
Test Point 7:
Probabilities: [9.41173810e-01 1.22041975e-26 2.28065154e-36]
Actual Label: 0, Computed Label: 0
-------------------------
Test Point 8:
Probabilities: [2.47990561e