In [2]:
# Author: 
# Date:
# Project: 
# Acknowledgements: 
#
from tools import load_iris, split_train_test

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from scipy.stats import multivariate_normal

def gen_data(
    n: int,
    locs: np.ndarray,
    scales: np.ndarray
) -> np.ndarray:
    '''
    Return n data points, their classes and a unique list of all classes, from each normal distributions
    shifted and scaled by the values in locs and scales
    '''
    data = norm.rvs(locs,scales,size=(n,len(locs)))
    labels = []
    classes = []
    for i in range(len(locs)):
        classes.append(i)
        for j in range(len(data)):
            labels.append(i)


    return np.array(data),np.array(labels),np.array(classes)


def mean_of_class(
    features: np.ndarray,
    targets: np.ndarray,
    selected_class: int
) -> np.ndarray:
    '''
    Estimate the mean of a selected class given all features
    and targets in a dataset
    '''
    selected_features = features[targets==selected_class] 
    mu = np.mean(selected_features,0)
    return mu


def covar_of_class(
    features: np.ndarray,
    targets: np.ndarray,
    selected_class: int
) -> np.ndarray:
    '''
    Estimate the covariance of a selected class given all
    features and targets in a dataset
    '''   
    selected_features = features[targets==selected_class]
    cov = np.cov(selected_features,rowvar=False)
    return cov


def likelihood_of_class(
    feature: np.ndarray,
    class_mean: np.ndarray,
    class_covar: np.ndarray
) -> float:
    '''
    Estimate the likelihood that a sample is drawn
    from a multivariate normal distribution, given the mean
    and covariance of the distribution.
    '''

    p = multivariate_normal(feature,class_mean,class_covar) # assuming we have a k-dimensional data
    return p


np.random.seed(1234)


features,targets,classes = gen_data(50, [-1, 1], [np.sqrt(5), np.sqrt(5)])
(train_features, train_targets), (test_features, test_targets)\
    = split_train_test(features, targets, train_ratio=0.8)
# print(mean_of_class(train_features,test_features,0))

# print(covar_of_class(train_features,test_features,0))
class_mean = mean_of_class(train_features, train_targets, 0)
class_cov = covar_of_class(train_features, train_targets, 0)
print(class_mean)
print(class_cov)
likelihood_of_class(train_features[0:3],class_mean,class_cov)



[-0.49849907  0.54109015]
[[ 3.12693622 -0.62851986]
 [-0.62851986  6.9706728 ]]


ValueError: Array 'mean' must be a vector of length 6.

In [10]:
# Author: 
# Date:
# Project: 
# Acknowledgements: 
#
from tools import load_iris, split_train_test

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

def gen_data(
    n: int,
    locs: np.ndarray,
    scales: np.ndarray
) -> np.ndarray:
    '''
    Return n data points, their classes and a unique list of all classes, from each normal distributions
    shifted and scaled by the values in locs and scales
    '''
    data = norm.rvs(locs,scales,size=(n,len(locs)))
    labels = []
    classes = []
    for i in range(len(locs)):
        classes.append(i)
        for j in range(len(data)):
            labels.append(i)


    return np.array(data),np.array(labels),np.array(classes)


def mean_of_class(
    features: np.ndarray,
    targets: np.ndarray,
    selected_class: int
) -> np.ndarray:
    '''
    Estimate the mean of a selected class given all features
    and targets in a dataset
    '''
    selected_features = features[targets==selected_class] 
    mu = np.mean(selected_features,0)
    return mu


def covar_of_class(
    features: np.ndarray,
    targets: np.ndarray,
    selected_class: int
) -> np.ndarray:
    '''
    Estimate the covariance of a selected class given all
    features and targets in a dataset
    '''   
    selected_features = features[targets==selected_class]
    cov = np.cov(selected_features,rowvar=False)
    return cov


def likelihood_of_class(
    feature: np.ndarray,
    class_mean: np.ndarray,
    class_covar: np.ndarray
) -> np.ndarray:
    '''
    Estimate the likelihood that a sample is drawn
    from a multivariate normal distribution, given the mean
    and covariance of the distribution.
    '''

    p = norm.pdf(feature,class_mean,class_covar) # assuming we have a 1-dimensional input
    return p


def maximum_likelihood(
    train_features: np.ndarray,
    train_targets: np.ndarray,
    test_features: np.ndarray,
    classes: list
) -> np.ndarray:
    '''
    Calculate the maximum likelihood for each test point in
    test_features by first estimating the mean and covariance
    of all classes over the training set.

    You should return
    a [test_features.shape[0] x len(classes)] shaped numpy
    array
    '''
    means, covs = [], []
    for class_label in classes:
        means.append(mean_of_class(train_features,train_targets,class_label))
        covs.append(covar_of_class(train_features,train_targets,class_label))

    print(means)
    print(covs)
    likelihoods = []
    for i in range(test_features.shape[0]):
        likelihoods.append(likelihood_of_class(train_features[i],means,covs))
    return np.array(likelihoods)


features,targets,classes = gen_data(50, [-1,1], [np.sqrt(5),np.sqrt(5)])
(train_features, train_targets), (test_features, test_targets)\
    = split_train_test(features, targets, train_ratio=0.8)
class_mean = mean_of_class(train_features, train_targets, 1)
class_cov = covar_of_class(train_features, train_targets, [0,1])
class_cov
# print(classes)
# maximum_likelihood(train_features,train_targets,test_features,classes)


ValueError: operands could not be broadcast together with shapes (40,) (2,) 

In [19]:
y_c = [0, 1, 2, 1]
y_c_hat = [0, 1, 1, 1]

c = 0
class_counts = {}
match_counts = {}
class_indices = {}

for i in range(len(y_c)):
    _class = y_c[i]

    # Increment the overall match count
    if _class == y_c_hat[i]:
        c += 1
    
    # Track the count of each class
    if _class in class_counts:
        class_counts[_class] += 1
    else:
        class_counts[_class] = 1

    # Track the indices of each class
    if _class in class_indices:
        class_indices[_class].append(i)
    else:
        class_indices[_class] = [i]

    # Track the count of matches for each class
    if _class in match_counts:
        if _class == y_c_hat[i]:
            match_counts[_class] += 1
    else:
        match_counts[_class] = 1 if _class == y_c_hat[i] else 0

print("Overall match count (c):", c)
print("Class counts:", class_counts)
print("Class indices:", class_indices)
print("Match counts per class:", match_counts)


Overall match count (c): 3
Class counts: {0: 1, 1: 2, 2: 1}
Class indices: {0: [0], 1: [1, 3], 2: [2]}
Match counts per class: {0: 1, 1: 2, 2: 0}


In [21]:
y_c = [0, 1, 2, 1]
y_c_hat = [0, 1, 1, 1]

# Initialize variables to track classes, counts, and matches
class_counts = []
match_counts = []

# Iterate through the classes in y_c
for i in range(len(y_c)):
    _class = y_c[i]

    # Ensure there are enough entries in class_counts and match_counts
    while len(class_counts) <= _class:
        class_counts.append(0)
        match_counts.append(0)

    # Track the count of each class
    class_counts[_class] += 1

    # Track the count of matches for each class
    if _class == y_c_hat[i]:
        match_counts[_class] += 1

class_counts




[1, 2, 1]

In [26]:
# Given lists
x = [1, 2, 2, 3, 1, 2, 4, 3]
y = [1, 3, 2, 3, 4, 2, 4, 3]

# Find the maximum value in x to determine the size of the lists
max_value = max(x)

# Initialize lists to store counts and ratios
x_count = [0] * (max_value + 1)
match_count = [0] * (max_value + 1)
ratio = [0.0] * (max_value + 1)

# Single loop to populate the lists and calculate the ratios
for i in range(len(x)):
    # Update the count of occurrences for x[i]
    x_index = x[i]
    x_count[x_index] += 1
    
    # Update the match count if x[i] == y[i]
    if x[i] == y[i]:
        match_count[x_index] += 1
    
    # Calculate the running ratio for x[i]
    ratio[x_index] = match_count[x_index] / x_count[x_index]

# Return only the ratio list
print("Ratio match_count / x_count for each unique x:", ratio)


Ratio match_count / x_count for each unique x: [0.0, 0.5, 0.6666666666666666, 1.0, 1.0]
