In [423]:
import numpy as np 

In [501]:
class NBC:
    def __init__(self, feature_types, num_classes):
        self.feature_types = feature_types 
        self.num_classes = num_classes
    
    def fit(self, X, y):
        classes, classCounts = np.unique(y, return_counts=True)
        
        pis = np.expand_dims(classCounts / y.shape[0], axis=1)
        
        allMus = [] 
        allSigmaSqrs = []
        
        for i in range(classes.shape[0]):
            # Get class examples  
            classExampleIdxs = np.argwhere(y==classes[i])[:,0]
        
            # Calculate class parameters
            # Empirical mean / Bernoulli distribution parameter
            mus = np.average(X[classExampleIdxs,:], axis=0)
            print(mus)
            # Emperical variance 
            sigmaSqrs = np.var(X[classExampleIdxs,:], axis=0)
            
            allMus.append(mus)
            allSigmaSqrs.append(sigmaSqrs)
        
        allMus = np.array(allMus)
        allSigmaSqrs = np.array(allSigmaSqrs)
        
        self.pis = pis
        self.mus = allMus
        self.sigmaSqrs = allSigmaSqrs
    
    def calcRealProb(self, X, realIndices, clsIdx): 
        mean = self.mus[clsIdx,realIndices]
        variances = self.sigmaSqrs[clsIdx,realIndices] 
        realXs = X[:,realIndices]
        probs = np.exp(-np.square(realXs-mean)/(2*(variances+1e-6)))/np.sqrt(2*np.pi*(variances+1e-6))
        return probs
    
    def calcCatProb(self, X, binIndices, clsIdx):
        params = self.mus[clsIdx,binIndices]
        binXs = X[:,binIndices]
        probs = np.zeros_like(binXs) 
        for i in range(params.shape[0]):
            feature = binXs[:,i]
            featureProbs = np.zeros_like(feature)
            featureProbs[feature == 1] = params[i]
            featureProbs[feature == 0] = 1 - params[i]
            probs[:,i] = featureProbs
        return probs
        
    def calcClassProb(self, X, clsIdx):
        featureTypes = self.feature_types
        binIndices = [i for i, x in enumerate(featureTypes) if x == 'b']
        realIndices = [i for i, x in enumerate(featureTypes) if x == 'r']
        
        realProbs = self.calcRealProb(X, realIndices, clsIdx)
        catProbs = self.calcCatProb(X, binIndices, clsIdx)
        
        # Ensure no zeros
        realProbs[realProbs == 0] = 1e-6
        catProbs[catProbs == 0] = 1e-6
        
        print(np.log(realProbs))
        print(np.log(catProbs))
        

In [502]:
nbc = NBC(feature_types=['b','r','b','r'], num_classes=2)
nbc.fit(np.array([[[1],[0.5],[1],[0.5]],[[1],[0.5],[0],[0.5]],[[1],[0.5],[0],[0.5]]]),np.array([[2],[0],[2]]))

[[1. ]
 [0.5]
 [0. ]
 [0.5]]
[[1. ]
 [0.5]
 [0.5]
 [0.5]]


In [508]:
X = np.array([[[1],[0.5],[1],[0.5]]])
nbc.calcClassProb(X, 0)

[[[5.98881675]
  [5.98881675]]]
[[[  0.        ]
  [-13.81551056]]]
