In [854]:
from __future__ import division
from ps4_utils import load_data,load_experiment
from ps4_utils import AbstractGenerativeModel
from ps4_utils import save_submission
from scipy.misc import logsumexp
import numpy as np
data_fn = "datasets-ps4.h5"
MAX_OUTER_ITER = 20

In [877]:
class MixtureModel(AbstractGenerativeModel):
    """Per-class mixture of Bernoulli components, fitted with EM.

    For each class c the model keeps mixing weights ``pi`` of shape (K_c,)
    and Bernoulli parameters ``theta`` of shape (num_features, K_c), where
    K_c = NUM_MIXTURE_COMPONENTS[c].

    ``self.num_features`` and ``self.num_classes`` are read here but never
    assigned in this class; presumably AbstractGenerativeModel.__init__ sets
    them -- confirm in ps4_utils.
    """

    def __init__(self, CLASSES, NUM_FEATURES, NUM_MIXTURE_COMPONENTS, MAX_ITER=50, EPS=10**(-7)):
        """Store hyper-parameters and allocate per-class parameter slots.

        CLASSES, NUM_FEATURES   -- forwarded unchanged to the base class.
        NUM_MIXTURE_COMPONENTS  -- sequence with one component count per class.
        MAX_ITER                -- number of EM iterations run by fit().
        EPS                     -- theta is kept inside [EPS, 1 - EPS] so that
                                   log(theta) and log(1 - theta) stay finite.
        """
        AbstractGenerativeModel.__init__(self, CLASSES, NUM_FEATURES)
        self.num_mixture_components = NUM_MIXTURE_COMPONENTS
        self.max_iter = MAX_ITER
        self.epsilon = EPS
        self.params = {  # lists with one entry per class
            # uniform mixing weights; 1.0 / k does not rely on true division
            'pi': [np.repeat(1.0 / k, k) for k in self.num_mixture_components],
            # placeholder thetas, replaced by pack_params()
            'theta': [np.zeros((self.num_features, k)) for k in self.num_mixture_components],
        }

    def pack_params(self, X, class_idx):
        """Fit class ``class_idx`` on ``X[class_idx]`` and store the result."""
        pi, theta = self.fit(X[class_idx], class_idx)
        self.params['pi'][class_idx] = pi
        self.params['theta'][class_idx] = theta

    def classify(self, X):
        """Return, for each row of X, the class whose mixture scores it highest.

        The score is the log of the mixture likelihood returned by findP();
        no class prior is added.
        """
        pi = self.params['pi']
        theta = self.params['theta']
        scores = [self.findP(X, pi[c], theta[c])[1] for c in range(self.num_classes)]
        # scores stacks to (num_classes, N); pick the best class per column
        return np.vstack(scores).argmax(axis=0)

    # --- E-step
    def updateLatentPosterior(self, X, pi, theta, num_mixture_components):
        """Return responsibilities gamma of shape (N, K).

        Each row is the posterior over components for one data point and
        sums to one.  ``num_mixture_components`` is kept for interface
        compatibility; broadcasting makes it unnecessary.
        """
        log_joint, log_norm = self.findP(X, pi, theta)
        # subtract the per-point normaliser in log space before exponentiating
        return np.exp(log_joint - log_norm[:, np.newaxis])

    # --- M-step (1)
    @staticmethod
    def updatePi(gamma):
        """Return the new mixing weights: column means of gamma, shape (K,)."""
        return np.sum(gamma, axis=0) / float(gamma.shape[0])

    # --- M-step (2)
    @staticmethod
    def updateTheta(X, gamma):
        """Return the responsibility-weighted feature means, shape (F, K).

        A component whose total responsibility underflowed to zero would give
        0/0 = NaN and poison every later E-step, so the denominator is
        floored at the smallest positive normal float; such a component gets
        theta = 0, which the caller clips to eps.
        """
        weight = np.maximum(np.sum(gamma, axis=0), np.finfo(float).tiny)
        return np.dot(X.T, gamma) / weight

    @staticmethod
    def findP(X, pi, theta):
        """Return (t, logsumexp(t, axis=1)) for data X under one mixture.

        t[n, k] = log pi[k] + sum_f ( X[n, f] * log theta[f, k]
                                      + (1 - X[n, f]) * log(1 - theta[f, k]) )
        has shape (N, K); the second value has shape (N,) and is the log of
        the mixture likelihood of each row.  theta must lie strictly inside
        (0, 1) for the logs to be finite.
        """
        t = np.log(pi) + (np.dot(X, np.log(theta)) + np.dot(1.0 - X, np.log(1.0 - theta)))
        return t, logsumexp(t, axis=1)

    # --- execute EM procedure
    def fit(self, X, class_idx):
        """Run EM on the rows of X and return (pi, theta) for ``class_idx``.

        Each component is initialised to the per-feature mean of a random
        10% sample of the rows (at least one row, drawn with replacement).
        Uses the global numpy RNG.  The stored parameters are not modified;
        pack_params() is responsible for saving the result.

        Raises ValueError if X has no rows.
        """
        eps = self.epsilon
        n_points = X.shape[0]
        if n_points == 0:
            raise ValueError("cannot fit a mixture model on an empty data set")
        pi = self.params['pi'][class_idx]
        num_mixture_components = self.num_mixture_components[class_idx]
        # work on a copy so a fit never mutates the stored parameters in place
        theta = np.array(self.params['theta'][class_idx], dtype=float)

        # INITIALIZE theta
        sample_size = max(1, int(n_points * 0.1))
        for k in range(theta.shape[1]):
            rows = (np.random.rand(sample_size) * n_points).astype(int)
            # axis=0 keeps one mean per feature; without it every feature of
            # the component would receive the same scalar
            theta[:, k] = np.mean(X[rows], axis=0)

        for _ in range(self.max_iter):
            # keep theta away from 0 and 1 before taking logs in the E-step
            theta = np.clip(theta, eps, 1.0 - eps)
            gamma = self.updateLatentPosterior(X, pi, theta, num_mixture_components)
            pi = self.updatePi(gamma)
            theta = self.updateTheta(X, gamma)
        theta = np.clip(theta, eps, 1.0 - eps)
        return pi, theta

In [875]:
class NaiveBayesModel(AbstractGenerativeModel):
    """Bernoulli naive Bayes: one vector of feature probabilities per class.

    ``self.num_classes`` is read here but never assigned in this class;
    presumably AbstractGenerativeModel.__init__ sets it -- confirm in
    ps4_utils.
    """

    def __init__(self, CLASSES, NUM_FEATURES, EPS=10**(-12)):
        """Store EPS and allocate one probability vector per class.

        CLASSES, NUM_FEATURES -- forwarded unchanged to the base class.
        EPS                   -- probabilities are kept inside [EPS, 1 - EPS]
                                 so their logs stay finite.
        """
        AbstractGenerativeModel.__init__(self, CLASSES, NUM_FEATURES)
        self.epsilon = EPS
        self.params = {
            # estimated feature probabilities (not logs) for each class;
            # built with a comprehension so the classes do not share one array
            'p': [np.zeros(NUM_FEATURES) for _ in range(self.num_classes)]
        }

    def pack_params(self, X, class_idx):
        """Fit class ``class_idx`` on ``X[class_idx]`` and store the result."""
        self.params['p'][class_idx] = self.fit(X[class_idx])

    def classify(self, X):
        """Return the index of the most likely class for each row of X.

        Scores are Bernoulli log-likelihoods; no class prior is added.
        Ties go to the lowest class index.
        """
        log_likelihoods = [
            np.dot(X, np.log(p_c)) + np.dot(1.0 - X, np.log(1.0 - p_c))
            for p_c in self.params['p']
        ]
        # rows are classes, columns are data points
        predictions = np.argmax(np.array(log_likelihoods), axis=0)
        return predictions

    def fit(self, X):
        """Return the per-feature mean of X clipped to [eps, 1 - eps].

        The result has shape (NUM_FEATURES,) when X is (N, NUM_FEATURES).
        """
        eps = self.epsilon
        estimated_p = np.clip(np.mean(X, axis=0), eps, 1.0 - eps)
        return estimated_p

In [876]:
experiment_name = "sentiment_analysis"
# --- SENTIMENT ANALYSIS setup
Xtrain,Xval,num_classes,num_features = load_experiment(data_fn, experiment_name)

# -- build naive bayes model for sentiment analysis
print("SENTIMENT ANALYSIS -- NAIVE BAYES MODEL:")
nbm = NaiveBayesModel(num_classes, num_features)
nbm.train(Xtrain)
print("ACCURACY ON VALIDATION: " + str(nbm.val(Xval)))

# Random search over the number of mixture components per class.
# acc / bestnmc / best_mm track the best validation accuracy seen so far,
# the component counts that produced it, and the trained model itself.
bestnmc = None
acc = 0
best_mm = None

# -- build mixture model for sentiment analysis
print("SENTIMENT ANALYSIS -- MIXTURE MODEL:")
for trial in range(MAX_OUTER_ITER):
    num_mixture_components =  np.random.randint(2,15,num_classes)
    print("COMPONENTS: " + " ".join(str(k) for k in num_mixture_components))
    candidate = MixtureModel(num_classes, num_features, num_mixture_components)
    candidate.train(Xtrain)
    # evaluate once per trial and reuse the value for printing and selection
    val_acc = candidate.val(Xval)
    print("ACCURACY ON VALIDATION: " + str(val_acc))
    if val_acc > acc:
        acc = val_acc
        bestnmc = num_mixture_components
        best_mm = candidate

# EM starts from a random initialisation, so retraining with bestnmc would
# produce a different model than the one that was validated; submit the
# validated model itself.
mm = best_mm
# submit to kaggle
Xkaggle = load_data(data_fn, experiment_name, "kaggle")
save_submission("mm-{}-submission.csv".format(experiment_name), mm.classify(Xkaggle))

SENTIMENT ANALYSIS -- NAIVE BAYES MODEL:
ACCURACY ON VALIDATION: 0.72
SENTIMENT ANALYSIS -- MIXTURE MODEL:
COMPONENTS: 11 14
ACCURACY ON VALIDATION: 0.72
COMPONENTS: 4 5
ACCURACY ON VALIDATION: 0.724
COMPONENTS: 9 13
ACCURACY ON VALIDATION: 0.702
COMPONENTS: 6 13
ACCURACY ON VALIDATION: 0.724
COMPONENTS: 11 10
ACCURACY ON VALIDATION: 0.716
COMPONENTS: 4 5
ACCURACY ON VALIDATION: 0.714
COMPONENTS: 4 6
ACCURACY ON VALIDATION: 0.732
COMPONENTS: 14 9
ACCURACY ON VALIDATION: 0.724
COMPONENTS: 4 6
ACCURACY ON VALIDATION: 0.724
COMPONENTS: 7 6
ACCURACY ON VALIDATION: 0.714
COMPONENTS: 2 3
ACCURACY ON VALIDATION: 0.708
COMPONENTS: 5 11
ACCURACY ON VALIDATION: 0.718
COMPONENTS: 8 14
ACCURACY ON VALIDATION: 0.712
COMPONENTS: 10 11
ACCURACY ON VALIDATION: 0.734
COMPONENTS: 11 3
ACCURACY ON VALIDATION: 0.704
COMPONENTS: 5 4
ACCURACY ON VALIDATION: 0.734
COMPONENTS: 9 4
ACCURACY ON VALIDATION: 0.694
COMPONENTS: 8 6
ACCURACY ON VALIDATION: 0.712
COMPONENTS: 9 8
ACCURACY ON VALIDATION: 0.692
COMPONEN

In [863]:
experiment_name = "mnist"
# --- MNIST DIGIT CLASSIFICATION setup
Xtrain,Xval,num_classes,num_features = load_experiment(data_fn, experiment_name)

# -- build naive bayes model for mnist digit classification
print("MNIST DIGIT CLASSIFICATION -- NAIVE BAYES MODEL:")
nbm = NaiveBayesModel(num_classes, num_features)
nbm.train(Xtrain)
print("ACCURACY ON VALIDATION: " + str(nbm.val(Xval)))

# Random search over the number of mixture components per class.
# acc / bestnmc / best_mm track the best validation accuracy seen so far,
# the component counts that produced it, and the trained model itself.
bestnmc = None
acc = 0
best_mm = None

# -- build mixture model for mnist digit classification
print("MNIST DIGIT CLASSIFICATION -- MIXTURE MODEL:")
for trial in range(MAX_OUTER_ITER):
    num_mixture_components =  np.random.randint(2,15,num_classes)
    print("COMPONENTS: " + " ".join(str(k) for k in num_mixture_components))
    candidate = MixtureModel(num_classes, num_features, num_mixture_components)
    candidate.train(Xtrain)
    # evaluate once per trial and reuse the value for printing and selection
    val_acc = candidate.val(Xval)
    print("ACCURACY ON VALIDATION: " + str(val_acc))
    if val_acc > acc:
        acc = val_acc
        bestnmc = num_mixture_components
        best_mm = candidate

# EM starts from a random initialisation, so retraining with bestnmc would
# produce a different model than the one that was validated; submit the
# validated model itself.
mm = best_mm
# submit to kaggle
Xkaggle = load_data(data_fn, experiment_name, "kaggle")
save_submission("mm-{}-submission.csv".format(experiment_name), mm.classify(Xkaggle))

MNIST DIGIT CLASSIFICATION -- NAIVE BAYES MODEL:
ACCURACY ON VALIDATION: 0.733
MNIST DIGIT CLASSIFICATION -- MIXTURE MODEL:
COMPONENTS: 2 8 13 5 11 3 12 10 11 13
ACCURACY ON VALIDATION: 0.7805
COMPONENTS: 7 12 9 12 2 14 2 2 14 10
ACCURACY ON VALIDATION: 0.7635
COMPONENTS: 4 11 7 14 6 13 11 5 8 12
ACCURACY ON VALIDATION: 0.772
COMPONENTS: 12 7 5 5 14 8 2 3 3 7
ACCURACY ON VALIDATION: 0.7735
COMPONENTS: 6 12 4 5 14 13 2 7 12 9
ACCURACY ON VALIDATION: 0.768
COMPONENTS: 8 11 10 9 3 7 13 4 10 6
ACCURACY ON VALIDATION: 0.7985
COMPONENTS: 5 3 3 2 7 14 3 10 10 8
ACCURACY ON VALIDATION: 0.7775
COMPONENTS: 4 8 13 4 14 2 6 6 11 9
ACCURACY ON VALIDATION: 0.7795
COMPONENTS: 6 7 11 14 4 7 14 3 8 5
ACCURACY ON VALIDATION: 0.7805
COMPONENTS: 14 14 13 12 11 11 4 10 8 11
ACCURACY ON VALIDATION: 0.769
COMPONENTS: 4 7 12 6 7 8 9 3 9 3
ACCURACY ON VALIDATION: 0.7785
COMPONENTS: 2 11 6 11 9 2 14 14 8 3
ACCURACY ON VALIDATION: 0.765
COMPONENTS: 9 5 4 8 9 10 5 8 13 6
ACCURACY ON VALIDATION: 0.7795
COMPONENTS:

In [865]:
'''
Confusion Matrix
'''
# Train Naive Bayes on the sentiment data, then count its predictions.
# Row index: position of the validation group inside Xval (presumably the
# true class -- confirm against load_experiment).  Column index: the label
# returned by nbm.classify.
experiment_name = "sentiment_analysis"
Xtrain, Xval, num_classes, num_features = load_experiment(data_fn, experiment_name)

print("SENTIMENT ANALYSIS -- NAIVE BAYES MODEL:")
nbm = NaiveBayesModel(num_classes, num_features)
nbm.train(Xtrain)

# square float matrix with one row and one column per entry of Xtrain
cmatrix = np.zeros(shape=(len(Xtrain),) * 2)
for group_idx in range(len(Xval)):
    for predicted_label in nbm.classify(Xval[group_idx]):
        cmatrix[group_idx, predicted_label] += 1
cmatrix

SENTIMENT ANALYSIS -- NAIVE BAYES MODEL:


array([[  84.,   81.],
       [  59.,  276.]])

In [867]:
'''
Confusion Matrix
'''
# Train Naive Bayes on MNIST, then count its predictions.
# Row index: position of the validation group inside Xval (presumably the
# true digit -- confirm against load_experiment).  Column index: the label
# returned by nbm.classify.
experiment_name = "mnist"
Xtrain, Xval, num_classes, num_features = load_experiment(data_fn, experiment_name)

print("MNIST DIGIT CLASSIFICATION -- NAIVE BAYES MODEL:")
nbm = NaiveBayesModel(num_classes, num_features)
nbm.train(Xtrain)

# square float matrix with one row and one column per entry of Xtrain
cmatrix = np.zeros(shape=(len(Xtrain),) * 2)
for group_idx in range(len(Xval)):
    for predicted_label in nbm.classify(Xval[group_idx]):
        cmatrix[group_idx, predicted_label] += 1
cmatrix

MNIST DIGIT CLASSIFICATION -- NAIVE BAYES MODEL:


array([[ 151.,    0.,    6.,    5.,    1.,    9.,   10.,    0.,    3.,
           2.],
       [   0.,  205.,    5.,    1.,    2.,    4.,    1.,    2.,    2.,
           1.],
       [   4.,   13.,  147.,   10.,    6.,    4.,    9.,    4.,    9.,
           4.],
       [   3.,    7.,    9.,  133.,    0.,   11.,    2.,    4.,   10.,
           4.],
       [   2.,    4.,    4.,    0.,  146.,    4.,    2.,    7.,    1.,
          23.],
       [   7.,   12.,    4.,   26.,    7.,   98.,    5.,    6.,    6.,
           1.],
       [   2.,   10.,    6.,    2.,   10.,    9.,  175.,    1.,    1.,
           1.],
       [   1.,   13.,    2.,    3.,    8.,    1.,    0.,  183.,    2.,
          20.],
       [   2.,   18.,   13.,   21.,    5.,    9.,    3.,    0.,  120.,
           5.],
       [   2.,    5.,    3.,   10.,   23.,    4.,    1.,   25.,    5.,
         108.]])

On the consumer-reviews sentiment data, the Naive Bayes (NB) model's accuracy was on par with the mixture model (MM); however, NB had a hard time classifying class 0. On MNIST, NB underperformed the MM but still classified most digits without much confusion, except for the digit 5, where NB performed drastically worse.