# In [30]:
# from variational_inference_utils import *
from scipy.special import polygamma
import numpy as np

# In [33]:
class VI_sLDA_M_Step:
    '''
    M-step of variational inference for supervised LDA (sLDA) with a Gaussian response.

    The default mode is minibatch natural gradient descent: every update_* method
    computes a (scaled) natural-gradient direction from the current minibatch and
    stores the proposed new value in the matching ``new_*`` attribute; the current
    parameters themselves are never overwritten here.

    The helpers ``stochastic_variational_update``, ``stochastic_hyperparameter_update``
    and ``linear_time_natural_gradient`` are not defined in this file; the
    commented-out import at the top of the file suggests they come from
    ``variational_inference_utils`` -- confirm before running.
    '''
    def __init__(self, K, bow, y, alpha, xi, eta, delta, Lambda, gamma, phi, rho=None, corpus_size=None):
        '''
        K           -- number of topics
        bow         -- list (length D) of dictionaries {word id: count}, one per document
        y           -- D-dimensional response vector
        alpha       -- K-dimensional Dirichlet hyperparameter for topic proportions
        xi          -- V-dimensional Dirichlet hyperparameter for topics
        eta, delta  -- regression coefficients (K-dimensional) and variance of the response
        Lambda      -- K x V variational parameter of the topics
        gamma       -- D x K variational parameter of the topic proportions
        phi         -- list (length D); phi[d] is an N_d x K array, one row per token
        rho         -- step size handed to the stochastic update helpers
        corpus_size -- number of documents in the whole corpus; defaults to the
                       minibatch size D (i.e. the minibatch is the whole corpus)
        '''
        self.K = K # number of topics
        self.bow = bow # list of dictionaries, with length D
        self.doc_len = [sum(v.values()) for v in bow] # number of words within each document
        self.y = y # D-dimensional vector
        self.alpha = alpha # K-dimensional vector
        self.new_alpha = None
        self.xi = xi # V-dimensional vector
        self.new_xi = None
        self.eta = eta
        self.new_eta = None
        self.delta = delta
        self.new_delta = None
        self.Lambda = Lambda # size: K x V
        self.new_Lambda = None
        self.D = len(self.bow) # batch_size: number of documents in the minibatch
        self.gamma = gamma # size: D x K
        self.phi = phi # for each document, size is N_d x K
        # np.vstack needs a real sequence; passing a generator is rejected by current NumPy
        self.phi_bar = np.vstack([np.asarray(self.phi[d]).mean(axis=0) for d in range(self.D)]) # size: D x K
        # Sum over documents of E[Z_bar Z_bar^T], size: K x K.
        # For one document with token responsibilities phi_n:
        #   E[Z_bar Z_bar^T] = 1/N^2 * (sum_{n != m} phi_n phi_m^T + sum_n diag(phi_n))
        #                    = 1/N^2 * (s s^T - Phi^T Phi + diag(s)),   s = sum_n phi_n
        # because E[Z_n Z_n^T] = diag(phi_n) while distinct tokens are independent under q.
        self.expect_x_x_t = np.zeros(shape=(K, K))
        for d in range(self.D):
            phi_d = np.asarray(self.phi[d])
            N_d = self.doc_len[d]
            phi_sum = phi_d.sum(axis=0)
            self.expect_x_x_t += (np.outer(phi_sum, phi_sum) - phi_d.T @ phi_d + np.diag(phi_sum)) / N_d**2
        self.rho = rho
        # without a corpus size every "corpus_size / D" scaling below would fail on None
        self.corpus_size = self.D if corpus_size is None else corpus_size

    def update_Lambda(self):
        '''
        Update rule for the global variational parameter Lambda; the result is
        stored in self.new_Lambda.

        NOTE(review): the rows of phi[d] are assumed to follow the iteration order
        of bow[d], each word id repeated ``count`` times -- confirm against the
        E-step that produced phi. A ValueError is raised when phi[d] does not have
        exactly doc_len[d] rows.
        '''
        # natural gradient of ELBO w.r.t the variational distribution q(beta | Lambda)
        Lambda_hat = np.zeros_like(self.Lambda, dtype=float)
        for d, doc in enumerate(self.bow):
            phi_d = np.asarray(self.phi[d])
            if phi_d.shape[0] != self.doc_len[d]:
                raise ValueError(
                    "phi[%d] has %d rows but document %d contains %d tokens"
                    % (d, phi_d.shape[0], d, self.doc_len[d])
                )
            start = 0
            for v, count in doc.items():
                # accumulate the responsibilities of every token whose word id is v
                Lambda_hat[:, v] += phi_d[start:start + count, :].sum(axis=0)
                start += count
        # scale the minibatch statistics up to the corpus, then add the prior to every column
        Lambda_hat = self.corpus_size / self.D * Lambda_hat + self.xi
        self.new_Lambda = stochastic_variational_update(self.Lambda, Lambda_hat, self.rho)

    def update_alpha(self, batch=False):
        '''
        Update rule for the global hyperparameter alpha; the result is stored in
        self.new_alpha.

        batch=False -- stochastic step through stochastic_hyperparameter_update
        batch=True  -- one full Newton step: new_alpha = alpha - alpha_hat
        '''
        alpha_sum = np.sum(self.alpha)
        g = self.D * (polygamma(0, alpha_sum) - polygamma(0, self.alpha)) # gradient of ELBO w.r.t. alpha
        g += polygamma(0, self.gamma).sum(axis=0) - np.sum(polygamma(0, self.gamma.sum(axis=1)))
        g = self.corpus_size / self.D * g # scale based on minibatch size
        # the Hessian is passed as its diagonal h and the constant z added to every entry
        h = -self.corpus_size * polygamma(1, self.alpha)
        z = self.corpus_size * polygamma(1, alpha_sum)
        # compute (the scaled) natural gradient of ELBO w.r.t. p(theta_{1:corpus_size} | alpha)
        alpha_hat = linear_time_natural_gradient(g, h, z)
        if batch:
            # new_alpha starts out as None, so it has to be assigned rather than decremented in place
            self.new_alpha = self.alpha - alpha_hat
        else:
            self.new_alpha = stochastic_hyperparameter_update(self.alpha, alpha_hat, self.rho)

    def update_xi(self, batch=False):
        '''
        Update rule for the global hyperparameter xi; the result is stored in
        self.new_xi.

        batch=False -- stochastic step through stochastic_hyperparameter_update
        batch=True  -- one full Newton step: new_xi = xi - xi_hat
        '''
        xi_sum = np.sum(self.xi)
        g = self.K * (polygamma(0, xi_sum) - polygamma(0, self.xi)) # gradient of ELBO w.r.t. xi
        g += polygamma(0, self.Lambda).sum(axis=0) - np.sum(polygamma(0, self.Lambda.sum(axis=1)))
        h = -self.K * polygamma(1, self.xi)
        z = self.K * polygamma(1, xi_sum)
        xi_hat = linear_time_natural_gradient(g, h, z) # compute natural gradient of ELBO w.r.t. p(beta_{1:K} | xi)
        if batch:
            self.new_xi = self.xi - xi_hat
        else:
            self.new_xi = stochastic_hyperparameter_update(self.xi, xi_hat, self.rho)

    def update_eta_and_delta(self):
        '''
        Update rule for the global hyperparameters eta and delta (Gaussian response);
        the results are stored in self.new_eta and self.new_delta.

        With S = y^T y - 2 eta^T (Phi_bar^T y) + eta^T E[Z_bar Z_bar^T] eta, the
        response part of the ELBO is  -D/2 * log(2 pi delta) - S / (2 delta).
        '''
        phi_bar_times_y = np.dot(self.y, self.phi_bar) # K-dimensional vector
        expect_x_x_t_times_eta = np.dot(self.expect_x_x_t, self.eta)
        y_t_y = np.sum(np.square(self.y))
        temp_var = np.sum(self.eta * (phi_bar_times_y - expect_x_x_t_times_eta/2))
        residual = y_t_y - 2*temp_var # the quantity S from the docstring
        g_eta = (1/self.delta)*(phi_bar_times_y - expect_x_x_t_times_eta)
        g_delta = -self.D/(2*self.delta) + residual/(2*self.delta**2)
        # gradient is of K+1 dimensional, scale based on minibatch size
        g = self.corpus_size / self.D * np.hstack((g_eta, np.array([g_delta])))
        h_11 = -1/self.delta*self.expect_x_x_t
        h_21 = -g_eta / self.delta
        h_22 = self.D/(2*self.delta**2) - residual/self.delta**3
        h = np.zeros(shape=(self.K+1, self.K+1))
        h[:self.K, :self.K] = h_11
        h[self.K, self.K] = h_22
        h[self.K, :self.K] = h_21
        h[:self.K, self.K] = h_21
        h = self.corpus_size / self.D * h # (scaled) Hessian is of (K+1) x (K+1) dimensional
        # approximated natural gradient of ELBO w.r.t P(Y_{1:corpus_size}|eta, delta);
        # solving the linear system avoids forming the explicit inverse
        eta_delta_hat = np.linalg.solve(h, g)
        updated_eta_delta = stochastic_hyperparameter_update(
            np.hstack((self.eta, np.array([self.delta]))), eta_delta_hat, self.rho)
        self.new_eta = updated_eta_delta[:self.K]
        self.new_delta = updated_eta_delta[self.K]

    def run(self):
        # run the full M-step (not implemented yet)
        pass

# In [1]:
class batch_VI_sLDA_M_Step(VI_sLDA_M_Step):
    '''
    Batch (full-corpus) M-step for sLDA: instead of a single stochastic step,
    alpha and xi are iterated with Newton-Raphson until the mean absolute change
    drops to epsilon or below, and eta / delta are solved in closed form.
    '''

    def __init__(self, K, bow, alpha, xi, eta, delta, Lambda, gamma, phi, epsilon, y=None):
        '''
        Same arguments as the parent class, plus:

        epsilon -- convergence threshold on the mean absolute parameter change
        y       -- D-dimensional response vector; it is appended as a trailing
                   argument so the existing positional order stays valid, and it
                   is required by optimize_eta_and_delta
        '''
        # The parent expects y right after bow; leaving it out shifts every later
        # argument by one position. In batch mode the minibatch is the whole
        # corpus, so corpus_size equals the number of documents.
        super().__init__(K, bow, y, alpha, xi, eta, delta, Lambda, gamma, phi,
                         corpus_size=len(bow))
        self.epsilon = epsilon

    def optimize_alpha(self):
        # run a full Newton-Raphson algorithm to optimize alpha in M step
        change_in_alpha = np.inf
        while change_in_alpha > self.epsilon:
            # seed new_alpha with a copy of the current value so that a parent
            # implementation that updates new_alpha in place starts from alpha
            self.new_alpha = np.array(self.alpha, dtype=float)
            self.update_alpha(batch=True)
            change_in_alpha = np.mean(np.abs(self.new_alpha - self.alpha))
            self.alpha = self.new_alpha

    def _xi_newton_step(self):
        # One Newton-Raphson step for xi, returning xi - H^{-1} g. The gradient and
        # the Hessian pieces (diagonal h, constant z) are computed here so the
        # step does not depend on how the parent stores its stochastic update.
        xi_sum = np.sum(self.xi)
        g = self.K * (polygamma(0, xi_sum) - polygamma(0, self.xi))
        g += polygamma(0, self.Lambda).sum(axis=0) - np.sum(polygamma(0, self.Lambda.sum(axis=1)))
        h = -self.K * polygamma(1, self.xi)
        z = self.K * polygamma(1, xi_sum)
        return self.xi - linear_time_natural_gradient(g, h, z)

    def optimize_xi(self):
        # run a full Newton-Raphson algorithm to optimize xi in M step
        change_in_xi = np.inf
        while change_in_xi > self.epsilon:
            self.new_xi = self._xi_newton_step()
            change_in_xi = np.mean(np.abs(self.new_xi - self.xi))
            self.xi = self.new_xi

    def update_xi(self):
        # kept under its original name for existing callers; same as optimize_xi()
        self.optimize_xi()

    def optimize_eta_and_delta(self):
        '''
        Closed-form batch solution for eta and delta:

            eta   = E[Z_bar Z_bar^T]^{-1} (Phi_bar^T y)
            delta = (y^T y - (Phi_bar^T y)^T eta) / D

        Results are stored in new_eta / new_delta (the names initialised by the
        parent) and mirrored in eta_new / delta_new, the names this method used
        originally. Raises ValueError when no response vector y was supplied.
        '''
        if self.y is None:
            raise ValueError("optimize_eta_and_delta requires the response vector y")
        y = np.asarray(self.y, dtype=float)
        phi_bar_times_y = np.dot(y, self.phi_bar)
        # solve the normal equations directly rather than forming the inverse
        self.new_eta = np.linalg.solve(self.expect_x_x_t, phi_bar_times_y)
        self.new_delta = (np.sum(np.square(y)) - np.dot(phi_bar_times_y, self.new_eta)) / self.D
        self.eta_new = self.new_eta
        self.delta_new = self.new_delta

    def compute_elbo(self):
        # not implemented yet
        pass

    def run(self):
        # override the .run() method in the parent Class
        # run the full M step (not implemented yet)
        pass

# Output of the cell above: NameError: name 'VI_sLDA_M_Step' is not defined