In [1]:
import numpy as np
import os
import sys
import time
from ristretto.nmf import compute_nmf
from ristretto.nmf import compute_rnmf
module_path = os.path.abspath(os.path.join('../code'))
from scipy.stats import poisson


In [2]:
def compute_loglik(X,A,W):
	p,n = X.shape
	e = np.finfo(float).eps
	if A.dot(W).sum(axis = 0).mean() < 2: ## multinomial model
		method = 'multinom'
		theta = A.dot(W)
		multinom_ll = (X * np.log(theta + e)).sum()
		lam = theta.dot(np.diag(X.sum(axis = 0)))	
		poisson_ll = poisson.logpmf(X,lam).sum()/(n*p)	
	else:
		method = "poisson"
		lam = A.dot(W)
		poisson_ll = poisson.logpmf(X,lam).sum()/(n*p)

		ca = A.sum(axis = 0)
		Ahat = A.dot(np.diag(ca**-1))
		What = np.diag(ca).dot(W)
		cw = What.sum(axis = 0)
		What = What.dot(np.diag(cw**-1))
		theta = Ahat.dot(What)
		multinom_ll = (X * np.log(theta + e)).sum()

	return {"type":method, "poisson_ll":poisson_ll, "multinom_ll":multinom_ll}

In [3]:
def simulate_pois(n, p, rank, seed=0):
    np.random.seed(seed)
    W = np.random.normal(size=(rank, n))
    W = np.exp(W)
    A = np.random.normal(size=(p, rank))
    A = np.exp(A)
    Lam = A.dot(W)
    X = np.random.poisson(lam=Lam) 
    ll = compute_loglik(X,A,W)
    return X, Lam, ll['poisson_ll']

In [4]:
n = 500
p = 1000
r = 5
np.random.seed(123)
X, Lam,p_ll = simulate_pois(n,p,r)
print("mean poisson ll: " + str(p_ll))

mean poisson ll: -2.5292799193629603


In [5]:
X.shape

(1000, 500)

In [6]:
start = time.time()
A,W = compute_nmf(X,rank=r)
runtime = time.time() - start
print("runtime: " + str(runtime))
ll = compute_loglik(X,A,W)
print("mean poisson ll: " + str(ll["poisson_ll"]))

runtime: 0.6307199001312256
mean poisson ll: -2.5247216397905046


In [7]:
start = time.time()
A,W = compute_rnmf(X,rank=r)
runtime = time.time() - start
print("runtime: " + str(runtime))
ll = compute_loglik(X,A,W)
print("mean poisson ll: " + str(ll["poisson_ll"]))

TypeError: sequence item 0: expected str instance, type found