## Goal
I want to investigate the `randomized nmf` implementation in https://github.com/erichson/ristretto (the paper is https://arxiv.org/abs/1711.02037).

In [1]:
import numpy as np
import os
import sys
import time
sys.path.insert(0,'../code/')
from utility import compute_loglik
import ristretto
from ristretto.nmf import compute_nmf
from ristretto.nmf import compute_rnmf
from scipy.stats import poisson
from sklearn.decomposition import NMF

In [2]:
def simulate_pois(n, p, rank, seed=0):
    """Simulate a Poisson count matrix from a random low-rank factorization.

    Both factors have entries drawn as exp(standard normal), so the rate
    matrix ``Lam = A.dot(W)`` is strictly positive.

    Parameters
    ----------
    n : int
        Number of columns of the simulated matrix.
    p : int
        Number of rows of the simulated matrix.
    rank : int
        Inner dimension of the factorization.
    seed : int, default 0
        Seed for the random draws.

    Returns
    -------
    X : ndarray of shape (p, n)
        Poisson counts drawn with rates ``Lam``.
    Lam : ndarray of shape (p, n)
        Rate matrix ``A.dot(W)``.
    poisson_ll
        The ``'poisson_ll'`` entry of ``compute_loglik(X, A, W)`` evaluated
        at the generating factors.
    """
    # A private RandomState produces the same legacy stream as
    # np.random.seed(seed) followed by np.random.* calls, but does not reset
    # the global generator as a hidden side effect of calling this function.
    rng = np.random.RandomState(seed)
    # Draw order (W, then A, then X) is kept so the simulated data are
    # unchanged for a given seed.
    W = np.exp(rng.normal(size=(rank, n)))
    A = np.exp(rng.normal(size=(p, rank)))
    Lam = A.dot(W)
    X = rng.poisson(lam=Lam)
    ll = compute_loglik(X, A, W)
    return X, Lam, ll['poisson_ll']

In [3]:
# Problem size: X has p rows and n columns; r is the factorization rank.
n = 1000
p = 5000
r = 5
# simulate_pois seeds its own draws from its `seed` argument, so a separate
# np.random.seed(...) call before it has no influence on the simulated data.
# The seed is passed explicitly to make the source of randomness visible.
X, Lam, p_ll = simulate_pois(n, p, r, seed=0)
print("mean poisson ll: " + str(p_ll))

mean poisson ll: -2.5349285062516196


In [4]:
# Concatenate every piece with `+`; passing the last piece as a separate
# print argument inserts an extra space before the column count.
print("X shape: p " + str(X.shape[0]) + " n " + str(X.shape[1]))

X shape: p 5000 n  1000


## NMF with HALS

In [5]:
# Fit ristretto's compute_nmf on X and time the call.
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
A, W = compute_nmf(X, rank=r, init='nndsvd')
runtime = time.perf_counter() - start
print("runtime: " + str(runtime))
# Score the fitted factors with the same helper used for the simulated truth.
ll = compute_loglik(X, A, W)
print("mean poisson ll: " + str(ll["poisson_ll"]))

TypeError: compute_nmf() got an unexpected keyword argument 'verbose'

## Randomized NMF with HALS

In [None]:
# Fit ristretto's randomized compute_rnmf on X and time the call.
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
A, W = compute_rnmf(X, rank=r, init='nndsvd', random_state=0)
runtime = time.perf_counter() - start
print("runtime: " + str(runtime))
# Score the fitted factors with the same helper used for the simulated truth.
ll = compute_loglik(X, A, W)
print("mean poisson ll: " + str(ll["poisson_ll"]))

In [None]:
# scikit-learn NMF: KL loss, multiplicative updates, plain nndsvd init.
# perf_counter is a monotonic clock intended for measuring elapsed time.
start = time.perf_counter()
model = NMF(n_components=r, init="nndsvd", tol=1e-04, beta_loss="kullback-leibler", solver="mu",
            random_state=0, max_iter=10000, verbose=False)
# fit_transform returns the loadings produced by the fit itself. Calling
# fit(...) and then transform(...) re-solves for the loadings a second time,
# which adds an extra solve to the measured runtime.
L = model.fit_transform(X.T)
runtime = time.perf_counter() - start
print("runtime: " + str(runtime))
F = model.components_
# The model was fit on X.T, so transpose both factors back to X ~ A.dot(W).
A = F.T
W = L.T
ll = compute_loglik(X, A, W)
print("mean poisson ll: " + str(ll["poisson_ll"]))


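scikit-learn's `NMF` with the `mu` solver cannot update entries that `nndsvd` initializes to exactly 0. Try `nndsvda`, which fills those zeros with the average of the data.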

In [None]:
# scikit-learn NMF: KL loss, multiplicative updates, nndsvda init.
# perf_counter is a monotonic clock intended for measuring elapsed time.
start = time.perf_counter()
model = NMF(n_components=r, init="nndsvda", tol=1e-04, beta_loss="kullback-leibler", solver="mu",
            random_state=0, max_iter=10000, verbose=False)
# fit_transform returns the loadings produced by the fit itself. Calling
# fit(...) and then transform(...) re-solves for the loadings a second time,
# which adds an extra solve to the measured runtime.
L = model.fit_transform(X.T)
runtime = time.perf_counter() - start
print("runtime: " + str(runtime))
F = model.components_
# The model was fit on X.T, so transpose both factors back to X ~ A.dot(W).
A = F.T
W = L.T
ll = compute_loglik(X, A, W)
print("mean poisson ll: " + str(ll["poisson_ll"]))

To be fair, use the `frobenius` loss with the `mu` update.

In [None]:
# scikit-learn NMF: Frobenius loss, multiplicative updates, nndsvda init.
# perf_counter is a monotonic clock intended for measuring elapsed time.
start = time.perf_counter()
model = NMF(n_components=r, init="nndsvda", tol=1e-04, beta_loss='frobenius', solver="mu",
            random_state=0, max_iter=10000, verbose=False)
# fit_transform returns the loadings produced by the fit itself. Calling
# fit(...) and then transform(...) re-solves for the loadings a second time,
# which adds an extra solve to the measured runtime.
L = model.fit_transform(X.T)
runtime = time.perf_counter() - start
print("runtime: " + str(runtime))
F = model.components_
# The model was fit on X.T, so transpose both factors back to X ~ A.dot(W).
A = F.T
W = L.T
ll = compute_loglik(X, A, W)
print("mean poisson ll: " + str(ll["poisson_ll"]))

### Comment: 
It is quite surprising that the `frobenius` loss gives such a good Poisson log-likelihood...

In [None]:
# scikit-learn NMF: Frobenius loss, coordinate-descent solver, nndsvda init.
# perf_counter is a monotonic clock intended for measuring elapsed time.
start = time.perf_counter()
model = NMF(n_components=r, init="nndsvda", tol=1e-04, beta_loss='frobenius', solver="cd",
            random_state=0, max_iter=10000, verbose=False)
# fit_transform returns the loadings produced by the fit itself. Calling
# fit(...) and then transform(...) re-solves for the loadings a second time,
# which adds an extra solve to the measured runtime.
L = model.fit_transform(X.T)
runtime = time.perf_counter() - start
print("runtime: " + str(runtime))
F = model.components_
# The model was fit on X.T, so transpose both factors back to X ~ A.dot(W).
A = F.T
W = L.T
ll = compute_loglik(X, A, W)
print("mean poisson ll: " + str(ll["poisson_ll"]))