# KL divergence NMF (KLNMF) SCIPI

## Example: real data

In [1]:
# We will use docword kos

### Load Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

### Load Sourcecode

In [3]:
import sys
import os
import importlib

# Make ../src importable so the project module `klnmf` can be loaded.
# The base directory is still taken from sys.argv[1] when it exists, as before.
# NOTE(review): the recorded output shows klnmf loaded from notebooks\..\src,
# which suggests dirname(sys.argv[1]) was empty in the original run
# (presumably a kernel flag) -- confirm.
# When sys.argv has no second entry, fall back to the same relative '../src'
# instead of raising IndexError.
_base_dir = os.path.dirname(sys.argv[1]) if len(sys.argv) > 1 else ''
_src_dir = os.path.join(_base_dir, '..', 'src')
if _src_dir not in sys.path:
    # avoid stacking duplicate entries when this cell is re-run
    sys.path.append(_src_dir)

import klnmf

# Reload so that edits to klnmf.py are picked up without restarting the kernel.
# Kept as the last expression so the cell still displays the module.
importlib.reload(klnmf)

<module 'klnmf' from 'C:\\Users\\bions\\Desktop\\git\\SCIPI-JMLR\\notebooks\\..\\src\\klnmf.py'>

### Set Random Seed

In [4]:
# Seed NumPy's global RNG with today's date written as the integer YYYYMMDD,
# shifted by a manual offset. The seed therefore changes from day to day;
# the value actually used is printed so a run can be repeated later.
offset = 0
today_num = int(pd.Timestamp.today().strftime("%Y%m%d"))
seed_value = today_num + offset
print(f"our seed is {seed_value}")
np.random.seed(seed_value)

our seed is 20231019


### Read data

In [5]:
# The first 3 lines of docword.kos hold the metadata below and must be
# removed from the file before it is loaded.
# n and m are used as the (rows, columns) shape of the dense matrix V_orig.
# NOTE(review): e is presumably the number of (row, column, count) triplets
# in the file -- it is not used in the cells visible here; confirm.
n, m, e = 3430, 6906, 353160

In [6]:
# The data file is kept outside this repository (two directories up)
# so that we do not redistribute it and violate its licence.
# The file is space-separated with no header row, so the columns are
# labelled 0, 1, 2.
docword_path = "../../docword.kos.txt"
V_raw = pd.read_table(docword_path, sep = " ", header = None)

In [7]:
V_raw.head()

Unnamed: 0,0,1,2
0,1,61,2
1,1,76,1
2,1,89,1
3,1,211,1
4,1,296,1


### Set Size

In [8]:
# k is the size argument handed to every klnmf call below (presumably the
# factorization rank -- confirm in klnmf); our_dtype is forwarded to
# klnmf.init_klnmf.
k = 20
our_dtype = 'float32'

# Fill the dense n x m matrix from the (row id, column id, value) triplets in
# V_raw. The ids in the file are 1-based, hence the -1 shift.
# One vectorized fancy-index assignment replaces the former per-row Python
# loop over len(V_raw) entries, which did a pandas scalar lookup for every
# element. If a (row, column) pair were repeated, the last value wins, as it
# did in the loop.
row_idx = V_raw[0].to_numpy() - 1
col_idx = V_raw[1].to_numpy() - 1
V_orig = np.zeros((n, m))
V_orig[row_idx, col_idx] = V_raw[2].to_numpy()

# Rescale so that the entries of V_orig sum to k. V_orig is an ndarray, so a
# single .sum() already yields the grand total.
V_orig = V_orig / V_orig.sum() * k

In [9]:
# We rescale V_orig so that its entries sum to k.
# This is not required, but it eases the objective calculation.
# V_orig is our target matrix to be decomposed

In [10]:
print(f"size of V: {V_orig.shape}")

size of V: (3430, 6906)


### Our matrix to be decomposed

In [11]:
# make sure data is non-degenerate
V_save = V_orig.copy()

In [12]:
np.sort(V_save.sum(axis = 0))

array([2.56567047e-04, 2.56567047e-04, 2.56567047e-04, ...,
       1.61680001e-01, 2.00079536e-01, 2.89963525e-01])

In [13]:
np.sort(V_save.sum(axis = 1))

array([0.00106903, 0.00106903, 0.00106903, ..., 0.0275382 , 0.0275382 ,
       0.02864999])

In [14]:
V_orig = V_save.copy()

### Initialization

In [15]:
# Build the starting matrices from V_orig with a fixed seed (seed = 1) and the
# dtype chosen in our_dtype.
# NOTE(review): W_mat, H_mat and A_mat are not passed to any run_* call in the
# cells visible here; presumably each run_* repeats this initialization
# internally -- confirm in klnmf.
W_mat, H_mat, A_mat = klnmf.init_klnmf(V_orig, k, seed = 1, our_dtype = our_dtype)

In [16]:
# we will use the same initialization for all the methods
# the above function `init_klnmf` provides a 1-step MU initialization from a random matrix
# please see the manuscript for details

### Run Methods

In [17]:
# note that we need a sparse version of each method
# each method needs to compute the elementwise ratio
# A = V / (W @ H + eps)
# However, W @ H can have zero entries, and then the division gives NaNs regardless of V
# so we set eps to be nonzero

#### MU (Multiplicative Updates)

In [18]:
res_mu = klnmf.run_mu(V_orig, k, num_iter = 100, num_print = 5)

init: obj 104.1648424260748
round 5: obj 102.35649550863323
round 10: obj 95.20451348676059
round 15: obj 92.26192865551369
round 20: obj 90.99631439561327
round 25: obj 90.3795607579676
round 30: obj 90.03510638254485
round 35: obj 89.81744029260837
round 40: obj 89.66069631986763
round 45: obj 89.53531368924601
round 50: obj 89.4424599107308
round 55: obj 89.36884496146712
round 60: obj 89.31026681043343
round 65: obj 89.26579154448441
round 70: obj 89.23120426117708
round 75: obj 89.20229119520943
round 80: obj 89.17790023243093
round 85: obj 89.15760623174432
round 90: obj 89.14114867124131
round 95: obj 89.12682622163035
round 100: obj 89.11220172723142


In [19]:
# Sparse variant of MU, run with the same settings as the run_mu call above
# (100 iterations, num_print = 5).
# NOTE(review): this rebinds res_mu, so the result of the earlier
# klnmf.run_mu call is discarded. The PGD cells keep separate names
# (res_pgd, res_pgd2); consider a distinct name here if both results are needed.
res_mu = klnmf.run_mu_for_sparse(V_orig, k, num_iter = 100, num_print = 5)

init: obj 103.93893879314088
round 5: obj 102.36013922668374
round 10: obj 95.18087539273466
round 15: obj 92.24503787627327
round 20: obj 90.98396590914665
round 25: obj 90.36672694289098
round 30: obj 90.02217198205821
round 35: obj 89.80417126914043
round 40: obj 89.6454914818979
round 45: obj 89.52098934353793
round 50: obj 89.42922281146858
round 55: obj 89.35431926283644
round 60: obj 89.29743800195284
round 65: obj 89.25477351393879
round 70: obj 89.2207740885699
round 75: obj 89.19244923515514
round 80: obj 89.16882139874292
round 85: obj 89.15011813668727
round 90: obj 89.1326804766905
round 95: obj 89.11609832349848
round 100: obj 89.10294497258613


In [20]:
res_mu_with_normalize = klnmf.run_mu_with_normalize(V_orig, k, num_iter = 100, num_print = 5)

init: obj 104.1648424260748
round 5: obj 102.36577346105295
round 10: obj 95.31634242397286
round 15: obj 92.36458342566891
round 20: obj 91.06482753644296
round 25: obj 90.45403902647729
round 30: obj 90.13233925600784
round 35: obj 89.92916949695328
round 40: obj 89.79381371520108
round 45: obj 89.69766283482755
round 50: obj 89.62378542315531
round 55: obj 89.56398522070097
round 60: obj 89.51324326291197
round 65: obj 89.4665539884792
round 70: obj 89.42643029118169
round 75: obj 89.39167402483193
round 80: obj 89.36035478546364
round 85: obj 89.33353520111416
round 90: obj 89.31092446365781
round 95: obj 89.29077936889264
round 100: obj 89.27287077131763


In [21]:
# run_mu runs MU without rescaling every round
# run_mu_with_normalize runs MU with rescaling every round
# their objective curves are visually the same
# however, run_mu_with_normalize is a little slower due to the rescaling
# and run_mu is a little less numerically stable
# for this example they are nearly identical

#### SCIPI (Scale Invariant Power Iteration)

In [22]:
# we also have advanced versions of SCIPI
# please see the paper for the advanced methods
# but here we just run the plain method

In [23]:
res_scipi = klnmf.run_scipi_for_sparse(V_orig, k, num_iter = 100, num_print = 5)

init: obj 103.93893879314088
round 5: obj 96.33588741276958
round 10: obj 90.91033657956915
round 15: obj 89.89935522733734
round 20: obj 89.54812812810434
round 25: obj 89.38203713074503
round 30: obj 89.28401527224261
round 35: obj 89.2223408548987
round 40: obj 89.17955128938172
round 45: obj 89.15133916199065
round 50: obj 89.12833442791998
round 55: obj 89.10957436434082
round 60: obj 89.09355528124213
round 65: obj 89.08062743635554
round 70: obj 89.06981155290626
round 75: obj 89.05991531869577
round 80: obj 89.05186092383244
round 85: obj 89.04504669559853
round 90: obj 89.03772237827468
round 95: obj 89.03244195800912
round 100: obj 89.02733702772446


In [24]:
# just for this example, here's the advanced scipi approach

In [25]:
res_scipi_acc = klnmf.run_scipi_acc_for_sparse(V_orig, k, num_inner = 2, intercept = 0.1, num_iter = 100, num_print = 5)

init: obj 103.93893879314088
round 5: obj 92.56389935177882
round 10: obj 89.95523545440545
round 15: obj 89.48404768293821
round 20: obj 89.307165476845
round 25: obj 89.2160975872547
round 30: obj 89.15627813137752
round 35: obj 89.12159012995241
round 40: obj 89.09277866175583
round 45: obj 89.07502361993073
round 50: obj 89.06120186031109
round 55: obj 89.04783783690918
round 60: obj 89.03692406855879
round 65: obj 89.0254545173009
round 70: obj 89.01558573001992
round 75: obj 89.00814274084578
round 80: obj 89.00325809217208
round 85: obj 88.99816272611967
round 90: obj 88.99333232926105
round 95: obj 88.98829941972151
round 100: obj 88.98349110326761


In [26]:
# note that we implemented run_scipi_for_sparse
# instead of run_scipi
# this will affect the objective function values
# but just a little

#### PGD (Projected Gradient Descent)

In [27]:
# lots of papers about projection onto the simplex
# e.g.
# https://arxiv.org/pdf/1101.6081.pdf
# https://math.stackexchange.com/questions/3778014/matlab-python-euclidean-projection-on-the-simplex-why-is-my-code-wrong
# https://stanford.edu/~jduchi/projects/DuchiShSiCh08.html
# https://link.springer.com/article/10.1007/s10107-015-0946-6
# https://gist.github.com/mblondel/6f3b7aaad90606b98f71

In [28]:
# we choose the fastest one here among the above

In [29]:
res_pgd = klnmf.run_pgd(V_orig, k, stepsize = 1.0, num_iter = 100, num_print = 5)

init: obj 104.1648424260748
round 5: obj 102.35649550863323
round 10: obj 95.20451348676059
round 15: obj 92.26192865551369
round 20: obj 90.99631439561327
round 25: obj 90.37956075796758
round 30: obj 90.03510638254485
round 35: obj 89.81744029260838
round 40: obj 89.66069631986763
round 45: obj 89.53531368924601
round 50: obj 89.44245991073082
round 55: obj 89.36884496146712
round 60: obj 89.31026681043343
round 65: obj 89.26579154448441
round 70: obj 89.23120426117708
round 75: obj 89.20229119520943
round 80: obj 89.17790023243093
round 85: obj 89.15760623174432
round 90: obj 89.1411486712413
round 95: obj 89.12682622163035
round 100: obj 89.11220172723142


In [30]:
res_pgd2 = klnmf.run_pgd_for_sparse(V_orig, k, stepsize = 1.0, num_iter = 100, num_print = 5)

init: obj 103.93893879314088
round 5: obj 102.36013922668374
round 10: obj 95.18087539273466
round 15: obj 92.24503787627327
round 20: obj 90.98396590914665
round 25: obj 90.36672694289098
round 30: obj 90.02217198205821
round 35: obj 89.80417126914043
round 40: obj 89.6454914818979
round 45: obj 89.52098934353795
round 50: obj 89.42922281146858
round 55: obj 89.35431926283644
round 60: obj 89.29743800195286
round 65: obj 89.25477351393879
round 70: obj 89.22077408856993
round 75: obj 89.19244923515514
round 80: obj 89.16882139874292
round 85: obj 89.15011813668727
round 90: obj 89.1326804766905
round 95: obj 89.11609832349848
round 100: obj 89.10294497258613


In [31]:
# stepsize 1.5 seems faster in early rounds but slower in later rounds
# stepsize 1.0 converges much faster in later rounds

In [32]:
res_pgd3 = klnmf.run_pgd_for_sparse(V_orig, k, stepsize = 1.5, num_iter = 100, num_print = 5)

init: obj 103.93893879314088
round 5: obj 99.57478940682002
round 10: obj 92.47801425725963
round 15: obj 90.82070686116568
round 20: obj 90.16177366196804
round 25: obj 89.81703557585527
round 30: obj 89.62609434944474
round 35: obj 89.50417330135966
round 40: obj 89.4236851234761
round 45: obj 89.36473886342806
round 50: obj 89.31975391075383
round 55: obj 89.28848026586115
round 60: obj 89.2632127519097
round 65: obj 89.24425942751637
round 70: obj 89.22988935734726
round 75: obj 89.21819849882596
round 80: obj 89.20795867209034
round 85: obj 89.19834472364936
round 90: obj 89.19001661279853
round 95: obj 89.18271939127223
round 100: obj 89.17593682857523


In [33]:
# stepsize 0.8 seems slower in early rounds
# stepsize 1.0 converges faster anyways

In [34]:
res_pgd4 = klnmf.run_pgd_for_sparse(V_orig, k, stepsize = 0.8, num_iter = 100, num_print = 5)

init: obj 103.93893879314088
round 5: obj 103.0060956219129
round 10: obj 97.27713181924415
round 15: obj 93.53326973074937
round 20: obj 91.82855126451479
round 25: obj 90.93434380924438
round 30: obj 90.4266742147492
round 35: obj 90.11482873593499
round 40: obj 89.90299253985711
round 45: obj 89.75006160535534
round 50: obj 89.62664801659491
round 55: obj 89.52563855510328
round 60: obj 89.44587361155786
round 65: obj 89.38045055355948
round 70: obj 89.32523456095976
round 75: obj 89.28303405881096
round 80: obj 89.24950619104612
round 85: obj 89.22102561636459
round 90: obj 89.19711469109961
round 95: obj 89.17543194362048
round 100: obj 89.15597757956819


In [35]:
# too slow
# didn't run below

In [36]:
# res_pgd_with_linesearch = klnmf.run_pgd_with_linesearch_for_sparse(V_orig, k, num_iter = 100, num_print = 5)