forked from scikit-learn/scikit-learn
/
bench_plot_nmf.py
162 lines (139 loc) · 5.49 KB
/
bench_plot_nmf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""
Benchmarks of Non-Negative Matrix Factorization
"""
import gc
from time import time
import numpy as np
from collections import defaultdict
from sklearn.decomposition.nmf import NMF, _initialize_nmf
from sklearn.datasets.samples_generator import make_low_rank_matrix
def alt_nnmf(V, r, max_iter=1000, tol=1e-3, R=None):
'''
A, S = nnmf(X, r, tol=1e-3, R=None)
Implement Lee & Seung's algorithm
Parameters
----------
V : 2-ndarray, [n_samples, n_features]
input matrix
r : integer
number of latent features
max_iter : integer, optional
maximum number of iterations (default: 10000)
tol : double
tolerance threshold for early exit (when the update factor is within
tol of 1., the function exits)
R : integer, optional
random seed
Returns
-------
A : 2-ndarray, [n_samples, r]
Component part of the factorization
S : 2-ndarray, [r, n_features]
Data part of the factorization
Reference
---------
"Algorithms for Non-negative Matrix Factorization"
by Daniel D Lee, Sebastian H Seung
(available at http://citeseer.ist.psu.edu/lee01algorithms.html)
'''
# Nomenclature in the function follows Lee & Seung
eps = 1e-5
n, m = V.shape
if R == "svd":
W, H = _initialize_nmf(V, r)
elif R == None:
R = np.random.mtrand._rand
W = np.abs(R.standard_normal((n, r)))
H = np.abs(R.standard_normal((r, m)))
for i in xrange(max_iter):
updateH = np.dot(W.T, V) / (np.dot(np.dot(W.T, W), H) + eps)
H *= updateH
updateW = np.dot(V, H.T) / (np.dot(W, np.dot(H, H.T)) + eps)
W *= updateW
if True or (i % 10) == 0:
max_update = max(updateW.max(), updateH.max())
if abs(1. - max_update) < tol:
break
return W, H
def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
it = 0
timeset = defaultdict(lambda: [])
err = defaultdict(lambda: [])
max_it = len(samples_range) * len(features_range)
for n_samples in samples_range:
for n_features in features_range:
it += 1
print '===================='
print 'Iteration %03d of %03d' % (it, max_it)
print '===================='
X = np.abs(make_low_rank_matrix(n_samples, n_features,
effective_rank=rank, tail_strength=0.2))
gc.collect()
print "benching nndsvd-nmf: "
tstart = time()
m = NMF(n_components=30, tol=tolerance, init='nndsvd').fit(X)
tend = time() - tstart
timeset['nndsvd-nmf'].append(tend)
err['nndsvd-nmf'].append(m.reconstruction_err_)
print m.reconstruction_err_, tend
gc.collect()
print "benching nndsvda-nmf: "
tstart = time()
m = NMF(n_components=30, init='nndsvda',
tol=tolerance).fit(X)
tend = time() - tstart
timeset['nndsvda-nmf'].append(tend)
err['nndsvda-nmf'].append(m.reconstruction_err_)
print m.reconstruction_err_, tend
gc.collect()
print "benching nndsvdar-nmf: "
tstart = time()
m = NMF(n_components=30, init='nndsvdar',
tol=tolerance).fit(X)
tend = time() - tstart
timeset['nndsvdar-nmf'].append(tend)
err['nndsvdar-nmf'].append(m.reconstruction_err_)
print m.reconstruction_err_, tend
gc.collect()
print "benching random-nmf"
tstart = time()
m = NMF(n_components=30, init=None, max_iter=1000,
tol=tolerance).fit(X)
tend = time() - tstart
timeset['random-nmf'].append(tend)
err['random-nmf'].append(m.reconstruction_err_)
print m.reconstruction_err_, tend
gc.collect()
print "benching alt-random-nmf"
tstart = time()
W, H = alt_nnmf(X, r=30, R=None, tol=tolerance)
tend = time() - tstart
timeset['alt-random-nmf'].append(tend)
err['alt-random-nmf'].append(np.linalg.norm(X - np.dot(W, H)))
print np.linalg.norm(X - np.dot(W, H)), tend
return timeset, err
if __name__ == '__main__':
from mpl_toolkits.mplot3d import axes3d # register the 3d projection
import matplotlib.pyplot as plt
samples_range = np.linspace(50, 500, 3).astype(np.int)
features_range = np.linspace(50, 500, 3).astype(np.int)
timeset, err = compute_bench(samples_range, features_range)
for i, results in enumerate((timeset, err)):
fig = plt.figure()
ax = fig.gca(projection='3d')
for c, (label, timings) in zip('rbgcm', sorted(results.iteritems())):
X, Y = np.meshgrid(samples_range, features_range)
Z = np.asarray(timings).reshape(samples_range.shape[0],
features_range.shape[0])
# plot the actual surface
ax.plot_surface(X, Y, Z, rstride=8, cstride=8, alpha=0.3,
color=c)
# dummy point plot to stick the legend to since surface plot do not
# support legends (yet?)
ax.plot([1], [1], [1], color=c, label=label)
ax.set_xlabel('n_samples')
ax.set_ylabel('n_features')
zlabel = 'time (s)' if i == 0 else 'reconstruction error'
ax.set_zlabel(zlabel)
ax.legend()
plt.show()