In [None]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from time import time

# Fixed seed so that "Restart Kernel -> Run All" reproduces every figure below.
# Seeding from the wall clock would give a different matrix on each run; edit
# SEED by hand to look at another realization.
SEED = 0
rng = np.random.RandomState(SEED)

# Problem size: A maps length-n vectors to length-m vectors.
m, n = 500, 1000

# Gaussian random matrix of JL transform: i.i.d. N(0, 1) entries scaled by
# 1/sqrt(m), so every entry has standard deviation `stda`.
A = rng.randn(m, n) / np.sqrt(m)
stda = 1./np.sqrt(m)

# Heat map of A; the color scale is clipped at +/- 3 standard deviations.
plt.grid(False)
plt.imshow(A, cmap=plt.cm.bwr, vmin=-3.*stda, vmax=3.*stda)
plt.colorbar()
plt.show()

#plt.rc('font', family='serif')
#plt.savefig('RandomMatrix.pdf', bbox_inches='tight', dpi=300)
#plt.savefig('RandomMatrix.eps', bbox_inches='tight', dpi=300)

In [None]:
# Gram matrix of the columns of A (n-by-n).  It is expected to resemble an
# identity matrix: diagonal entries near 1, off-diagonal entries near 0.
ATA = np.dot(A.T, A)

# Heat map on a fixed [-1.2, 1.2] color scale.
plt.grid(False)
plt.imshow(ATA, vmin=-1.2, vmax=1.2, cmap=plt.cm.bwr)
plt.colorbar()
plt.show()

In [None]:
# Histogram of all entries of A^T A, with a logarithmic count axis.
_, _, _ = plt.hist(
    np.ravel(ATA),
    bins=50,
    range=(-1.2, 1.2),
    log=True,
    edgecolor='k',
)

In [None]:
# Empirical distribution of the relative error in squared norm,
#     (||A x||^2 - ||x||^2) / ||x||^2,
# over `num` independent standard-Gaussian test vectors x of length n.
num = 300
relerr = np.zeros(num)

t0 = time()
for trial in range(num):
    vec = rng.randn(n)
    proj = np.dot(A, vec)          # projected vector, length m
    sq_in = np.dot(vec, vec)       # squared norm before projection
    sq_out = np.dot(proj, proj)    # squared norm after projection
    relerr[trial] = (sq_out - sq_in) / sq_in
print('done in %.2fs.' % (time() - t0))

_, _, _ = plt.hist(relerr, bins=50, range=(-1.0, 1.0), edgecolor='k')

In [None]:
# Build a k-sparse vector x and compare it with A^T A x.
k = 3
stdx = 1.   # scale applied to the uniform [0, 1) nonzero amplitudes

# Support: k distinct indices out of n, sorted for readability.
T = rng.choice(n, k, replace=False)
T = np.sort(T)
print('True support of %d nonzeros = ' % k)
print(T)

x = np.zeros(n)
x[T] = rng.rand(k) * stdx

# Mathematically the same as A.T.dot(A.dot(x)), but reuses the Gram matrix ATA.
ATAx = np.dot(ATA, x)

# Overlay the true vector (green dots) and A^T A x (hollow red circles).
idx = np.arange(n)
plt.figure()
plt.plot(idx, x, 'g.', markersize=8, mec='green', label='True')
plt.plot(
    idx, ATAx, 'ro',
    mfc='None', markersize=8, mec='red', mew=1,
    label='A.T.dot(A.dot(x))',
)
plt.legend(loc='upper right', shadow=False)
plt.tight_layout()
plt.show()

#plt.rc('font', family='serif')
#plt.savefig('ATAx.pdf', bbox_inches='tight', dpi=300)
#plt.savefig('ATAx.eps', bbox_inches='tight', dpi=300)

### scikit-learn provides a sparse random projection

In [None]:
from sklearn.random_projection import johnson_lindenstrauss_min_dim
from sklearn.random_projection import SparseRandomProjection

# Small demo: project the 10x10 identity down to 4 components with density 1/3.
# Transforming the identity and transposing the result shows the 4x10
# projection matrix itself.
Asp = SparseRandomProjection(n_components=4, dense_output=True, density=1.0/3.0, random_state=0)
np.set_printoptions(precision=3, suppress=True)
# print() call syntax (not the Python 2 print statement), matching the other cells.
print(Asp.fit_transform(np.eye(10)).T)

In [None]:
# Sparse random projection from n down to m dimensions (default density).
Asp = SparseRandomProjection(n_components=m, dense_output=True, random_state=0)
np.set_printoptions(precision=3, suppress=True)

# Fit once, outside the timed loop.  fit() only uses the shape of its argument
# to size the random matrix, and with random_state=0 a refit per sample would
# just regenerate the same matrix, so the loop below times projection only.
Asp.fit(np.zeros((1, n)))

# Empirical distribution of the relative error in squared norm,
#     (||f||^2 - ||x||^2) / ||x||^2   with f the projection of x,
# over `num` independent standard-Gaussian test vectors.
num = 300
relerr = np.zeros(num)

t0 = time()
for i in range(num):
    x = rng.randn(n)
    # transform() expects a 2-D (n_samples, n_features) array.
    f = Asp.transform(x.reshape(1, -1)).ravel()
    norm2x, norm2f = np.dot(x, x), np.dot(f, f)
    relerr[i] = (norm2f - norm2x) / norm2x
print('done in %.2fs.' % (time() - t0))

_,_,_ = plt.hist(relerr, bins=50, range=(-1.0, 1.0), edgecolor='k')

### Efficient random projection [Sakai,09]

In [None]:
from scipy.fftpack import fft, ifft

# r: one row of random matrix (Gaussian white random vector of size n)
# d: vector of random signs (+1 / -1) of size n
# Both are drawn from the notebook's generator `rng` (not the global np.random
# state) so this cell follows the same seed as the rest of the notebook.
r = rng.randn(n) / np.sqrt(m)
fftrconj = np.conjugate(fft(r))
d = 2 * rng.randint(0, 2, size=n) - 1


def erp(x):
    """Project a length-n vector to m dimensions using FFTs.

    The input is sign-flipped elementwise by `d`, circularly cross-correlated
    with `r` (multiplication by the conjugate spectrum of `r` in the Fourier
    domain), and the first m samples of the real part are kept.

    Parameters
    ----------
    x : array of length n

    Returns
    -------
    Real array of length m.
    """
    return ifft(fft(d * x) * fftrconj)[:m].real


# Empirical distribution of the relative error in squared norm,
#     (||erp(x)||^2 - ||x||^2) / ||x||^2,
# over `num` independent standard-Gaussian test vectors.
num = 300
relerr = np.zeros(num)

t0 = time()
for i in range(num):
    x = rng.randn(n)
    f = erp(x)
    norm2x, norm2f = np.dot(x, x), np.dot(f, f)
    relerr[i] = (norm2f - norm2x) / norm2x
print('done in %.2fs.' % (time() - t0))

_,_,_ = plt.hist(relerr, bins=50, range=(-1.0, 1.0), edgecolor='k')