## Randomized Singular Value Decomposition

In [10]:
using LinearAlgebra

"""
    tsvd(A, k, numiter, numoversample)

Compute an approximate rank-`k` truncated SVD of `A` using a randomized range
finder with power iterations (after Halko et al.).

# Arguments
- `A::AbstractMatrix`: the `m × n` matrix to factorize.
- `k::Integer`: number of singular triplets to return, `0 ≤ k ≤ min(m, n)`.
- `numiter::Integer`: number of power iterations. Each iteration multiplies the
  current basis by `A'` and then by `A`, re-orthonormalizing after both products.
- `numoversample::Integer`: extra sketch columns used on top of `k`.

# Returns
A tuple `(U, S, Vt)` where `U` is `m × k`, `S` holds `k` approximate singular
values and `Vt` is `k × n`, such that `A ≈ U * Diagonal(S) * Vt`.

# Throws
- `ArgumentError` if `k` lies outside `0:min(m, n)`.
"""
function tsvd(A::AbstractMatrix, k::Integer, numiter::Integer, numoversample::Integer)
    m, n = size(A)
    0 <= k <= min(m, n) ||
        throw(ArgumentError("k must satisfy 0 <= k <= min(size(A)), got $k"))

    # A sketch wider than min(m, n) cannot capture any additional directions.
    ncols = min(k + numoversample, m, n)

    # Sample the range of A with a random test matrix and orthonormalize right away,
    # so that Q has orthonormal columns even when numiter == 0.
    Q = Matrix(qr(A * rand(n, ncols)).Q)

    # Power iterations; orthonormalize after every product to keep the basis
    # well conditioned.
    for _ in 1:numiter
        Q = Matrix(qr(A' * Q).Q)
        Q = Matrix(qr(A * Q).Q)
    end

    # Factorize the small projected matrix B = Q' * A (ncols × n).
    # B = F.U * Diagonal(F.S) * F.Vt, hence A ≈ (Q * F.U) * Diagonal(F.S) * F.Vt:
    # the left factor of B is used as-is (not transposed) to lift back to A.
    F = svd(Q' * A)
    U = Q * F.U
    return U[:, 1:k], F.S[1:k], F.Vt[1:k, :]
end

tsvd (generic function with 1 method)

In [8]:
# Rank-1 test matrix: outer product of two random vectors (4096 × 1024)
A = rand(2^12) * transpose(rand(2^10))

# Randomized SVD keeping 20 triplets, with 4 power iterations and 16 oversampling columns
U, S, Vt = tsvd(A, 20, 4, 16)

# Relative reconstruction error
norm(A - U * Diagonal(S) * Vt) / norm(A)

7.662047768214454e-16

In [9]:
# Baseline: the dense svd from LinearAlgebra on the same matrix.
# Destructuring yields (U, S, V), so the third component (named Vt′ here) holds V
# and has to be transposed when rebuilding A.
U′, S′, Vt′ = svd(A)
norm(A - U′ * Diagonal(S′) * transpose(Vt′)) / norm(A)

3.3717996541638573e-15

In the extreme case of a rank-1 matrix, the truncated SVD is highly accurate.
Due to the relatively high default tolerance settings of the builtin svd routine, it even outperforms the builtin here.

In terms of runtime performance, the builtin svd is no match for the randomized version.

In [11]:
using BenchmarkTools

# Interpolate the global `A` with `$` so the benchmark times tsvd itself rather than
# access to an untyped global variable.
@benchmark tsvd($A, 20, 4, 16)

BenchmarkTools.Trial: 
  memory estimate:  26.95 MiB
  allocs estimate:  133
  --------------
  minimum time:     30.781 ms (0.00% GC)
  median time:      33.515 ms (5.03% GC)
  mean time:        34.796 ms (3.13% GC)
  maximum time:     52.626 ms (4.78% GC)
  --------------
  samples:          144
  evals/sample:     1

In [12]:
# Interpolate the global `A` with `$` so the benchmark times svd itself rather than
# access to an untyped global variable.
@benchmark svd($A)

BenchmarkTools.Trial: 
  memory estimate:  104.13 MiB
  allocs estimate:  13
  --------------
  minimum time:     238.864 ms (0.13% GC)
  median time:      245.052 ms (1.94% GC)
  mean time:        246.937 ms (1.83% GC)
  maximum time:     268.918 ms (1.73% GC)
  --------------
  samples:          21
  evals/sample:     1

### Block-Krylov SVD

In [55]:
"""
    bksvd(A, bsize=20, iter=4)

Approximate the `bsize` leading singular triplets of `A` with a randomized block
Krylov iteration (after Musco et al.).

# Arguments
- `A`: the `m × n` matrix to factorize.
- `bsize`: block size, which is also the number of singular triplets returned;
  must satisfy `1 ≤ bsize ≤ min(m, n)`.
- `iter`: number of Krylov blocks to build; must be at least 1.

# Returns
A tuple `(U, S, V)` where `U` is `m × bsize`, `S` holds `bsize` approximate singular
values and `V` is `n × bsize`, such that `A ≈ U * Diagonal(S) * V'`.

# Throws
- `ArgumentError` if `bsize` or `iter` is out of range.
"""
function bksvd(A, bsize=20, iter=4)
    m, n = size(A)
    1 <= bsize <= min(m, n) ||
        throw(ArgumentError("bsize must satisfy 1 <= bsize <= min(size(A)), got $bsize"))
    iter >= 1 || throw(ArgumentError("iter must be at least 1, got $iter"))

    # Random orthonormal starting block.
    block = Matrix(qr(randn(n, bsize)).Q)

    # Storage for the Krylov blocks. The element type is promoted so that the
    # products with A (e.g. complex A) can be stored without conversion errors.
    K = zeros(promote_type(eltype(A), Float64), n, bsize * iter)

    # Build the Krylov subspace, orthonormalizing each block with an economy-size QR.
    # No centering is applied: the centering vector was identically zero, so the
    # corresponding correction terms were no-ops and are not computed.
    for i in 1:iter
        block = Matrix(qr(A' * (A * block)).Q)
        K[:, ((i - 1) * bsize + 1):(i * bsize)] = block
    end
    Q = Matrix(qr(K).Q)

    # Rayleigh-Ritz postprocessing with an economy-size dense SVD.
    # A * Q = F.U * Diagonal(F.S) * F.V', hence A ≈ F.U * Diagonal(F.S) * (Q * F.V)':
    # the right singular vectors of A are Q * F.V (F.V is used as-is, not transposed).
    F = svd(A * Q)
    V = Q * F.V[:, 1:bsize]
    return F.U[:, 1:bsize], F.S[1:bsize], V
end

bksvd (generic function with 3 methods)

In [57]:
# Same reconstruction check for the block Krylov routine (block size 20, default iter)
U′, S′, V′ = bksvd(A, 20)
norm(A - U′ * Diagonal(S′) * transpose(V′)) / norm(A)

7.050727967517019e-16