In [1]:
# Copyright (c) 2016, Vladimir Feinberg
# Licensed under the BSD 3-clause license (see LICENSE)

%matplotlib inline 
import matplotlib.pyplot as plt

import sys
import math

import contexttimer
import numpy as np
import scipy.linalg
import scipy.sparse.linalg

from runlmc.linalg.kronecker import Kronecker
from runlmc.linalg.sum_matrix import SumMatrix
from runlmc.linalg.toeplitz import Toeplitz
from runlmc.linalg.numpy_matrix import NumpyMatrix
import runlmc.util.testing_utils as utils

from scipy.fftpack import fft, ifft
from scipy.sparse.linalg import LinearOperator

def chan(t): # could be parallel...
    """Build the first column of a circulant approximation to a Toeplitz matrix.

    With ``n = len(t)``, entry ``k`` of the result is
    ``((n - k) * t[k] + k * t[n - k]) / n``; entry 0 is simply ``t[0]``.
    The result is symmetric in the sense ``out[k] == out[n - k]``.

    :param t: one-dimensional sequence, used as the first column of a
        symmetric Toeplitz matrix by the callers in this notebook.
    :return: float array with the same length as ``t``.
    """
    size = len(t)
    offsets = np.arange(size)
    # Weight t[k] by (n - k), the number of entries on the k-th diagonal.
    column = (t * (size - offsets)).astype(float)
    # For k >= 1 add the wrap-around partner t[n - k], weighted by k.
    column[1:] += t[:0:-1] * offsets[1:]
    return column / size

def inv_precond(lus, perm, inv_perm, x):
    """Solve a block system frequency-by-frequency using the FFT.

    ``x`` is split into ``D`` equal segments, each segment is Fourier
    transformed, and for every frequency the ``D`` coefficients are passed
    through ``scipy.linalg.lu_solve`` with the matching entry of ``lus``.
    The solutions are put back in segment order and inverse transformed.

    :param lus: sequence of LU factorizations in the form accepted by
        ``scipy.linalg.lu_solve``; ``D`` is read from the first factor's
        leading dimension, and one factorization is consumed per frequency.
    :param perm: index array that regroups the flattened ``(D, n)`` spectrum
        into frequency-major order.
    :param inv_perm: index array undoing ``perm``.
    :param x: one-dimensional numpy array whose length is a multiple of ``D``.
    :return: one-dimensional complex array with the same length as ``x``.
    """
    n_blocks = lus[0][0].shape[0]
    # Row i of the spectrum is the FFT of the i-th segment of x.
    spectrum = fft(x.astype('complex').reshape(n_blocks, -1), overwrite_x=True)
    # After the permutation, row k holds the n_blocks coefficients at
    # frequency k, which is the right-hand side for the k-th small solve.
    by_frequency = spectrum.reshape(-1)[perm].reshape(-1, n_blocks)
    solved = []
    for factors, rhs in zip(lus, by_frequency):
        solved.append(scipy.linalg.lu_solve(factors, rhs))
    # Back to segment-major order, then back from the frequency domain.
    by_segment = np.hstack(solved)[inv_perm].reshape(n_blocks, -1)
    return ifft(by_segment, overwrite_x=True).reshape(-1)

def stress_sum_solve(my_mat):
    """Time a dense solve and four iterative solves of ``my_mat x = b``.

    A random right-hand side is drawn, the condition number of the dense
    matrix is printed, and a block-circulant preconditioner is assembled
    from the terms of ``my_mat``. Each solver is then timed and its residual
    norm printed. Labels of iterative solvers that return a non-zero status
    are prefixed with ``*``.

    :param my_mat: matrix object providing ``shape``, ``as_numpy()``,
        ``as_linear_operator()`` and ``Ks``; every element of ``Ks`` must
        expose ``A.A`` and ``B.top``. Presumably a SumMatrix of Kronecker
        products of a dense matrix with a Toeplitz matrix - TODO confirm.
    """
    rhs = np.random.rand(my_mat.shape[0])
    size = len(rhs)
    dense = my_mat.as_numpy()
    operator = my_mat.as_linear_operator()

    tol = 1e-6 # min(np.finfo('float64').eps * sz * max(math.log(sz), 1) * 2, 1e-10)
    print('    cond {} tol {:g}'.format(np.linalg.cond(dense), tol))
    ridge = 1e-5

    # Collect the left factors and Toeplitz tops of every term, then append
    # one extra term: identity on the left, ridge * e_0 as the Toeplitz top.
    lefts = np.array([term.A.A for term in my_mat.Ks])
    lefts = np.concatenate((lefts, [np.identity(len(lefts[0]))])) #?
    tops = np.array([term.B.top for term in my_mat.Ks])
    ridge_top = np.zeros_like(tops[0])
    ridge_top[0] += ridge
    tops = np.concatenate((tops, [ridge_top])) #?

    # toep_blocks[i, j] is the top of the (i, j) Toeplitz block of the sum;
    # each one is replaced by its circulant approximation.
    toep_blocks = np.tensordot(lefts, tops, axes=(0, 0))
    circ_pre = np.array([[chan(top) for top in block_row]
                         for block_row in toep_blocks])
    circ_eigs = fft(circ_pre, overwrite_x=True) # applies to last axis

    # One small matrix per frequency, factored once up front.
    lus = [scipy.linalg.lu_factor(per_freq, overwrite_a=True)
           for per_freq in np.rollaxis(circ_eigs, -1, 0)]
    # the rolled axis perm
    n_freq = circ_eigs.shape[2]
    n_rows = circ_eigs.shape[0]
    perm = np.add.outer(np.arange(n_freq), np.arange(n_rows) * n_freq).ravel()
    inv_perm = np.zeros(len(perm), dtype=int)
    inv_perm[perm] = np.arange(len(perm))

    def apply_precond(x):
        return inv_precond(lus, perm, inv_perm, x).real
    pre = LinearOperator((size, size), matvec=apply_precond,
                         rmatvec=apply_precond)

    def time_method(f):
        # f returns (solution, label); only the call itself is timed.
        with contexttimer.Timer() as solve_time:
            solve, name = f()
        label = name.rjust(20)
        elapsed = solve_time.elapsed
        residual = np.linalg.norm(operator.matvec(solve) - rhs)
        print('    {} sec {:8.4f} resid {:8.4e}'.format(
            label, elapsed, residual))

    def iterative(solver, label, **extra):
        # Wrap a scipy iterative solver so that its status flag becomes the
        # '*' marker in the printed label.
        def attempt():
            solution, status = solver(
                my_mat, rhs, tol=tol, maxiter=size, **extra)
            return solution, label.format('*' if status else '')
        return attempt

    time_method(lambda: (np.linalg.solve(dense, rhs), 'linear solve'))
    time_method(iterative(scipy.sparse.linalg.cg, '{} sparse CG'))
    time_method(iterative(scipy.sparse.linalg.cg, '{} sparse CG+P', M=pre))
    time_method(iterative(scipy.sparse.linalg.minres, '{} sparse MINRES'))
    time_method(iterative(scipy.sparse.linalg.minres, '{} sparse MINRES+P',
                          M=pre))

In [2]:
# Simulated command line for the benchmark driver: prog n d q eps
sys.argv = ['', '1000', '4', '3', '1e-5']

# Legend: stress_sum_solve prefixes a solver label with '*' when that solver
# returns a non-zero status.
print('* = no convergence')
# NOTE(review): run_main presumably reads sys.argv and calls stress_sum_solve
# on several generated matrices (the recorded output shows three cases) -
# confirm against runlmc.util.testing_utils.
utils.run_main(stress_sum_solve, '')

* = no convergence
size q 3 n 1000 d 4 eps 1e-05
random (well-cond) 
    cond 1664.9544339572756 tol 1e-06
            linear solve sec   0.7578 resid 1.8225e-05
               sparse CG sec   0.5626 resid 2.9578e-05
             sparse CG+P sec   0.5447 resid 8.5177e-06
           sparse MINRES sec   0.4947 resid 8.6360e-05
         sparse MINRES+P sec   0.3953 resid 1.9974e-03
linear decrease (poor-cond)
    cond 2738462.7709808256 tol 1e-06
            linear solve sec   0.7330 resid 3.6883e-05
               sparse CG sec  20.0079 resid 2.8188e-05
             sparse CG+P sec   0.2167 resid 3.4931e-05
           sparse MINRES sec  14.8719 resid 6.9144e-04
         sparse MINRES+P sec   0.2026 resid 4.6861e-05
exponentially decreasing (realistic)
    cond 41.97426199431347 tol 1e-06
            linear solve sec   0.8960 resid 4.3154e-05
               sparse CG sec   0.4345 resid 3.5697e-05
             sparse CG+P sec   0.3156 resid 9.5542e-06
           sparse MINRES sec   0.3471 

In [3]:
np.set_printoptions(precision=3)

# Iteration counter shared with the CG callback; it is copied out and reset
# by hand after every solve below.
ctr = 0
def i(_):
    """CG callback: count one iteration and ignore the current iterate."""
    global ctr
    ctr += 1

# Redraw (t, s) until the symmetric Toeplitz matrix built from t has rank 100.
T = t = s = None
r = None
while r != 100:
    t = np.random.randint(0, 100, 100)
    s = np.random.randint(0, 100, 100)
    # Sorting the reversed view in place leaves t in descending order.
    t[::-1].sort()
    #t[0] = np.fabs(t[1:]).sum() + 1
    T = scipy.linalg.toeplitz(t)
    r = np.linalg.matrix_rank(T)
    print('rank', r)

# Circulant approximation of T, used as the preconditioner.
C = scipy.linalg.circulant(chan(t))
print('T^-1s')

# Baseline: CG without a preconditioner.
b = scipy.sparse.linalg.cg(T, s, tol=1e-10, maxiter=10000, callback=i)[0]
lcg, ctr = ctr, 0

# Preconditioner 1: multiply by the explicit dense inverse of C.
iC = np.linalg.inv(C)
lo = LinearOperator(C.shape, matvec=iC.dot, rmatvec=iC.dot)

# Preconditioner 2: divide by the FFT of C's first row in the frequency
# domain instead of forming the inverse.
ceig = fft(C[0])
def mv2(x):
    scaled = fft(x) / ceig
    return ifft(scaled).real
lo2 = LinearOperator(C.shape, matvec=mv2, rmatvec=mv2)

c = scipy.sparse.linalg.cg(T, s, tol=1e-10, M=lo, maxiter=10000, callback=i)[0]
pcg, ctr = ctr, 0
cc = scipy.sparse.linalg.cg(T, s, tol=1e-10, M=lo2, maxiter=10000, callback=i)[0]
pcg2, ctr = ctr, 0

# Iteration counts, then distance of each preconditioned answer from baseline.
print('LCG', lcg, 'PCG', pcg, 'PCG2', pcg2)
print('eq', np.linalg.norm(b - c), np.linalg.norm(b - cc))

rank 100
T^-1s
LCG 573 PCG 560 PCG2 563
eq 3.05476619573e-07 2.92568267283e-07


In [4]:
np.set_printoptions(precision=3)

# Iteration counter shared with the CG callback; copied out and reset by hand
# after every solve below.
ctr = 0
def i(_):
    """CG callback: count one iteration and ignore the current iterate."""
    global ctr
    ctr += 1

# Draw two descending Toeplitz tops; double the leading entry of a top until
# its Toeplitz matrix reaches full rank.
t = []
sz = 100
while len(t) < 2:
    tt = np.random.rand(sz)
    tt[::-1].sort()
    TT = scipy.linalg.toeplitz(tt)
    r = np.linalg.matrix_rank(TT)
    while r != sz:
        tt[0] *= 2
        TT = scipy.linalg.toeplitz(tt)
        r = np.linalg.matrix_rank(TT)
    t.append(tt)

s = np.random.randn(len(t) * len(t[0]))
Bs = np.array([[[3, 1], [1, 3]], [[2, 1], [1, 2]]])
tops = np.array(t)
# tb[i, j] is the Toeplitz top of block (i, j) of the summed matrix.
tb = np.tensordot(Bs, tops, axes=(0, 0))

# Dense reference matrix: sum of Kronecker products kron(B_q, toeplitz(t_q)).
full = sum(np.kron(left, scipy.linalg.toeplitz(top))
           for left, top in zip(Bs, t))
print(tb.shape)
circ_pre = np.array([[chan(top) for top in block_row] for block_row in tb])

# Dense block matrix whose blocks are the circulant approximations.
C = np.bmat([[scipy.linalg.circulant(column) for column in block_row]
             for block_row in circ_pre]).A

# Baseline: CG without a preconditioner.
b = scipy.sparse.linalg.cg(full, s, tol=1e-10, maxiter=10000, callback=i)[0]
lcg, ctr = ctr, 0

# Preconditioner 1: multiply by the explicit dense inverse of C.
iC = np.linalg.inv(C)
lo = LinearOperator(C.shape, matvec=iC.dot, rmatvec=iC.dot)

# Preconditioner 2: per-frequency LU solves via inv_precond.
ceig = fft(circ_pre)
rc = np.rollaxis(ceig, -1, 0)
lus = [scipy.linalg.lu_factor(per_freq, overwrite_a=True) for per_freq in rc]
# the rolled axis perm
perm = np.add.outer(
    np.arange(ceig.shape[2]),
    np.arange(ceig.shape[0]) * ceig.shape[2]).ravel()
inv_perm = np.zeros(len(perm), dtype=int)
inv_perm[perm] = np.arange(len(perm))

def mv2(x):
    solved = inv_precond(lus, perm, inv_perm, x)
    return solved.real
lo2 = LinearOperator(C.shape, matvec=mv2, rmatvec=mv2)

c = scipy.sparse.linalg.cg(full, s, tol=1e-10, M=lo, maxiter=10000, callback=i)[0]
pcg, ctr = ctr, 0
cc = scipy.sparse.linalg.cg(full, s, tol=1e-10, M=lo2, maxiter=10000, callback=i)[0]
pcg2, ctr = ctr, 0

# Iteration counts, then distance of each preconditioned answer from baseline.
print('LCG', lcg, 'PCG', pcg, 'PCG2', pcg2)
print('eq', np.linalg.norm(b - c), np.linalg.norm(b - cc))

(2, 2, 100)
LCG 7335 PCG 3817 PCG2 3403
eq 2.63652168501e-05 2.04259999591e-05


In [None]:
# Copyright (c) 2016, Vladimir Feinberg
# Licensed under the BSD 3-clause license (see LICENSE)

# pylint: skip-file

import sys

import contexttimer
import numpy as np
import scipy.linalg
import scipy.spatial.distance
import scipy.sparse.linalg

from runlmc.approx.interpolation import multi_interpolant
from runlmc.kern.rbf import RBF
from runlmc.kern.matern32 import Matern32
from runlmc.kern.std_periodic import StdPeriodic
from runlmc.models.lmc import LMC
from runlmc.derivative.lmc_deriv import ExactLMCDerivative, ApproxLMCDerivative

# Usage text that _main() prints before exiting with status 1 when the number
# of command-line arguments is outside the accepted range.
_HELP_STR = """
Usage: python bench.py n_o d q eps [kern] [seed]

n_o > 7 is the number of inputs per output
d > 0 is the number of outputs
q > 0 is the number of LMC kernel terms
eps > 0 is the constant diagonal perturbation mean (a float)
kern is the kernel type, default rbf, one of 'rbf' 'periodic' 'matern' 'mix'
seed is the random seed, default 1234

For all benchmarks, this constructs a variety of LMC kernels,
all of which conform to the parameters n_o,d,q,eps specified
above. The particular kernel constructed is the sum of q ICM
terms:

  Aq = aa^T, a ~ Normal(mean=0, cov=I)
  kappa ~ vector of InverseGamma(shape=1, scale=1)
  Bq = Aq + kappa I
  Kq = one of RBF, Matern32, StdPeriodic applied to inputs
  entire term: HadamardProduct(KroneckerProduct(Bq, 1), Kq

Finally, we add independent noise for each output, sampled
from InverseGamma(shape=(1 + eps^-1), scale=1)

Choose q = d = 1 and n large to test Toeplitz, mainly
Choose q = 1 and n ~ d^2 > 7 to test Kronecker, mainly

Inputs/outputs are random and uniform in (0, 1). The interpolation grid
used by the SKI approximation is a grid with n_o datapoints.
"""

def _main():
    """Parse ``sys.argv``, draw random LMC parameters and run one benchmark.

    The usage text is printed and the process exits with status 1 unless
    four to six arguments follow the program name. Argument values are then
    checked with assertions, the numpy RNG is seeded, and the benchmark for
    the selected kernel family is run.
    """
    argv = sys.argv
    min_args = 5
    max_args = min_args + 2
    if not min_args <= len(argv) <= max_args:
        print(_HELP_STR)
        sys.exit(1)

    n_o, d, q = int(argv[1]), int(argv[2]), int(argv[3])
    eps = float(argv[4])
    kern = 'rbf'
    if len(argv) > 5:
        kern = argv[5]
    seed = 1234
    if len(argv) > 6:
        seed = int(argv[6])
    kerntypes = ['rbf', 'periodic', 'matern', 'mix']

    assert n_o > 7
    assert d > 0
    assert q > 0
    assert eps > 0
    assert kern in kerntypes
    np.random.seed(seed)
    # Total number of observations; not referenced again in this function.
    n = n_o * d

    print('n_o {} d {} q {} eps {} kern {} seed {}'.format(
        n_o, d, q, eps, kern, seed))

    # Random draws happen in a fixed order so a given seed reproduces a run.
    coreg_vecs = np.random.randn(q, d)
    coreg_diags = np.reciprocal(
        np.random.gamma(shape=1, scale=1, size=(q, d)))
    noise_shape = 1 + (1 / eps)
    noise = np.reciprocal(
        np.random.gamma(shape=noise_shape, scale=1, size=d))

    # gen_kernels returns one kernel list per entry of kerntypes, in order.
    kernels = gen_kernels(q)
    descriptions = [
        'rbf only - inverse lengthscales in logspace(0, 1, q)',
        'periodic only - inverse lengthscale is 1, periods in logspace',
        'matern32 only - invers lenngthscales in logspace',
        'mixed - all the above, with lengthscales/periods in'
        ' logspace(0, 1, max(q // 3, 1)']
    kdict = dict(zip(kerntypes, zip(kernels, descriptions)))

    Xs, Ys = np.random.rand(2, d, n_o)

    dists, grid_dists, interpolant, interpolant_T = prep(d, n_o, Xs)

    print()
    k, desc = kdict[kern]
    run_kernel_benchmark(
        k, desc, coreg_vecs, coreg_diags, noise, Xs, np.hstack(Ys),
        dists, grid_dists, interpolant, interpolant_T)

def prep(d, n_o, Xs):
    """Precompute and time the inputs of the exact and approximate methods.

    Replicates LMC (runlmc.models.lmc) code minimally.

    :param d: not used by this function.
    :param n_o: not used by this function.
    :param Xs: numpy array of inputs; it is flattened to a single column for
        the pairwise distances and passed as-is to the grid and interpolant
        builders.
    :return: ``(dists, grid_dists, interpolant, interpolantT)`` - the square
        pairwise-distance matrix, the grid offsets from its first point, the
        interpolation matrix and its transpose in CSR form.
    """
    with contexttimer.Timer() as exact_timer:
        condensed = scipy.spatial.distance.pdist(Xs.reshape(-1, 1))
        dists = scipy.spatial.distance.squareform(condensed)

    with contexttimer.Timer() as apprx_timer:
        # The second value returned by _autogrid is not needed here.
        grid, _ = LMC._autogrid(Xs, lo=None, hi=None, m=None)
        grid_dists = grid - grid[0]
        interpolant = multi_interpolant(Xs, grid)
        interpolantT = interpolant.transpose().tocsr()

    report = [
        'preparation time (once per optimization)',
        '    {:8.4f} sec exact - pairwise distances'.format(
            exact_timer.elapsed),
        '    {:8.4f} sec apprx - linear interpolation'.format(
            apprx_timer.elapsed)]
    print()
    for line in report:
        print(line)
    return dists, grid_dists, interpolant, interpolantT

def run_kernel_benchmark(
        kernels, desc, coreg_vecs, coreg_diags, noise, Xs, y,
        dists, grid_dists, interpolant, interpolantT):
    """Compare exact and approximate LMC derivatives for one kernel set.

    Builds an ``ExactLMCDerivative`` and an ``ApproxLMCDerivative`` from the
    same parameters, prints spectrum and timing information, and then prints
    the time and the exact-vs-approximate error for every gradient family.

    :param kernels: kernel objects forwarded to both derivative classes.
    :param desc: description line printed first.
    :param coreg_vecs: coregionalization vectors, forwarded unchanged.
    :param coreg_diags: coregionalization diagonals, forwarded unchanged.
    :param noise: per-output noise, forwarded unchanged.
    :param Xs: per-output inputs; only their lengths are used here.
    :param y: stacked outputs, forwarded unchanged.
    :param dists: pairwise distances for the exact derivative.
    :param grid_dists: grid distances for the approximate derivative.
    :param interpolant: interpolation matrix for the approximate derivative.
    :param interpolantT: its transpose, forwarded unchanged.
    """
    print(desc)
    lens = [len(X) for X in Xs]

    with contexttimer.Timer() as t:
        exact = ExactLMCDerivative(
            coreg_vecs, coreg_diags, kernels, dists, lens, y, noise)
    # Absolute values so the printed ratio stays non-negative even if
    # round-off yields slightly negative eigenvalues.
    eigs = np.fabs(np.linalg.eigvalsh(exact.K))
    print('    covariance matrix info')
    print('        largest  eig        {:8.4e}'.format(eigs.max()))
    print('        smallest eig        {:8.4e}'.format(eigs.min()))
    print('        l2 condition number {:8.4e}'.format(eigs.max() / eigs.min()))
    print('    matrix materialization/inversion time')
    print('        {:10.4f} sec exact - cholesky'.format(t.elapsed))

    with contexttimer.Timer() as t:
        apprx = ApproxLMCDerivative(
            coreg_vecs, coreg_diags, kernels, grid_dists,
            interpolant, interpolantT, lens, y, noise)
    print('        {:10.4f} sec apprx - solve K*alpha=y'.format(t.elapsed))

    matrix_diff = np.fabs(apprx.ski.as_numpy() - exact.K).mean()
    print('        {:9.4e} |K_exact - K_apprx|_1 / n^2'.format(matrix_diff))
    alpha_diff = np.fabs(apprx.deriv.alpha - exact.deriv.alpha).mean()
    print('        {:9.4e} |alpha_exact - alpha_apprx|_1 / n'
          .format(alpha_diff))

    def check_grads(f, name):
        """Time ``f`` on both derivative objects and report their difference.

        :param f: callable mapping a derivative object to a list of
            gradient sequences.
        :param name: label of the gradient family for the printout.
        :return: ``(err, exact_seconds, apprx_seconds)`` where ``err`` is the
            flattened exact-minus-approximate gradient difference.
        """
        with contexttimer.Timer() as t:
            exact_kgrad = f(exact)
        ngrad = sum(map(len, exact_kgrad))
        print('    {} gradients # {}'.format(name, ngrad))
        print('        {:10.4f} sec exact per gradient'
              .format(t.elapsed / ngrad))
        tot_exact_time = t.elapsed
        with contexttimer.Timer() as t:
            apprx_kgrad = f(apprx)
        assert ngrad == sum(map(len, apprx_kgrad))
        print('        {:10.4f} sec apprx per gradient'
              .format(t.elapsed / ngrad))
        tot_apprx_time = t.elapsed
        exact_kgrad = np.hstack(exact_kgrad)
        # Stack the approximate gradients themselves; stacking the exact ones
        # here would make every reported error identically zero.
        apprx_kgrad = np.hstack(apprx_kgrad)
        err = exact_kgrad - apprx_kgrad
        print('        {:9.4e} avg grad error'.format(np.fabs(err).mean()))
        print('        {:9.4e} avg signed error'.format(err.mean()))
        return err, tot_exact_time, tot_apprx_time

    gradient_type = [
        (lambda x: x.kernel_gradients(), 'kernel'),
        (lambda x: x.coreg_vec_gradients(), 'coregionalization Aq'),
        (lambda x: x.coreg_diag_gradients(), 'coregionalization kappa'),
        # The noise gradient is a single sequence, so wrap it to match the
        # list-of-sequences shape the other families return.
        (lambda x: [x.noise_gradient()], 'noise')]

    errs = np.array([])
    tot_exact_time = 0
    tot_apprx_time = 0
    for f, name in gradient_type:
        err, exact_time, apprx_time = check_grads(f, name)
        errs = np.append(errs, err)
        tot_exact_time += exact_time
        tot_apprx_time += apprx_time

    print('    total gradient runtime summary')
    print('        {:10.4f} sec exact all gradients'.format(tot_exact_time))
    print('        {:10.4f} sec apprx all gradients'.format(tot_apprx_time))
    print('        {:9.4e} avg grad error'.format(np.fabs(errs).mean()))
    print('        {:9.4e} avg signed error'.format(errs.mean()))



def gen_kernels(q):
    """Build the four kernel lists used by the benchmark.

    The first three lists hold ``q`` kernels each (RBF, StdPeriodic with a
    fixed first argument of 1, Matern32), parameterized by
    ``np.logspace(0, 1, q)``. The fourth list interleaves all three families
    over ``np.logspace(0, 1, max(q // 3, 1))``.

    NOTE(review): the mixed list has ``3 * max(q // 3, 1)`` kernels, which
    equals ``q`` only when ``q`` is a positive multiple of 3 - confirm the
    callers expect this.

    :param q: number of kernels in each single-family list.
    :return: list of four kernel lists in the order rbf, periodic, matern,
        mixed.
    """
    def periodic(period):
        return StdPeriodic(1, period)

    families = [RBF, periodic, Matern32]

    kernels = []
    for make_kernel in families:
        kernels.append([make_kernel(gamma) for gamma in np.logspace(0, 1, q)])

    # Mixed list: for each parameter value, one kernel from every family.
    mixed = []
    for gamma in np.logspace(0, 1, max(q // 3, 1)):
        for make_kernel in families:
            mixed.append(make_kernel(gamma))
    kernels.append(mixed)
    return kernels

# Run the benchmark only when this code is executed as the main module.
if __name__ == '__main__':
    _main()
