In [13]:
using LightGraphs, RCall, LightGraphsFlows, Suppressor, Gadfly, DataFrames, CSV

In [110]:
# Load the R package that supplies `trendfilter` / `cv.trendfilter`, which are
# called from R code later in this notebook.
R"library('genlasso')";
# Load the project's Julia sources (presumably where BGSM_general,
# model_selection and local_minimizer are defined — confirm against the files).
for src in ("code/BGSM_general.jl", "code/BGSM_biclust.jl",
            "code/model_selection.jl", "code/l0pen.jl")
    include(src)
end

In [18]:
"""
    get_data(n, s; signal = 5)

Draw one synthetic data set on a path graph with `n` nodes.

A coefficient vector of length `n - 1` is built from `n - 1 - s` zeros followed
by `s` standard-normal draws scaled by `signal`. It is mapped to `theta` through
the pseudo-inverse of the negated, transposed oriented incidence matrix (plus
the constant `1/n`), and `y` is `theta` plus standard-normal noise.
Presumably this yields a piecewise-constant mean with `s` change points at the
end of the path — confirm against the `incidence_matrix` orientation.

Returns a `Dict` with keys `:y` (observations) and `:theta` (noise-free signal).
"""
function get_data(n, s; signal = 5)
    chain = PathGraph(n)
    diffop = -incidence_matrix(chain, oriented = true)'
    # Dense pseudo-inverse of the difference operator, shifted by the scalar 1/n.
    basis = pinv(full(diffop)) + 1/n
    # Draw order matters for reproducibility: jump sizes first, then the noise.
    jumps = signal * vcat(zeros(n - 1 - s), randn(s))
    theta = basis * jumps
    y = theta + randn(n)
    return Dict(:y => y, :theta => theta)
end

get_data (generic function with 1 method)

In [19]:
"""
    BGSM(y, D)

Run `BGSM_general` over a grid of `v0` values and keep the fit whose
`model_selection` score is smallest.

# Arguments
- `y`: observation vector.
- `D`: matrix passed through to `BGSM_general`; `sum(size(D))` is used as `b`.

# Returns
A `Dict` with three entries:
- key `1`: `alpha + theta` of the best-scoring fit,
- key `2`: the `:t_full` estimate reported by `model_selection` for that fit,
- key `:t`: total time (seconds) spent inside `BGSM_general` over the whole grid.

Note that `:1` and `:2` are the integers `1` and `2` in Julia, so callers
index the result with `out[:1]` / `out[:2]` (equivalently `out[1]` / `out[2]`).
"""
function BGSM(y, D)
    # Take the problem size from the data instead of relying on a global `n`
    # that happens to be defined by the calling script.
    n = length(y)
    v0_range = 1e-2 * 2 .^ (0:9)
    best_score = Inf; bgsm1 = 0; bgsm2 = 0
    t = 0
    for v0 in v0_range
        # Only the fit itself is timed; model selection is excluded.
        tic()
        fit = BGSM_general(y, D, v0 = v0, v1 = 1e4, B = n, b = sum(size(D)),
                           convtol = 1e-8, iter = 100, verbose = false)
        t = t + toc()
        selected = model_selection(fit)
        if selected[:score] < best_score
            best_score = selected[:score]
            bgsm1 = fit[:alpha] + fit[:theta]
            bgsm2 = selected[:t_full]
        end
    end

    return Dict([(:1, bgsm1), (:2, bgsm2), (:t, t)])
end

BGSM (generic function with 1 method)

In [20]:
"""
    sim(n; s = floor(Int, n/20), repnum = 10, signal = 5)

Compare three estimators on `repnum` simulated path-graph data sets of size `n`:
BGSM (`BGSM`), the R `trendfilter(y, ord=0)` fit with a cross-validated lambda,
and the L0-penalised `local_minimizer`.

# Keywords
- `s`: number of non-zero jumps passed to `get_data`; also enters the L0 penalty.
- `repnum`: number of replications; replication `i` is seeded with `srand(i)`.
- `signal`: jump scale passed to `get_data`.

# Returns
A `Dict` with
- `:bgsm`, `:fuzedlasso`, `:penl0`: per-replication mean squared errors,
- `:mean`: their averages, in the order BGSM, fused lasso, L0,
- `:t`: accumulated run times (seconds) in the same order,
- `:c`: count of differences `abs(D*estimate) > 1e-8`, summed over replications
  and divided by `(n-1)*repnum`.
"""
function sim(n; s = floor(Int,n/20), repnum = 10, signal = 5)
    mse_bgsm = zeros(repnum); mse_fl = zeros(repnum); mse_l0 = zeros(repnum);
    t = zeros(3);
    changes = zeros(3);

    for i = 1:repnum
        srand(i);
        d = get_data(n,s; signal = signal); y = d[:y]; theta = d[:theta];

        # Fused lasso via R: the timer covers the transfer of `y` and the
        # path fit, but not the cross-validation that follows.
        tic();
        @rput y;
        R"out =  trendfilter(y, ord=0);";
        t[2] = t[2] + toc();
        R"cv = cv.trendfilter(out);
                 out_fl = coef(out, lambda=cv$lambda.min, verbose = FALSE)$beta;";
        @rget out_fl;

        g = PathGraph(n);
        D = -incidence_matrix(g, oriented = true)';

        # BGSM reports its own accumulated fitting time.
        out_bgsm = BGSM(y, D);
        t[1] = t[1] + out_bgsm[:t];

        tic();
        out_l0 = local_minimizer(y,g, lambda = (1 + log(n/s)) * 3, delta = 10/sqrt(n));
        t[3] = t[3] + toc();

        # Count edges whose estimated difference is numerically non-zero.
        changes[1] += sum(abs.(D*out_bgsm[:2]) .> 1e-8);
        changes[2] += sum(abs.(D*out_fl) .> 1e-8);
        changes[3] += sum(abs.(D*out_l0) .> 1e-8);

        mse_bgsm[i] = norm(theta - out_bgsm[:2])^2/n;
        mse_fl[i] = norm(theta - out_fl)^2/n;
        mse_l0[i] = norm(theta - out_l0)^2/n;
    end

    # Average once, after all replications are in, instead of recomputing a
    # partial mean on every pass. With no replications keep the zero vector.
    mse_mean = repnum > 0 ? [mean(mse_bgsm); mean(mse_fl); mean(mse_l0)] : zeros(3);

    return Dict([(:bgsm, mse_bgsm), (:fuzedlasso, mse_fl),
                 (:penl0, mse_l0), (:mean, mse_mean), (:t, t), (:c, changes/(n-1)/repnum) ])
end

sim (generic function with 1 method)

In [41]:
# Result tables: one row per problem size, one column per method, in the order
# used by `sim` (BGSM, fused lasso, L0 penalty). `res` holds mean squared
# errors, `c` the change rates and `t` the accumulated run times.
# Only rows 1:8 are filled by the loop below; rows 9:10 keep their zeros.
res = zeros(10, 3); c = zeros(10,3); temp = 0; n = 0;
t = zeros(10,3)
for i = 1:8
    # @suppress hides anything printed inside the block (e.g. by toc()).
    @suppress begin
        # NOTE: `n` is assigned at global scope here, so any code that refers
        # to a global `n` sees the size of the experiment currently running.
        n = floor(Int,sqrt(3)^(i+5)); s = 5;
        temp = sim(n; s = s, repnum = 5); # for the plot we did repnum = 10
        res[i,:] = temp[:mean];
        c[i,:] = temp[:c];
        t[i,:] = temp[:t];
    end
    # Progress report: accumulated run times of the three methods for this size.
    println(temp[:t])
end

[0.0397346, 0.290495, 0.0815291]
[0.0320997, 0.494194, 0.262053]
[0.0582738, 0.821921, 0.62852]
[0.0958751, 1.28794, 2.21527]
[0.185541, 2.55636, 6.20228]
[0.509778, 4.18042, 21.6173]
[0.652593, 7.67377, 91.5189]
[1.95356, 17.2372, 341.126]


In [42]:
# Show the mean squared errors: rows are problem sizes, columns are
# BGSM, fused lasso and L0 penalty.
res

10×3 Array{Float64,2}:
 1.5062     1.47754    1.03762  
 0.545365   0.956705   0.577066 
 0.425391   0.557889   0.404794 
 0.243046   0.349741   0.200504 
 0.181307   0.207293   0.193844 
 0.0486524  0.122058   0.126125 
 0.0266966  0.0772873  0.0581294
 0.027755   0.0478158  0.0330306
 0.0        0.0        0.0      
 0.0        0.0        0.0      

In [50]:
# Problem sizes on the same grid as the simulation loop: floor(sqrt(3)^(i+5)), i = 1..10.
x = floor.(Int, sqrt(3) .^ (6:15));
# Long-format table of the first 8 sizes. The result matrices are flattened
# column by column, so each method contributes 8 consecutive rows.
r1 = DataFrame(
    x = repeat(x[1:8], outer = 3),
    y = vec(res[1:8, :]),
    t = vec(t[1:8, :]),
    c = vec(c[1:8, :]),
    method = repeat(["BGSM", "Genlasso", "L0pen"], inner = 8));

In [51]:
# Three log-log panels over problem size n, one line per method.
# NOTE(review): `shape` is passed twice in each call. Julia versions from 0.7 on
# reject repeated keyword arguments, and `inner = 10` does not match the 8 rows
# per method in `r1` — confirm which shape mapping is intended.

# p1: mean squared error (legend hidden).
p1 = Gadfly.plot(r1, x = :x, y = :y, color = :method, shape = :method, Geom.point, Geom.line,
                Scale.x_log10, Scale.y_log10, Coord.cartesian(xmin = 1, xmax = 3.6),
                Guide.title("mean squared error"), Guide.xlabel("n"), Guide.ylabel("mse"),
                Theme(point_size = 4pt, key_position = :none),
                shape = repeat([Shape.square, Shape.utriangle, Shape.cross], inner = 10));
# p2: accumulated computation time (this panel carries the legend).
p2 = Gadfly.plot(r1, x = :x, y = :t, color = :method, shape = :method, Geom.point, Geom.line,
                Scale.x_log10, Scale.y_log10, Coord.cartesian(xmin = 1, xmax = 3.6),
                Guide.title("computation time"), Guide.xlabel("n"), Guide.ylabel("time(sec)"),
                Theme(point_size = 4pt),
                shape = repeat([Shape.square, Shape.utriangle, Shape.cross], inner = 10))
# p3: column `c` of r1 (legend hidden). The y label previously read
# "time(sec)", copied from p2, which did not describe this quantity.
p3 = Gadfly.plot(r1, x = :x, y = :c, color = :method, shape = :method, Geom.point, Geom.line,
                Scale.x_log10, Scale.y_log10, Coord.cartesian(xmin = 1, xmax = 3.6),
                Guide.title("number of changes"), Guide.xlabel("n"), Guide.ylabel("proportion of changes"),
                Theme(point_size = 4pt, key_position = :none),
                shape = repeat([Shape.square, Shape.utriangle, Shape.cross], inner = 10));

In [52]:
# Save the long-format results table as comma-separated text.
CSV.write("result/linearpath1.txt", r1, delim = ',');

In [53]:
set_default_plot_size(45cm, 13cm)
# Lay the panels out side by side: error (p1), changes (p3), then timing (p2).
# The last panel is wider than the other two; it is the one drawn with a legend.
panels = hstack(
    compose(context(0, 0, 14cm, 13cm), render(p1)),
    compose(context(0, 0, 14cm, 13cm), render(p3)),
    compose(context(0, 0, 16cm, 13cm), render(p2)))
F9 = title(panels, "linear path graph")
# Write the combined figure to disk.
Gadfly.draw(PDF("F9.pdf"), F9);