In [45]:
using DataFrames, PyPlot, LightGraphs, Cairo, Fontconfig, Colors, CSV, RCall, Gadfly

In [46]:
include("code/BGSM_effective_resistance.jl");
include("code/model_selection.jl");

In [47]:
using PyCall, Suppressor
@pyimport GraphSegment
@pyimport numpy

In [48]:
edge = CSV.read("data/email-Enron.txt", delim = '	', header = false);

In [49]:
p = maximum(Matrix(edge)) + 1;
m = size(edge,1);
G = SimpleGraph(p);
for i = 1:m
    add_edge!(G,(edge[i,1] + 1, edge[i,2] + 1));
end
largestcomp = connected_components(G)[1]
G = G[largestcomp];
G = G[3001:10000]
node = connected_components(G)[3];
G = G[node];

In [56]:
D   = -incidence_matrix(G, oriented = true)';
R   = effective_resistance(G, D);
m,p = size(D)
edges = repeat([(0,0)],m);
for i = 1:m
    temp = find(D[i,:] .!= 0);
    edges[i] = (temp[1]-1, temp[2]-1);
end
tind = find(abs.(D * col) .> 1e-8);

In [51]:
srand(4)
src = rand(1:p, 4); distance = zeros(p,4); col = zeros(Int, p);
for i = 1:4
    temp = dijkstra_shortest_paths(G, src[i]).dists;
    distance[:,i] = temp;
end
for i = 1:p
    col[i] = findmin(distance[i,:])[2]
end
col = [1;3;4;2][col];
println(sum(col .== 1)," ",sum(col .== 2)," ", sum(col .== 3)," ", sum(col .== 4))

1659 384 538 1531


In [8]:
srand(1);
y = 1 * col + rand(length(col));
bgsm = zeros(length(y),20);
for i = 1:20
    res1      = BGSM_effective_resistance(y,D,R, v0 = 1e-2 * 1.4^i, b = size(D,1), verbose = false);
    mss1      = model_selection(res1);
    bgsm[:,i] = mss1[:beta]
    println("score: ", mss1[:score]," and # of comp: ",length(mss1[:c]))
end

In [None]:
score: 20009.766141432487 and # of comp: 46
score: 19936.16843310993 and # of comp: 20
score: 19886.386503137142 and # of comp: 8
score: 19870.209142829597 and # of comp: 4
score: 19859.981404209735 and # of comp: 2
score: 19860.984179017654 and # of comp: 2
score: 19860.602914413852 and # of comp: 2
score: 19866.01906018282 and # of comp: 2
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1
score: 20581.06866170354 and # of comp: 1

In [77]:
srand(1);
y         = 1 * col +  rand(length(col));
res1      = BGSM_effective_resistance(y,D,R, v0 = 1e-2 * 1.4^5, b = size(D,1), verbose = false);
mss1      = model_selection(res1);
println("score: ", mss1[:score]," and # of comp: ",length(mss1[:c]))
nzind     = find(abs.(D * mss1[:beta]) .> 1e-8);
tp        = length(findin(tind, nzind));
println("mse: ", norm(y - mss1[:beta])^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

score: 19859.981404209735 and # of comp: 2
mse: 0.2648029492610089 fdr: 0.0058021467943139005 power : 0.749890590809628


In [64]:
srand(1)
y = 1 * col +  rand(length(col));
l0pen = 0;
for i = 1:20
    l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-i), weights = R, delta = 1, verbose = false);
    nzind = find(abs.(D * l0pen) .> 1e-8);
    tp    = length(findin(tind, nzind));
    println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))
end

mse: 1.803684347814692 fdr: 0.9090909090909091 power : 0.0002188183807439825
mse: 1.7616890387528121 fdr: 0.782608695652174 power : 0.0010940919037199124
mse: 1.6780536124529895 fdr: 0.5238095238095238 power : 0.0087527352297593
mse: 1.55197001136238 fdr: 0.5464480874316939 power : 0.018161925601750548
mse: 0.49790297357876967 fdr: 0.1674047829937998 power : 0.4113785557986871
mse: 0.32802673343969574 fdr: 0.05290659699542788 power : 0.6345733041575492
mse: 0.2765682726117164 fdr: 0.029376135675348247 power : 0.7013129102844639
mse: 0.2521988222977717 fdr: 0.03209019947961844 power : 0.7326039387308534
mse: 0.22852374685224738 fdr: 0.07449392712550607 power : 0.750328227571116
mse: 0.16891223852053336 fdr: 0.22699256110520727 power : 0.7958424507658644
mse: 0.11285177301077297 fdr: 0.3829956930929973 power : 0.8463894967177243
mse: 0.07796727522524105 fdr: 0.470595939751146 power : 0.8844638949671773
mse: 0.06187959186014932 fdr: 0.5059502344031734 power : 0.899343544857768
mse: 0.0548

In [98]:
srand(1)
y = 1 * col +  rand(length(col));
B     = 5;
err   = zeros(20, B);
out_l0pen = 0;
srand(1);
for b = 1:B
    println(b);
    for i = 1:20
        out_l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-i), delta = 1, weights = R, verbose = false);
        sigma     = 1;
        alpha     = 0.2;
        z         = alpha * sigma * randn(p);
        y1        = y + z;
        y2        = y - z/alpha^2;
        mu1       = GraphSegment.GraphSegment(y1,edges, 1.5^(8-i), delta = 1, weights = R, verbose = false);
        err[i,b]  = norm(y2 - mu1)^2/p;
    end
end

1
2
3
4
5


In [104]:
l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-findmin(sum(err,2))[2]), delta = 1, weights = R, verbose = false)
nzind = find(abs.(D * l0pen) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

mse: 0.32802673343969574 fdr: 0.05290659699542788 power : 0.6345733041575492


In [69]:
srand(1);
y = 2 * col +  rand(length(col));
bgsm = zeros(length(y),20);
for i = 1:20
    res1      = BGSM_effective_resistance(y,D,R, v0 = 1e-3 * 2 * 1.4^i, b = size(D,1), verbose = false);
    mss1      = model_selection(res1);
    bgsm[:,i] = mss1[:beta]
    println("score: ", mss1[:score]," and # of comp: ", length(mss1[:c]))
end

score: 20394.74554138314 and # of comp: 146
score: 20023.749320630253 and # of comp: 62
score: 19816.6313909981 and # of comp: 14
score: 19773.748526045358 and # of comp: 4
score: 19773.748526045358 and # of comp: 4
score: 19773.748526045358 and # of comp: 4
score: 19773.748526045358 and # of comp: 4
score: 19774.416130740883 and # of comp: 4
score: 19990.164364576594 and # of comp: 3
score: 20037.936236023004 and # of comp: 4
score: 20087.03289574866 and # of comp: 6
score: 20145.183526472778 and # of comp: 7
score: 20138.636928797154 and # of comp: 4
score: 20138.124104856062 and # of comp: 3
score: 20155.19758343638 and # of comp: 2
score: 22003.619064450195 and # of comp: 1
score: 22013.39598715098 and # of comp: 3
score: 22003.619064450195 and # of comp: 1
score: 22003.619064450195 and # of comp: 1
score: 22003.619064450195 and # of comp: 1


In [None]:
score: 21692.471391903568 and # of comp: 433
score: 21015.80155721818 and # of comp: 284
score: 20412.460436867914 and # of comp: 150
score: 20037.083720790077 and # of comp: 65
score: 19825.19904217826 and # of comp: 16
score: 19780.415174231868 and # of comp: 5
score: 19773.748526045358 and # of comp: 4
score: 19773.748526045358 and # of comp: 4
score: 19773.748526045358 and # of comp: 4
score: 19774.416130740883 and # of comp: 4
score: 19990.164364576594 and # of comp: 3
score: 20037.936236023004 and # of comp: 4
score: 20087.03289574866 and # of comp: 6
score: 20143.015211438473 and # of comp: 6
score: 20138.636928797154 and # of comp: 4
score: 20138.124104856062 and # of comp: 3
score: 20154.03579986702 and # of comp: 2
score: 22003.619064450195 and # of comp: 1
score: 22018.523964046784 and # of comp: 4
score: 22003.619064450195 and # of comp: 1

In [78]:
srand(1);
y         = 2 * col +  rand(length(col));
res1      = BGSM_effective_resistance(y,D,R, v0 = 1e-3 * 2 * 1.4^5, b = size(D,1), verbose = false);
mss1      = model_selection(res1);
println("score: ", mss1[:score]," and # of comp: ",length(mss1[:c]))
nzind     = find(abs.(D * mss1[:beta]) .> 1e-8);
tp        = length(findin(tind, nzind));
println("mse: ", norm(y-mss1[:beta])^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

score: 19773.748526045358 and # of comp: 4
mse: 0.08387687951803596 fdr: 0.0 power : 1.0


In [63]:
srand(1)
y = 2 * col +  rand(length(col));
l0pen = 0;
for i = 1:20
    l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-i), weights = R, delta = 1, verbose = false);
    nzind = find(abs.(D * l0pen) .> 1e-8);
    tp    = length(findin(tind, nzind));
    println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))
end

mse: 4.6359864410989475 fdr: 0.37768817204301075 power : 0.10131291028446389
mse: 1.040814872589927 fdr: 0.047940797940797975 power : 0.6474835886214442
mse: 0.5668778372490905 fdr: 0.05054454063110858 power : 0.7439824945295405
mse: 0.40214460426896337 fdr: 0.038738979428266096 power : 0.787308533916849
mse: 0.2851000585996561 fdr: 0.034403080872913994 power : 0.8229759299781182
mse: 0.1754446511017759 fdr: 0.0366116295764537 power : 0.8809628008752736
mse: 0.12359331632725205 fdr: 0.02809502055733215 power : 0.9310722100656456
mse: 0.09949824125394781 fdr: 0.03829225352112675 power : 0.9562363238512035
mse: 0.08520381575118657 fdr: 0.10950371709865381 power : 0.9698030634573304
mse: 0.06682641994995761 fdr: 0.2363666778186685 power : 0.9989059080962801
mse: 0.05221676790006594 fdr: 0.3452722063037249 power : 1.0
mse: 0.03666669219518331 fdr: 0.43208649186032067 power : 1.0
mse: 0.027631196216344557 fdr: 0.4782509418883434 power : 1.0
mse: 0.023701211153300195 fdr: 0.4983534577387486 

In [117]:
srand(1)
y = 2 * col +  rand(length(col));
B     = 5;
err   = zeros(20, B);
out_l0pen = 0;
srand(1);
for b = 1:B
    println(b);
    for i = 1:20
        out_l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-i), delta = 1, weights = R, verbose = false);
        sigma     = 1;
        alpha     = 0.2;
        z         = alpha * sigma * randn(p);
        y1        = y + z;
        y2        = y - z/alpha^2;
        mu1       = GraphSegment.GraphSegment(y1,edges, 1.5^(8-i), delta = 1, weights = R, verbose = false);
        err[i,b]  = norm(y2 - mu1)^2/p;
    end
end
l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-findmin(sum(err,2))[2]), delta = 1, weights = R, verbose = false)
nzind = find(abs.(D * l0pen) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

1
2
3
4
5
mse: 0.06682641994995761 fdr: 0.2363666778186685 power : 0.9989059080962801


In [71]:
srand(1);
y = 3 * col +  rand(length(col));
bgsm = zeros(length(y),20);
for i = 1:20
    res1      = BGSM_effective_resistance(y,D,R, v0 = 1e-3 * 2 * 1.4^i, b = size(D,1), verbose = false);
    mss1      = model_selection(res1);
    bgsm[:,i] = mss1[:beta]
    println("score: ", mss1[:score]," and # of comp: ",length(mss1[:c]))
end

score: 19799.687925568465 and # of comp: 10
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19784.95896315207 and # of comp: 4
score: 19926.754417525037 and # of comp: 7
score: 20353.519720156575 and # of comp: 7
score: 20525.13712634944 and # of comp: 6
score: 20528.65737695159 and # of comp: 5
score: 20547.570053147923 and # of comp: 9
score: 20550.220763536636 and # of comp: 5
score: 20605.099021030703 and # of comp: 6
score: 23360.10321344413 and # of comp: 35
score: 23248.94543055043 and # of comp: 1
score: 23248.94543055043 and # of comp: 1


In [79]:
srand(1);
y         = 3 * col +  rand(length(col));
res1      = BGSM_effective_resistance(y,D,R, v0 = 1e-3 * 2 * 1.4^5, b = size(D,1), verbose = false);
mss1      = model_selection(res1);
println("score: ", mss1[:score]," and # of comp: ",length(mss1[:c]))
nzind     = find(abs.(D * mss1[:beta]) .> 1e-8);
tp        = length(findin(tind, nzind));
println("mse: ", norm(y-mss1[:beta])^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

score: 19773.748526045794 and # of comp: 4
mse: 0.08387687951880218 fdr: 0.0 power : 1.0


In [None]:
score: 20285.88402593696 and # of comp: 121
score: 19898.489455920884 and # of comp: 33
score: 19799.687925568465 and # of comp: 10
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19773.748526045794 and # of comp: 4
score: 19782.050487646295 and # of comp: 4
score: 19888.57011529519 and # of comp: 5
score: 20353.519720156575 and # of comp: 7
score: 20525.13712634944 and # of comp: 6
score: 20528.65737695159 and # of comp: 5
score: 20547.570053147923 and # of comp: 9
score: 20550.220763536636 and # of comp: 5
score: 20614.067214498984 and # of comp: 8
score: 23370.166241295923 and # of comp: 37

In [62]:
srand(1)
y = 3 * col +  rand(length(col));
l0pen = 0;
for i = 1:20
    l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-i), weights = R, delta = 1, verbose = false);
    nzind = find(abs.(D * l0pen) .> 1e-8);
    tp    = length(findin(tind, nzind));
    println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))
end

mse: 1.0518294913817359 fdr: 0.04892205638474301 power : 0.7529540481400437
mse: 0.6457127117804858 fdr: 0.03722019781363872 power : 0.8094091903719912
mse: 0.4389786575765318 fdr: 0.031786074672048414 power : 0.8398249452954049
mse: 0.23042434309604537 fdr: 0.02686778222955455 power : 0.9035010940919037
mse: 0.13198465699728584 fdr: 0.010662431941923733 power : 0.9542669584245077
mse: 0.10098025189209565 fdr: 0.010685663401602818 power : 0.9724288840262582
mse: 0.08612444200520927 fdr: 0.012179208351457138 power : 0.9938730853391685
mse: 0.08071760972331984 fdr: 0.03627161535217205 power : 1.0
mse: 0.07591919553019674 fdr: 0.1060250391236307 power : 1.0
mse: 0.06626400597221509 fdr: 0.2365519545606415 power : 1.0
mse: 0.05221676790006594 fdr: 0.3452722063037249 power : 1.0
mse: 0.03666669219518331 fdr: 0.43208649186032067 power : 1.0
mse: 0.027631196216344557 fdr: 0.4782509418883434 power : 1.0
mse: 0.023701211153300195 fdr: 0.4983534577387486 power : 1.0
mse: 0.022196901092726977 fdr

In [128]:
srand(1)
y = 3 * col +  rand(length(col));
B     = 5;
err   = zeros(20, B);
out_l0pen = 0;
srand(1);
for b = 1:B
    println(b);
    for i = 1:20
        out_l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-i), delta = 1, weights = R, verbose = false);
        sigma     = 1;
        alpha     = 0.2;
        z         = alpha * sigma * randn(p);
        y1        = y + z;
        y2        = y - z/alpha^2;
        mu1       = GraphSegment.GraphSegment(y1,edges, 1.5^(8-i), delta = 1, weights = R, verbose = false);
        err[i,b]  = norm(y2 - mu1)^2/p;
    end
end
l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-findmin(sum(err,2))[2]), delta = 1, weights = R, verbose = false)
nzind = find(abs.(D * l0pen) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

1
2
3
4
5
mse: 0.13198465699728584 fdr: 0.010662431941923733 power : 0.9542669584245077


In [None]:
srand(1);
y = 4 * col +  rand(length(col));
bgsm = zeros(length(y),20);
for i = 1:20
    res1      = BGSM_effective_resistance(y,D,R, v0 = 1e-3 * 1.4^i, b = size(D,1), verbose = false);
    mss1      = model_selection(res1);
    bgsm[:,i] = mss1[:beta]
    println("score: ", mss1[:score]," and # of comp: ",length(mss1[:c]))
end

In [None]:
score: 19821.35611975347 and # of comp: 15
score: 19773.748526046402 and # of comp: 4
score: 19773.748526046402 and # of comp: 4
score: 19773.748526046402 and # of comp: 4
score: 19773.748526046402 and # of comp: 4
score: 19773.748526046402 and # of comp: 4
score: 19773.748526046402 and # of comp: 4
score: 19773.748526046402 and # of comp: 4
score: 19773.748526046402 and # of comp: 4
score: 19773.748526046402 and # of comp: 4
score: 19773.748526046402 and # of comp: 4
score: 19775.530715949142 and # of comp: 4
score: 19845.400031627396 and # of comp: 5
score: 20064.73920023877 and # of comp: 8
score: 20124.806328552044 and # of comp: 10
score: 20129.65931946699 and # of comp: 10
score: 20135.008106032954 and # of comp: 11
score: 20671.619586371035 and # of comp: 13
score: 20990.440174843003 and # of comp: 10
score: 21018.250020787767 and # of comp: 9

In [83]:
srand(1);
y         = 4 * col +  rand(length(col));
res1      = BGSM_effective_resistance(y,D,R, v0 = 1e-3 * 1.4^5, b = size(D,1), verbose = false);
mss1      = model_selection(res1);
println("score: ", mss1[:score]," and # of comp: ",length(mss1[:c]))
nzind     = find(abs.(D * mss1[:beta]) .> 1e-8);
tp        = length(findin(tind, nzind));
println("mse: ", norm(y-mss1[:beta])^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

score: 19773.748526046402 and # of comp: 4
mse: 0.08387687951987512 fdr: 0.0 power : 1.0


In [81]:
srand(1)
y = 4 * col +  rand(length(col));
l0pen = 0;
for i = 1:20
    l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-i), weights = R, delta = 1, verbose = false);
    nzind = find(abs.(D * l0pen) .> 1e-8);
    tp    = length(findin(tind, nzind));
    println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))
end

mse: 0.8267255068229964 fdr: 0.035705177250701325 power : 0.8273522975929978
mse: 0.3631092838921691 fdr: 0.025992438563327003 power : 0.9019693654266958
mse: 0.20096012796482582 fdr: 0.012802926383173263 power : 0.9448577680525164
mse: 0.13092259452172855 fdr: 0.00784577449002466 power : 0.9684901531728666
mse: 0.10479893968674958 fdr: 0.004662522202486641 power : 0.9809628008752735
mse: 0.0867967650500517 fdr: 0.005030621172353422 power : 0.9954048140043764
mse: 0.08261158756248291 fdr: 0.01146441704520873 power : 1.0
mse: 0.08071760972331984 fdr: 0.03627161535217205 power : 1.0
mse: 0.07591919553019674 fdr: 0.1060250391236307 power : 1.0
mse: 0.06626400597221509 fdr: 0.2365519545606415 power : 1.0
mse: 0.05221676790006594 fdr: 0.3452722063037249 power : 1.0
mse: 0.03666669219518332 fdr: 0.43208649186032067 power : 1.0
mse: 0.027631196216344567 fdr: 0.4782509418883434 power : 1.0
mse: 0.023701211153300202 fdr: 0.4983534577387486 power : 1.0
mse: 0.022196901092726977 fdr: 0.5069586794

In [129]:
srand(1)
y = 4 * col +  rand(length(col));
B     = 5;
err   = zeros(20, B);
out_l0pen = 0;
srand(1);
for b = 1:B
    println(b);
    for i = 1:20
        out_l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-i), delta = 1, weights = R, verbose = false);
        sigma     = 1;
        alpha     = 0.2;
        z         = alpha * sigma * randn(p);
        y1        = y + z;
        y2        = y - z/alpha^2;
        mu1       = GraphSegment.GraphSegment(y1,edges, 1.5^(8-i), delta = 1, weights = R, verbose = false);
        err[i,b]  = norm(y2 - mu1)^2/p;
    end
end
l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-findmin(sum(err,2))[2]), delta = 1, weights = R, verbose = false)
nzind = find(abs.(D * l0pen) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

1
2
3
4
5
mse: 0.13092259452172855 fdr: 0.00784577449002466 power : 0.9684901531728666


In [None]:
srand(1);
y = 5 * col +  rand(length(col));
bgsm = zeros(length(y),20);
for i = 1:20
    res1      = BGSM_effective_resistance(y,D,R, v0 = 1e-3 * 1.4^i, b = size(D,1), verbose = false);
    mss1      = model_selection(res1);
    bgsm[:,i] = mss1[:beta]
    println("score: ", mss1[:score]," and # of comp: ",length(mss1[:c]))
end

In [None]:
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19773.748526047188 and # of comp: 4
score: 19818.80366910341 and # of comp: 5
score: 20039.194933336898 and # of comp: 9
score: 20097.1650249767 and # of comp: 12
score: 20202.7333784069 and # of comp: 13
score: 20126.87199021805 and # of comp: 14
score: 20095.37225347825 and # of comp: 15
score: 20071.250232969574 and # of comp: 17
score: 20066.374991969467 and # of comp: 27

In [93]:
srand(1);
y         = 5 * col +  rand(length(col));
res1      = BGSM_effective_resistance(y,D,R, v0 = 1e-3 * 1.4^5, b = size(D,1), verbose = false);
mss1      = model_selection(res1);
println("score: ", mss1[:score]," and # of comp: ",length(mss1[:c]))
nzind     = find(abs.(D * mss1[:beta]) .> 1e-8);
tp        = length(findin(tind, nzind));
println("mse: ", norm(y-mss1[:beta])^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

score: 19773.748526047188 and # of comp: 4
mse: 0.08387687952125479 fdr: 0.0 power : 1.0


In [61]:
srand(1)
y = 5 * col +  rand(length(col));
l0pen = 0;
for i = 1:20
    l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-i), weights = R, delta = 1, verbose = false);
    nzind = find(abs.(D * l0pen) .> 1e-8);
    tp    = length(findin(tind, nzind));
    println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))
end

mse: 0.4700634932095659 fdr: 0.025176470588235245 power : 0.9065645514223195
mse: 0.23821126522574892 fdr: 0.009543285616905206 power : 0.9538293216630197
mse: 0.13532395958899485 fdr: 0.0062611806797853164 power : 0.9724288840262582
mse: 0.09446396606350393 fdr: 0.0013192612137202797 power : 0.9938730853391685
mse: 0.08350013780376078 fdr: 0.0021834061135370675 power : 1.0
mse: 0.08323112735144393 fdr: 0.004357298474945481 power : 1.0
mse: 0.08261158756248291 fdr: 0.01146441704520873 power : 1.0
mse: 0.08071760972331984 fdr: 0.03627161535217205 power : 1.0
mse: 0.07591919553019674 fdr: 0.1060250391236307 power : 1.0
mse: 0.06626400597221509 fdr: 0.2365519545606415 power : 1.0
mse: 0.05221676790006594 fdr: 0.3452722063037249 power : 1.0
mse: 0.03666669219518332 fdr: 0.43208649186032067 power : 1.0
mse: 0.027631196216344567 fdr: 0.4782509418883434 power : 1.0
mse: 0.023701211153300202 fdr: 0.4983534577387486 power : 1.0
mse: 0.022196901092726977 fdr: 0.5069586794691985 power : 1.0
mse: 

In [131]:
srand(1)
y = 5 * col +  rand(length(col));
B     = 5;
err   = zeros(20, B);
out_l0pen = 0;
srand(1);
for b = 1:B
    println(b);
    for i = 1:20
        out_l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-i), delta = 1, weights = R, verbose = false);
        sigma     = 1;
        alpha     = 0.2;
        z         = alpha * sigma * randn(p);
        y1        = y + z;
        y2        = y - z/alpha^2;
        mu1       = GraphSegment.GraphSegment(y1,edges, 1.5^(8-i), delta = 1, weights = R, verbose = false);
        err[i,b]  = norm(y2 - mu1)^2/p;
    end
end
l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-findmin(sum(err,2))[2]), delta = 1, weights = R, verbose = false)
nzind = find(abs.(D * l0pen) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

1
2
3
4
5
mse: 0.09446396606350393 fdr: 0.0013192612137202797 power : 0.9938730853391685


In [92]:
l0pen = GraphSegment.GraphSegment(y,edges, 1.5^(8-4), weights = R, delta = 1, verbose = false);
nzind = find(abs.(D * l0pen) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y-l0pen)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

mse: 0.09446396606350393 fdr: 0.0013192612137202797 power : 0.9938730853391685


In [216]:
df = DataFrame(mse = mse[:], fdr = fdr[:], power = power[:],
                signal = [1:5;1:5;1:5], method = repeat(["BGSM","Genlasso","L0pen"], inner = 5))
CSV.write("result/enron1.txt", df, delim = ',');

In [106]:
mse = zeros(5,3); fdr = zeros(5,3); power = zeros(5,3);

In [147]:
X = pinv(full(D)); R"library(glmnet);"; @rput X;

In [186]:
y = 1 * col +  rand(length(col));
@rput y; 
R"y = y - mean(y);
res1 = cv.glmnet(X, y, family='gaussian', standardize = FALSE);";
R"genl = coef(res1)[-1];"; @rget genl;
nzind = find(abs.(genl) .> 1e-8);
println("mse: ", norm(y-X * genl)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

mse: 9.252736759647846 fdr: 0.3312741312741313 power : 0.18949671772428883


In [187]:
y = 2 * col +  rand(length(col));
@rput y; 
R"y = y - mean(y);
res2 = cv.glmnet(X, y, family='gaussian', standardize = FALSE);";
R"genl = coef(res2)[-1];"; @rget genl;
nzind = find(abs.(genl) .> 1e-8);
println("mse: ", norm(y-X * genl)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

mse: 31.140736602892417 fdr: 0.304975922953451 power : 0.18949671772428883


In [188]:
y = 3 * col +  rand(length(col));
@rput y; 
R"y = y - mean(y);
res3 = cv.glmnet(X, y, family='gaussian', standardize = FALSE);";
R"genl = coef(res3)[-1];"; @rget genl;
nzind = find(abs.(genl) .> 1e-8);
println("mse: ", norm(y-X * genl)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

mse: 65.99406937587976 fdr: 0.31649565903709553 power : 0.18949671772428883


In [189]:
y = 4 * col +  rand(length(col));
@rput y; 
R"y = y - mean(y);
res4 = cv.glmnet(X, y, family='gaussian', standardize = FALSE);";
R"genl = coef(res4)[-1];"; @rget genl;
nzind = find(abs.(genl) .> 1e-8);
println("mse: ", norm(y-X * genl)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

mse: 114.0042595261158 fdr: 0.2765246449456976 power : 0.18949671772428883


In [190]:
y = 5 * col +  rand(length(col));
@rput y; 
R"y = y - mean(y);
res5 = cv.glmnet(X, y, family='gaussian', standardize = FALSE);";
R"genl = coef(res5)[-1];"; @rget genl;
nzind = find(abs.(genl) .> 1e-8);
println("mse: ", norm(y - mean(y) - X * genl)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

mse: 9.312360129774417 fdr: 0.29878542510121453 power : 0.18949671772428883


In [212]:
y = 1 * col +  rand(length(col));
R"genl = coef(res1)[-1];"; @rget genl;
nzind = find(abs.(D * X * genl) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y- mean(y) - X * genl)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))
y = 2 * col +  rand(length(col));
R"genl = coef(res2)[-1];"; @rget genl;
nzind = find(abs.(D * X * genl) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y- mean(y) - X * genl)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))
y = 3 * col +  rand(length(col));
R"genl = coef(res3)[-1];"; @rget genl;
nzind = find(abs.(D * X * genl) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y- mean(y) - X * genl)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))
y = 4 * col +  rand(length(col));
R"genl = coef(res4)[-1];"; @rget genl;
nzind = find(abs.(D * X * genl) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y- mean(y) - X * genl)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))
y = 5 * col +  rand(length(col));
R"genl = coef(res5)[-1];"; @rget genl;
nzind = find(abs.(D * X * genl) .> 1e-8);
tp    = length(findin(tind, nzind));
println("mse: ", norm(y- mean(y) - X * genl)^2/p," fdr: ", 1 - tp/length(nzind)," power : ", tp/length(tind))

mse: 0.4590461936685914 fdr: 0.6601977842218753 power : 1.0
mse: 1.5346344323955565 fdr: 0.6572671366431678 power : 1.0
mse: 3.303586209061094 fdr: 0.6576522585961495 power : 1.0
mse: 6.236157613164027 fdr: 0.6570099069348544 power : 1.0
mse: 9.3487573053296 fdr: 0.6569841627261128 power : 1.0
