In [1]:
using LMCLUS
include("NMI.jl")
using MultivariateStats
using FreqTables
using PlotlyJS
using Distances
using Combinatorics

#color map for plots
cDict = Dict("1" => "red", "2" => "blue", "3" => "green", "4" => "violet", "5"=>"black", "6"=>"orange", "7"=>"magenta", "8"=>"rgb(87,153,199)");

[1m[36mINFO: [39m[22m

[36mLoading HttpServer methods...
[39m

In [2]:
"""
    normalizeArr(x)

Min-max scale every row of the matrix `x` to the range [0, 1], in place.

Each row `r` is replaced by `(r - min(r)) / (max(r) - min(r))`. A row whose
entries are all equal has zero range; it is set to all zeros instead of being
divided by zero (which would fill the row with `NaN`).

Returns `nothing`; the result is written into `x`.
"""
function normalizeArr(x)
    for i in 1:size(x, 1)
        lo = minimum(x[i, :])
        hi = maximum(x[i, :])
        span = hi - lo
        if span == 0
            # Constant row: avoid 0/0 and map it to the lower bound of the range.
            x[i, :] .= zero(eltype(x))
        else
            x[i, :] .= (x[i, :] .- lo) ./ span
        end
    end
    return nothing
end

normalizeArr (generic function with 1 method)

In [6]:
"""
    genPredLab(Ms, l)

Return a length-`l` vector of predicted cluster labels.

Every entry starts at 1. Then, for the `k`-th element of `Ms`, the positions
returned by `labels(Ms[k])` are set to `k`; later clusters overwrite earlier
ones. Positions not claimed by any cluster keep the initial value 1.
"""
function genPredLab(Ms, l)
    assignment = ones(l)
    for (clusterId, manifold) in enumerate(Ms)
        for pointIdx in labels(manifold)
            assignment[pointIdx] = clusterId
        end
    end
    return assignment
end

genPredLab (generic function with 1 method)

In [4]:
# Line segment through `center` along `dir`, reaching 5*dir to each side.
function _basisTrace(center, dir, name, color, group)
    return scatter3d(; name=name,
                     x=[center[1] - 5 * dir[1], center[1] + 5 * dir[1]],
                     y=[center[2] - 5 * dir[2], center[2] + 5 * dir[2]],
                     z=[center[3] - 5 * dir[3], center[3] + 5 * dir[3]],
                     mode="lines", marker_color=color, legendgroup=group,
                     showlegend=false)
end

"""
    plotManifolds(data_original, Ms = 0)

Build a 3-D Plotly scatter plot from the first three rows of `data_original`
(works only for N = 3).

- `Ms == 0` (the default): every point is coloured through `cDict` by the value
  in the last row of `data_original`. Returns `(trace, layout)`; the caller is
  expected to pass them to `plot`.
- otherwise: every entry of `Ms` with `d != 0` is drawn in its own colour,
  together with line segments through the cluster mean along its basis
  (columns of `proj`). Returns the result of `plot(trace, layout)`.

A cluster whose `proj` has no columns gets no basis line (indexing it used to
raise a `BoundsError`), and a cluster number that is not a key of `cDict` is
drawn in gray instead of raising a `KeyError`.
"""
function plotManifolds(data_original, Ms = 0)
    trace = PlotlyBase.GenericTrace{Dict{Symbol,Any}}[]

    if Ms == 0
        labelColors = map(x -> cDict[string(Int(x))], data_original[end, :])
        push!(trace, scatter3d(; x=data_original[1, :], y=data_original[2, :],
                               z=data_original[3, :], mode="markers",
                               marker_size=0.5, marker_color=labelColors))
        layout = Layout(; showlegend=false)
        return trace, layout
    end

    for i in 1:length(Ms)
        # d == 0 entries are skipped entirely.
        Ms[i].d == 0 && continue

        pts = Ms[i].points
        proj = Ms[i].proj
        clusName = join(["CLUSTER ", i])
        color = get(cDict, string(i), "gray")
        # Mean of the cluster in the three plotted coordinates.
        C = [mean(data_original[r, pts]) for r in 1:3]

        colText = join(["<span style=\"color:", color, "\">", clusName, "</span>"])

        push!(trace, scatter3d(; name=colText, x=data_original[1, pts],
                               y=data_original[2, pts], z=data_original[3, pts],
                               mode="markers", marker_size=0.5,
                               marker_color=color, legendgroup=clusName))

        nBasis = size(proj, 2)
        if nBasis == 2
            push!(trace, _basisTrace(C, proj[1:3, 1], "First Basis", color, clusName))
            push!(trace, _basisTrace(C, proj[1:3, 2], "Second Basis", color, clusName))
        elseif nBasis >= 1
            # Only the first basis vector is drawn in this case.
            push!(trace, _basisTrace(C, proj[1:3], "Basis", color, clusName))
        end
    end

    layout = Layout(; legend=attr(family="Times New Roman, sans-serif", font_size=14))
    return plot(trace, layout)
end

plotManifolds (generic function with 2 methods)

In [5]:
"""
    lmclus_rec(X, params, index)

Recursive LMCLUS. Runs `lmclus(X, params)` and

- if exactly one cluster comes back, treats `X` as a leaf: the cluster's
  `points` are set to `index`, its mean/basis/dimension are refitted with PCA
  (`pratio = 0.95`), and it is pushed onto the global `manifolds`. When PCA
  keeps all `size(X, 1)` dimensions, the cluster gets a zero mean, a basis with
  no columns, and `d = size(X, 1)`;
- otherwise recurses into the columns of every returned cluster.

`index[j]` is the position of column `j` of `X` in the data set the recursion
started from, so leaf clusters always store indices into that original data.

The global `manifolds` must be initialised (e.g. `manifolds = Manifold[]`)
before the call; results are accumulated there.
"""
function lmclus_rec(X::Matrix{T}, params::LMCLUS.Parameters,
                    index::Vector{Int}) where {T<:AbstractFloat}
    global manifolds
    Ms = lmclus(X, params)

    if length(Ms) == 1
        leaf = Ms[1]
        leaf.points = index
        M = fit(PCA, X, pratio=0.95)
        K = MultivariateStats.outdim(M)
        N = size(X, 1)

        if K == N
            # PCA found no lower-dimensional structure: full-dimensional cluster.
            leaf.μ = zeros(N)
            leaf.proj = zeros(N, 0)
            leaf.d = N
        else
            leaf.μ = M.mean
            leaf.proj = M.proj
            leaf.d = K
        end
        return push!(manifolds, leaf)
    end

    for i in 1:length(Ms)
        lmclus_rec(X[:, Ms[i].points], params, index[Ms[i].points])
    end
    return nothing
end

lmclus_rec (generic function with 1 method)

In [13]:
"""
    re_assign(manifolds, data, params)

Re-assign the points of `data` to the given manifolds, one manifold at a time.

For every manifold (working on a deep copy, the input is not modified):

1. its mean and basis are refitted with PCA (`pratio = 0.95`) on its current
   points;
2. if it has more than 100 points, `find_separation` is evaluated on all of
   `data` for the refitted manifold and `filter_separeted` splits the still
   unassigned point indices into the points kept by this cluster and the rest;
   the rest become the candidates for the following manifolds.

Manifolds with 100 points or fewer are dropped from the result. The loop stops
early once the number of assigned points equals `size(data, 2)`.

Returns the vector of re-assigned manifolds.
"""
function re_assign(manifolds, data, params)
    Ms = deepcopy(manifolds)
    mfs = Manifold[]
    nTotal = size(data, 2)
    index = collect(1:nTotal)   # indices not yet claimed by any cluster
    nAssigned = 0

    for i in 1:length(Ms)
        # Re-adjust the mean and basis
        M = fit(PCA, data[:, Ms[i].points], pratio=0.95)
        Ms[i].μ = M.mean
        Ms[i].proj = M.proj

        # Every point already belongs to a cluster: nothing left to separate.
        nAssigned == nTotal && break

        # Separate points using the newly constructed manifold
        if length(Ms[i].points) > 100
            Ms[i].d = MultivariateStats.outdim(M)
            s = find_separation(data, Ms[i].μ, Ms[i].proj, params)
            cluster_points, removed_points =
                filter_separeted(index, data, Ms[i].μ, Ms[i].proj, s)
            index = removed_points
            Ms[i].points = cluster_points
            nAssigned += length(cluster_points)
            push!(mfs, Ms[i])
        end
    end
    return mfs
end

re_assign (generic function with 1 method)

In [24]:
"""
    computeSSD(Ms, X)

Sum `distance_to_manifold` over the points of every cluster in `Ms` (each
cluster measured against its own mean `μ` and basis `proj`) and divide the
total by the number of columns of `X`.
"""
function computeSSD(Ms::Array{LMCLUS.Manifold}, X::Matrix{Float64})
    # Float accumulator keeps the variable's type stable across iterations.
    ssd = 0.0
    for m in Ms
        ssd += sum(distance_to_manifold(X[:, m.points], m.μ, m.proj))
    end
    return ssd / size(X, 2)
end

computeSSD (generic function with 1 method)

In [23]:
"""
    computeMinDist(Ms, X)

For every unordered pair of clusters in `Ms`, take the smallest squared
Euclidean distance between a point of one cluster and a point of the other
(points are columns of `X`), and return twice the sum of these minima.

The distances are computed from the argument `X`; earlier versions silently
read the global `data` instead. With fewer than two clusters the result is
`0.0`.
"""
function computeMinDist(Ms::Array{LMCLUS.Manifold}, X::Matrix{Float64})
    mDistSum = 0.0

    for pair in combinations(1:size(Ms, 1), 2)
        a, b = pair
        mDistSum += minimum(pairwise(SqEuclidean(),
                                     X[:, Ms[a].points], X[:, Ms[b].points]))
    end
    return 2 * mDistSum
end

computeMinDist (generic function with 1 method)

In [146]:
"""
    computeMeanDist(Ms, X)

For every unordered pair of clusters in `Ms`, compute the Euclidean distance
between the two cluster means (means of the clusters' columns of `X`). Each
cluster collects the distances of all pairs it takes part in; the function
returns the sum, over clusters, of the mean of that cluster's distances.

The means are computed from the argument `X`; earlier versions silently read
the global `data` instead. With fewer than two clusters the result is `0.0`.
"""
function computeMeanDist(Ms::Array{LMCLUS.Manifold}, X::Matrix{Float64})
    # cluster number (as a string) => distances to every other cluster mean
    meanDist = Dict{String,Vector{Float64}}()

    for pair in combinations(1:size(Ms, 1), 2)
        a, b = pair
        mean_dist = Distances.evaluate(Euclidean(),
                                       mean(X[:, Ms[a].points], 2),
                                       mean(X[:, Ms[b].points], 2))
        push!(get!(() -> Float64[], meanDist, string(a)), mean_dist)
        push!(get!(() -> Float64[], meanDist, string(b)), mean_dist)
    end

    sumDist = 0.0
    for dists in values(meanDist)
        sumDist += mean(dists)
    end

    return sumDist
end

computeMeanDist (generic function with 1 method)

In [34]:
mergeMan

5-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 1, size = 494)
 Manifold (dim = 1, size = 496)
 Manifold (dim = 1, size = 500)
 Manifold (dim = 1, size = 500)
 Manifold (dim = 1, size = 10) 

In [51]:
d = computeMeanDist(mergeMan,data)

In [47]:
for i in keys(d)
   d[i] = minimum(d[i]) 
end

Dict{Any,Any} with 5 entries:
  "4" => 0.239449
  "1" => 0.112538
  "5" => 0.112538
  "2" => 0.209441
  "3" => 0.205256

In [35]:
plotManifolds(data_original, mergeMan)

In [18]:
a = Dict()
#a["1"] = [0.12, 0.656, 0.6565];
#push!(a["1"],0.265)
#a = string(15)
#typeof(a)
try
    a["1"]
catch
    print("caught")
end

caught

In [126]:
"""
    mergeManifolds(manifolds, data, λ)

Greedily merge clusters, accepting the first improving merge of each pass.

The score of a clustering is `computeSSD(Ms, data) - λ * computeMeanDist(Ms, data)`.
In every pass the cluster pairs are visited in `combinations` order. A pair is
a merge candidate when PCA (`pratio = 0.95`) on the union of its points yields
a dimension `K` that is lower than the data dimension and not higher than the
dimension `d` of either cluster. The first candidate whose merged clustering
has a strictly lower score than the current one is accepted and a new pass
starts. The procedure stops when a pass accepts no merge or a single cluster
is left.

The input is not modified; a new vector of manifolds is returned.
"""
function mergeManifolds(manifolds, data, λ)
    MArr = deepcopy(manifolds)

    while size(MArr, 1) > 1
        ssd_init = computeSSD(MArr, data) - λ * computeMeanDist(MArr, data)
        # Explicit flag instead of comparing scores for equality: a NaN score
        # never compares equal to itself and used to keep the loop running forever.
        merged = false

        for pair in combinations(1:size(MArr, 1), 2)
            i₁, i₂ = pair
            mergedPoints = [MArr[i₁].points; MArr[i₂].points]
            M = fit(PCA, data[:, mergedPoints], pratio=0.95)
            N = MultivariateStats.indim(M)
            K = MultivariateStats.outdim(M)

            # check if the manifold identified by PCA has dimension less than N
            # and not more than the dimensions of clusters i₁ and i₂
            if (N > K) && (K <= MArr[i₁].d) && (K <= MArr[i₂].d)
                # all other clusters, followed by the merged manifold
                Mtemp = Manifold[]
                append!(Mtemp, MArr[setdiff(collect(1:size(MArr, 1)), [i₁, i₂])])
                push!(Mtemp, Manifold(K, M.mean, M.proj, mergedPoints,
                                      LMCLUS.Separation()))
                ssd_mer = computeSSD(Mtemp, data) - λ * computeMeanDist(Mtemp, data)

                if ssd_init > ssd_mer
                    MArr = Mtemp
                    merged = true
                    break
                end
            end
        end

        merged || break
    end
    return MArr
end

mergeManifolds (generic function with 1 method)

In [24]:
a = (3,4);
i,j = a;
a == (3,4)

In [9]:
"""
    mergeManifolds2(manifolds, data, λ)

Greedily merge clusters, accepting the best improving merge of each pass.

The score of a clustering is `computeSSD(Ms, data) - λ * computeMinDist(Ms, data)`.
In every pass all cluster pairs are examined. A pair is a merge candidate when
PCA (`pratio = 0.95`) on the union of its points yields a dimension `K` that is
lower than the data dimension and not higher than the dimension `d` of either
cluster. Among the candidates that lower the score, the one with the lowest
score is applied and a new pass starts. When no candidate lowers the score a
message is printed and the loop ends; the loop also ends when exactly one
cluster is left.

The input is not modified; a new vector of manifolds is returned.
"""
function mergeManifolds2(manifolds, data, λ)
    current = deepcopy(manifolds)

    while size(current, 1) != 1
        baseline = computeSSD(current, data) - λ * computeMinDist(current, data)
        bestScore = baseline
        bestSet = Manifold[]
        improved = false

        for pair in combinations(1:size(current, 1), 2)
            i₁ = pair[1]
            i₂ = pair[2]
            joint = [current[i₁].points; current[i₂].points]
            M = fit(PCA, data[:, joint], pratio = 0.95)
            N = MultivariateStats.indim(M)
            K = MultivariateStats.outdim(M)

            # Skip pairs whose union is full-dimensional or of higher dimension
            # than either of the two clusters.
            if !((N > K) && (K <= current[i₁].d) && (K <= current[i₂].d))
                continue
            end

            # Untouched clusters first, merged manifold last.
            candidate = Manifold[]
            append!(candidate, current[setdiff(collect(1:size(current, 1)), [i₁, i₂])])
            push!(candidate, Manifold(K, M.mean, M.proj, joint, LMCLUS.Separation()))
            score = computeSSD(candidate, data) - λ * computeMinDist(candidate, data)

            if baseline > score && bestScore > score
                bestScore = score
                bestSet = deepcopy(candidate)
                improved = true
            end
        end

        if improved
            current = bestSet
        else
            print("No change! exiting...\n")
            break
        end
    end
    return current
end

mergeManifolds2 (generic function with 1 method)

In [93]:
"""
    re_assignF(manifolds, data, params)

Variant of `re_assign` in which every manifold selects its points from the
full set of point indices, so one point may end up in several clusters.

For every manifold (working on a deep copy, the input is not modified):

1. its mean and basis are refitted with PCA (`pratio = 0.95`) on its current
   points;
2. if it has more than 100 points, `find_separation` is evaluated on all of
   `data` for the refitted manifold and the first result of `filter_separeted`
   (applied to all point indices) becomes the cluster's new point set.

Manifolds with 100 points or fewer are dropped from the result. The loop stops
once the running total of assigned points exceeds `size(data, 2)`.

Returns the vector of re-assigned manifolds.
"""
function re_assignF(manifolds, data, params)
    Ms = deepcopy(manifolds)
    mfs = Manifold[]
    nTotal = size(data, 2)
    index = collect(1:nTotal)   # never shrinks: clusters may overlap
    nAssigned = 0

    for i in 1:length(Ms)
        # Re-adjust the mean and basis
        M = fit(PCA, data[:, Ms[i].points], pratio=0.95)
        Ms[i].μ = M.mean
        Ms[i].proj = M.proj

        nAssigned <= nTotal || break

        # Separate points using the newly constructed manifold
        if length(Ms[i].points) > 100
            Ms[i].d = MultivariateStats.outdim(M)
            s = find_separation(data, Ms[i].μ, Ms[i].proj, params)
            cluster_points = filter_separeted(index, data, Ms[i].μ, Ms[i].proj, s)[1]
            Ms[i].points = cluster_points
            nAssigned += length(cluster_points)
            push!(mfs, Ms[i])
        end
    end
    return mfs
end

re_assignF (generic function with 1 method)

In [7]:
"""
    findClus(manifolds, data, params)

Refit the mean and basis of every manifold with PCA (`pratio = 0.95`) on the
manifold's own points and return the refitted copies; the point sets are left
unchanged and the input is not modified. `params` is not used.

When PCA keeps all `size(data, 1)` dimensions, the manifold gets a zero mean
and a basis with no columns, and its `d` is left as it was.
NOTE(review): `lmclus_rec` sets `d = size(X, 1)` in the same situation —
confirm whether leaving `d` untouched here is intended.
"""
function findClus(manifolds, data, params)
    refreshed = deepcopy(manifolds)
    result = Manifold[]
    ambientDim = size(data, 1)

    for m in refreshed
        M = fit(PCA, data[:, m.points], pratio=0.95)
        reducedDim = MultivariateStats.outdim(M)

        if reducedDim == ambientDim
            m.μ = zeros(ambientDim)
            m.proj = zeros(ambientDim, 0)
        else
            m.μ = M.mean
            m.proj = M.proj
            m.d = reducedDim
        end
        push!(result, m)
    end
    return result
end

findClus (generic function with 1 method)

In [20]:
collect(combinations(2:5,2))

6-element Array{Array{Int64,1},1}:
 [2, 3]
 [2, 4]
 [2, 5]
 [3, 4]
 [3, 5]
 [4, 5]

### 3-D Dataset with four 1-d linear manifold clusters 

In [20]:
data_original = readdlm("d:\\LMC_Datasets\\data_3_4_5_5_1111.csv", header=false, ',');
trueLab = data_original[end,:];
data = data_original[1:end-1,:];

normalizeArr(data);

In [11]:
trace,layout = plotManifolds(data_original);
plot(trace,layout)

In [12]:
# Plain LMCLUS followed by the recursive variant.
# Clustering runs on `data` (features only): `data_original` still carries the
# true class label in its last row, which must not be visible to the clustering.
global manifolds = Manifold[];
params = LMCLUS.Parameters(2);
#params.random_seed = 1260616;
Ms = lmclus(data, params);
# The index vector covers every column of `data`, whatever the dataset size.
lmclus_rec(data, params, collect(1:size(data, 2)))

In [123]:
# Single timed run: LMCLUS, recursive LMCLUS, then merging (λ = 0.1),
# each scored by NMI against the true labels.
tic();
global manifolds = Manifold[];
params = LMCLUS.Parameters(2);
seed = rand(1:20000000);#174147#62017#1368015;
params.random_seed = seed;
Ms = lmclus(data, params);
# The index vector covers every column of `data`, whatever the dataset size.
lmclus_rec(data, params, collect(1:size(data, 2)))
mergeMan = mergeManifolds(manifolds, data, 0.1);
toc();
pLabs = genPredLab(Ms, size(data_original,2));
pLabs2 = genPredLab(manifolds, size(data_original,2));
predLabs = genPredLab(mergeMan, size(data_original,2));
print("NMI original = ", getNMI(freqtable(trueLab, pLabs)), "\n")
print("NMI rec = ", getNMI(freqtable(trueLab, pLabs2)), "\n")
print("NMI merge = ", getNMI(freqtable(trueLab, predLabs)), "\n")

No change! exiting...
elapsed time: 0.074092671 seconds
NMI original = 1.0
NMI rec = 1.0
NMI merge = 1.0


In [164]:
# Repeat the three pipelines 1000 times with random seeds and report the
# mean ± standard deviation of the NMI scores (merging uses λ = 0.1).
tic();
res = Dict("lmclus" => Float64[], "lmclus_rec" => Float64[], "rec_merge" => Float64[])
# Number of points taken from the loaded data instead of being hard-coded.
npoints = size(data, 2);
for i=1:1000
    global manifolds = Manifold[];
    params = LMCLUS.Parameters(2);
    seed = rand(1:200000000);
    params.random_seed = seed;
    Ms = lmclus(data, params);
    lmclus_rec(data, params, collect(1:npoints))
    mergeMan = mergeManifolds(manifolds, data, 0.1);
    pLabs = genPredLab(Ms, size(data_original,2));
    pLabs2 = genPredLab(manifolds, size(data_original,2));
    predLabs = genPredLab(mergeMan, size(data_original,2));
    push!(res["lmclus"],getNMI(freqtable(trueLab, pLabs)));
    push!(res["lmclus_rec"],getNMI(freqtable(trueLab, pLabs2)));
    push!(res["rec_merge"],getNMI(freqtable(trueLab, predLabs)));
end
toc();
A = convert(Array{Float64,1}, res["lmclus"]);
B = convert(Array{Float64,1}, res["lmclus_rec"]);
C = convert(Array{Float64,1}, res["rec_merge"]);
print("\nPERFORMANCE RESULTS : \n");
print("----------------------------\n")
print("LMCLUS               : ", round(mean(A),2)," ± ",round(std(A),2),"\n");
print("LMCLUS Recursive     : ", round(mean(B),2)," ± ",round(std(B),2),"\n");
print("LMCLUS Rec and Merge : ", round(mean(C),2)," ± ",round(std(C),2),"\n");

elapsed time: 56.791734041 seconds


In [165]:
A = convert(Array{Float64,1}, res["lmclus"]);
B = convert(Array{Float64,1}, res["lmclus_rec"]);
C = convert(Array{Float64,1}, res["rec_merge"]);

In [183]:
print("\nPERFORMANCE RESULTS : \n");
print("----------------------------\n")
print("LMCLUS               : ", round(mean(A),2)," ± ",round(std(A),2),"\n");
print("LMCLUS Recursive     : ", round(mean(B),2)," ± ",round(std(B),2),"\n");
print("LMCLUS Rec and Merge : ", round(mean(C),2)," ± ",round(std(C),2),"\n");


PERFORMANCE RESULTS : 
----------------------------
LMCLUS               : 0.78 ± 0.31
LMCLUS Recursive     : 0.91 ± 0.18
LMCLUS Rec and Merge : 0.91 ± 0.18


In [228]:
freqtable(trueLab, predLabs)

4×6 Named Array{Int64,2}
Dim1 ╲ Dim2 │ 1.0  2.0  3.0  4.0  5.0  6.0
────────────┼─────────────────────────────
1.0         │   0    3    0    0  497    0
2.0         │   0    1    8    0    0  491
3.0         │  97    8  395    0    0    0
4.0         │  23  107    0  370    0    0

In [36]:
using Distances


d = Dict();

tic()
for i in combinations(1:size(mergeMan)[1],2)
    a = i[1]; b = i[2];
    d[join([a,"->",b])] = minimum(pairwise(Euclidean(), data[:,mergeMan[a].points], data[:,mergeMan[b].points]));
end
toc()

elapsed time: 0.037705102 seconds


In [42]:
tic();
d = computeMinDist(mergeMan, data);
toc()

elapsed time: 0.024356355 seconds


In [43]:
d

Dict{Any,Any} with 15 entries:
  "1->2" => 0.05844
  "1->4" => 0.0166378
  "5->6" => 0.585427
  "1->3" => 0.0720382
  "1->5" => 0.0303039
  "3->4" => 0.0360371
  "1->6" => 0.282691
  "4->6" => 0.0625713
  "2->3" => 0.00816497
  "2->5" => 0.370507
  "3->6" => 0.0960555
  "2->4" => 0.0105825
  "3->5" => 0.218458
  "4->5" => 0.00643702
  "2->6" => 0.0074675

In [14]:
M_r = re_assign(manifolds, data, params)

4-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 1, size = 500)
 Manifold (dim = 1, size = 500)
 Manifold (dim = 1, size = 500)
 Manifold (dim = 1, size = 500)

In [16]:
plotManifolds(data_original, manifolds)

### 3-D Dataset with one 2-d and three 1-d linear manifold clusters 

In [184]:
data_original = readdlm("d:\\LMC_Datasets\\data_3_4_5_5_2111.csv", header=false, ',');
trueLab = data_original[end,:];
data = data_original[1:end-1,:];

normalizeArr(data);

In [68]:
trace,layout = plotManifolds(data_original);
plot(trace,layout)

In [187]:
# Single timed run: LMCLUS, recursive LMCLUS, then merging (λ = 0.01),
# each scored by NMI against the true labels.
tic();
global manifolds = Manifold[];
params = LMCLUS.Parameters(2);
seed = rand(1:20000000);#482743;
params.random_seed = seed;
Ms = lmclus(data, params);
# The index vector covers every column of `data`, whatever the dataset size.
lmclus_rec(data, params, collect(1:size(data, 2)))
mergeMan = mergeManifolds(manifolds, data, 0.01);
toc();
pLabs = genPredLab(Ms, size(data_original,2));
pLabs2 = genPredLab(manifolds, size(data_original,2));
predLabs = genPredLab(mergeMan, size(data_original,2));
print("NMI original = ", getNMI(freqtable(trueLab, pLabs)), "\n")
print("NMI rec = ", getNMI(freqtable(trueLab, pLabs2)), "\n")
print("NMI merge = ", getNMI(freqtable(trueLab, predLabs)), "\n")

Clusters 4 and 5 merged!!
No change! exiting...
elapsed time: 0.294627556 seconds
NMI original = 0.91
NMI rec = 0.96
NMI merge = 1.0


In [188]:
# Repeat the three pipelines 1000 times with random seeds and report the
# mean ± standard deviation of the NMI scores (merging uses λ = 0.01).
tic();
res = Dict("lmclus" => Float64[], "lmclus_rec" => Float64[], "rec_merge" => Float64[])
# Number of points taken from the loaded data instead of being hard-coded.
npoints = size(data, 2);
for i=1:1000
    global manifolds = Manifold[];
    params = LMCLUS.Parameters(2);
    seed = rand(1:200000000);
    params.random_seed = seed;
    Ms = lmclus(data, params);
    lmclus_rec(data, params, collect(1:npoints))
    mergeMan = mergeManifolds(manifolds, data, 0.01);
    pLabs = genPredLab(Ms, size(data_original,2));
    pLabs2 = genPredLab(manifolds, size(data_original,2));
    predLabs = genPredLab(mergeMan, size(data_original,2));
    push!(res["lmclus"],getNMI(freqtable(trueLab, pLabs)));
    push!(res["lmclus_rec"],getNMI(freqtable(trueLab, pLabs2)));
    push!(res["rec_merge"],getNMI(freqtable(trueLab, predLabs)));
end
toc();
A = convert(Array{Float64,1}, res["lmclus"]);
B = convert(Array{Float64,1}, res["lmclus_rec"]);
C = convert(Array{Float64,1}, res["rec_merge"]);
print("\nPERFORMANCE RESULTS : \n");
print("----------------------------\n")
print("LMCLUS               : ", round(mean(A),2)," ± ",round(std(A),2),"\n");
print("LMCLUS Recursive     : ", round(mean(B),2)," ± ",round(std(B),2),"\n");
print("LMCLUS Rec and Merge : ", round(mean(C),2)," ± ",round(std(C),2),"\n");

elapsed time: 82.27651685 seconds

PERFORMANCE RESULTS : 
----------------------------
LMCLUS               : 0.86 ± 0.09
LMCLUS Recursive     : 0.95 ± 0.03
LMCLUS Rec and Merge : 0.91 ± 0.04


In [472]:
manifolds

5-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 1, size = 500) 
 Manifold (dim = 2, size = 1000)
 Manifold (dim = 3, size = 85)  
 Manifold (dim = 1, size = 500) 
 Manifold (dim = 1, size = 415) 

In [471]:
freqtable(trueLab, predLabs)

4×4 Named Array{Int64,2}
Dim1 ╲ Dim2 │  1.0   2.0   3.0   4.0
────────────┼───────────────────────
1.0         │    0  1000     0     0
2.0         │    0     0     0   500
3.0         │  500     0     0     0
4.0         │    0     0   500     0

In [156]:
for i in combinations(1:1,2)
    print(i)
end

In [29]:
mfs = findClus(manifolds, data, params)

5-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 2, size = 1000)
 Manifold (dim = 0, size = 77)  
 Manifold (dim = 1, size = 422) 
 Manifold (dim = 1, size = 501) 
 Manifold (dim = 1, size = 500) 

In [32]:
mean(distance_to_manifold(data[:,mfs[1].points], mfs[3].μ, mfs[3].proj))

In [33]:
plotManifolds(data_original, mfs)

In [105]:
M_r = re_assign(manifolds, data, params)

5-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 2, size = 1000)
 Manifold (dim = 1, size = 500) 
 Manifold (dim = 2, size = 0)   
 Manifold (dim = 1, size = 500) 
 Manifold (dim = 1, size = 500) 

In [106]:
M_r2 = re_assignF(manifolds, data, params)

4-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 2, size = 1000)
 Manifold (dim = 1, size = 500) 
 Manifold (dim = 2, size = 1000)
 Manifold (dim = 1, size = 500) 

In [15]:
print("Shared points with cluster 1 : ", length(intersect(M_r2[2].points, M_r2[1].points)), "\n")
print("Shared points with cluster 3 : ", length(intersect(M_r2[2].points, M_r2[3].points)), "\n")
print("Shared points with cluster 4 : ", length(intersect(M_r2[2].points, M_r2[4].points)), "\n")

Shared points with cluster 1 : 816
Shared points with cluster 3 : 500
Shared points with cluster 4 : 500


In [36]:
proj_clus1 = sum(dot.(data[1:3,intersect(M_r2[2].points, M_r2[1].points)], M_r2[2].proj[:,1]),1);
proj_clus2 = sum(dot.(data[1:3,intersect(M_r2[2].points, M_r2[3].points)], M_r2[2].proj[:,1]),1);
proj_clus3 = sum(dot.(data[1:3,intersect(M_r2[2].points, M_r2[4].points)], M_r2[2].proj[:,1]),1);
#trueLab = data_original[4,:];
#M_r[2].proj[:,1]

In [39]:
print("Shared points with cluster 2 : ", length(intersect(M_r2[1].points, M_r2[2].points)), "\n")
print("Shared points with cluster 3 : ", length(intersect(M_r2[1].points, M_r2[3].points)), "\n")
print("Shared points with cluster 4 : ", length(intersect(M_r2[1].points, M_r2[4].points)), "\n")

Shared points with cluster 2 : 816
Shared points with cluster 3 : 0
Shared points with cluster 4 : 0


In [28]:
proj_clus11 = sum(dot.(data[1:3,M_r2[1].points], M_r2[1].proj[:,1]),1);
proj_clus1I = sum(dot.(data[1:3,intersect(M_r2[1].points, M_r2[2].points)], M_r2[1].proj[:,1]),1);

In [32]:
proj_clus12 = sum(dot.(data[1:3,M_r2[1].points], M_r2[1].proj[:,2]),1);
proj_clus2I = sum(dot.(data[1:3,intersect(M_r2[1].points, M_r2[2].points)], M_r2[1].proj[:,2]),1);

In [30]:
Z₂=StatsBase.zscore(proj_clus1I, mean(proj_clus11), std(proj_clus11));

In [33]:
Z₃=StatsBase.zscore(proj_clus2I, mean(proj_clus12), std(proj_clus12));

In [37]:
proj_clusA = sum(dot.(data[1:3,M_r2[2].points], M_r2[2].proj[:,1]),1);
proj_clusI = sum(dot.(data[1:3,union(M_r2[1].points, M_r2[2].points)], M_r2[2].proj[:,1]),1);
proj_clusI2 = sum(dot.(data[1:3,union(M_r2[1].points, M_r2[2].points)], M_r2[1].proj[:,1]),1);

In [29]:
Z₁ = StatsBase.zscore(proj_clusI, mean(proj_clusA), std(proj_clusA))

1×816 Array{Float64,2}:
 0.821584  0.689684  0.862198  0.583641  …  0.821529  0.480086  0.555736

In [26]:
M_r2[2].proj

3×1 Array{Float64,2}:
 -0.671078
 -0.438568
 -0.597756

In [42]:
h1 = histogram(;x=proj_clus12[1,:], opacity=0.75);
plot(h1, layout)

In [68]:
#Z = StatsBase.zscore(proj_clus11);
#Z3 = StatsBase.zscore(proj_clus12);
#Z2 = StatsBase.zscore(proj_clusA);
Z4 = StatsBase.zscore(proj4);

In [38]:
h1 = histogram(;x=proj_clusA[1,:], opacity=0.75);
h2 = histogram(;x=proj_clusI2[1,:], opacity=0.75);
h3 = histogram(;x=proj_clusI[1,:], opacity=0.75);
#h4 = histogram(;x=Z4[1,:], opacity=0.75);
layout = Layout(title="Projected distance histogram for first basis");
plot([h1,h2,h3], layout)

In [34]:
h1 = histogram(;x=Z₁[1,:], opacity=0.75);
h2 = histogram(;x=Z₂[1,:], opacity=0.75);
h3 = histogram(;x=Z₃[1,:], opacity=0.75);
#h4 = histogram(;x=Z4[1,:], opacity=0.75);
layout = Layout(title="Projected distance histogram for first basis");
plot([h1,h2,h3], layout)

### 3-D Dataset with two 2-d and two 1-d linear manifold clusters 

In [189]:
data_original = readdlm("d:\\LMC_Datasets\\data_3_4_5_5_2211.csv", header=false, ',');
trueLab = data_original[end,:];
data = data_original[1:end-1,:];

normalizeArr(data);

trace,layout = plotManifolds(data_original);
plot(trace,layout)



In [279]:
# Single timed run: LMCLUS, recursive LMCLUS, then merging (λ = 0.01),
# each scored by NMI against the true labels.
tic();
global manifolds = Manifold[];
params = LMCLUS.Parameters(2);
seed = rand(1:20000000);#482743;
params.random_seed = seed;
Ms = lmclus(data, params);
# The index vector covers every column of `data`, whatever the dataset size.
lmclus_rec(data, params, collect(1:size(data, 2)))
mergeMan = mergeManifolds(manifolds, data, 0.01);
toc();
pLabs = genPredLab(Ms, size(data_original,2));
pLabs2 = genPredLab(manifolds, size(data_original,2));
predLabs = genPredLab(mergeMan, size(data_original,2));
print("NMI original = ", getNMI(freqtable(trueLab, pLabs)), "\n")
print("NMI rec = ", getNMI(freqtable(trueLab, pLabs2)), "\n")
print("NMI merge = ", getNMI(freqtable(trueLab, predLabs)), "\n")

Clusters 2 and 3 merged!!
Clusters 2 and 4 merged!!
No change! exiting...
elapsed time: 0.435011161 seconds
NMI original = 0.9
NMI rec = 0.86
NMI merge = 0.87


In [194]:
# Repeat the three pipelines 1000 times with random seeds and report the
# mean ± standard deviation of the NMI scores (merging uses λ = 0).
tic();
res = Dict("lmclus" => Float64[], "lmclus_rec" => Float64[], "rec_merge" => Float64[])
# Number of points taken from the loaded data instead of being hard-coded.
npoints = size(data, 2);
for i=1:1000
    global manifolds = Manifold[];
    params = LMCLUS.Parameters(2);
    seed = rand(1:200000000);
    params.random_seed = seed;
    Ms = lmclus(data, params);
    lmclus_rec(data, params, collect(1:npoints))
    mergeMan = mergeManifolds(manifolds, data, 0);
    pLabs = genPredLab(Ms, size(data_original,2));
    pLabs2 = genPredLab(manifolds, size(data_original,2));
    predLabs = genPredLab(mergeMan, size(data_original,2));
    push!(res["lmclus"],getNMI(freqtable(trueLab, pLabs)));
    push!(res["lmclus_rec"],getNMI(freqtable(trueLab, pLabs2)));
    push!(res["rec_merge"],getNMI(freqtable(trueLab, predLabs)));
end
toc();
A = convert(Array{Float64,1}, res["lmclus"]);
B = convert(Array{Float64,1}, res["lmclus_rec"]);
C = convert(Array{Float64,1}, res["rec_merge"]);
print("\nPERFORMANCE RESULTS : \n");
print("----------------------------\n")
print("LMCLUS               : ", round(mean(A),2)," ± ",round(std(A),2),"\n");
print("LMCLUS Recursive     : ", round(mean(B),2)," ± ",round(std(B),2),"\n");
print("LMCLUS Rec and Merge : ", round(mean(C),2)," ± ",round(std(C),2),"\n");

elapsed time: 108.767121291 seconds

PERFORMANCE RESULTS : 
----------------------------
LMCLUS               : 0.86 ± 0.08
LMCLUS Recursive     : 0.89 ± 0.05
LMCLUS Rec and Merge : 0.9 ± 0.05


In [525]:
freqtable(trueLab, pLabs)

4×4 Named Array{Int64,2}
Dim1 ╲ Dim2 │  1.0   2.0   3.0   4.0
────────────┼───────────────────────
1.0         │    8   992     0     0
2.0         │    0     0     0  1000
3.0         │    0     0   500     0
4.0         │  500     0     0     0

In [23]:
M_r = re_assign(manifolds, data, params)

4-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 1, size = 500) 
 Manifold (dim = 2, size = 1000)
 Manifold (dim = 2, size = 1000)
 Manifold (dim = 1, size = 500) 

In [24]:
plotManifolds(data_original, M_r)

### 3-D Dataset with three 2-d and one 1-d linear manifold clusters 

In [195]:
data_original = readdlm("d:\\LMC_Datasets\\data_3_4_5_5_2221.csv", header=false, ',');
trueLab = data_original[end,:];
data = data_original[1:end-1,:];

normalizeArr(data);

#trace,layout = plotManifolds(data_original);
#plot(trace,layout)

In [350]:
# Single timed run: LMCLUS, recursive LMCLUS, then best-merge (`mergeManifolds2`,
# λ = 0.01), each scored by NMI against the true labels.
tic();
global manifolds = Manifold[];
params = LMCLUS.Parameters(2);
seed = rand(1:2000000);#482743;728753;#
params.random_seed = seed;
Ms = lmclus(data, params);
# The index vector covers every column of `data`, whatever the dataset size.
lmclus_rec(data, params, collect(1:size(data, 2)))
mergeMan = mergeManifolds2(manifolds, data, 0.01);
toc();
pLabs = genPredLab(Ms, size(data_original,2));
pLabs2 = genPredLab(manifolds, size(data_original,2));
predLabs = genPredLab(mergeMan, size(data_original,2));
print("NMI original = ", getNMI(freqtable(trueLab, pLabs)), "\n")
print("NMI rec = ", getNMI(freqtable(trueLab, pLabs2)), "\n")
print("NMI merge = ", getNMI(freqtable(trueLab, predLabs)), "\n")

No change! exiting...
elapsed time: 0.282136889 seconds
NMI original = 0.8
NMI rec = 0.96
NMI merge = 0.96


In [197]:
# Benchmark on the loaded dataset: 1000 independently seeded runs of plain LMCLUS,
# recursive LMCLUS and recursive LMCLUS + merging (merge threshold 0), each scored
# by NMI against the ground-truth labels.
tic();
# Typed score vectors: values are stored as Float64 from the start, so no
# Any-typed arrays need converting afterwards.
res = Dict("lmclus" => Float64[], "lmclus_rec" => Float64[], "rec_merge" => Float64[])
for i=1:1000
    # Reset the global list of manifolds before every recursive run
    # (presumably filled by lmclus_rec — confirm against its definition).
    global manifolds = Manifold[];
    params = LMCLUS.Parameters(2);
    seed = rand(1:200000000);
    params.random_seed = seed;
    Ms = lmclus(data, params);
    #print("Original Cluster : " , Ms, "\n");
    # Index range follows the dataset size instead of a hard-coded 1:3500.
    lmclus_rec(data, params, collect(1:size(data,2)))
    #print("Rec Cluster : " , manifolds, "\n");
    mergeMan = mergeManifolds(manifolds, data, 0);
    #print("merge Cluster : " , mergeMan, "\n\n");
    pLabs = genPredLab(Ms, size(data_original,2));
    pLabs2 = genPredLab(manifolds, size(data_original,2));
    predLabs = genPredLab(mergeMan, size(data_original,2));
    push!(res["lmclus"],getNMI(freqtable(trueLab, pLabs)));
    push!(res["lmclus_rec"],getNMI(freqtable(trueLab, pLabs2)));
    push!(res["rec_merge"],getNMI(freqtable(trueLab, predLabs)));

end
toc();
A = res["lmclus"];
B = res["lmclus_rec"];
C = res["rec_merge"];
# Report mean ± standard deviation of the NMI over all runs, rounded to 2 digits.
print("\nPERFORMANCE RESULTS : \n");
print("----------------------------\n")
print("LMCLUS               : ", round(mean(A),2)," ± ",round(std(A),2),"\n");
print("LMCLUS Recursive     : ", round(mean(B),2)," ± ",round(std(B),2),"\n");
print("LMCLUS Rec and Merge : ", round(mean(C),2)," ± ",round(std(C),2),"\n");

elapsed time: 158.955100427 seconds

PERFORMANCE RESULTS : 
----------------------------
LMCLUS               : 0.82 ± 0.16
LMCLUS Recursive     : 0.86 ± 0.13
LMCLUS Rec and Merge : 0.86 ± 0.13


In [200]:
# Benchmark on the loaded dataset: 1000 independently seeded runs of plain LMCLUS,
# recursive LMCLUS and recursive LMCLUS + merging (merge threshold 0.01), each scored
# by NMI against the ground-truth labels.
tic();
# Typed score vectors: values are stored as Float64 from the start, so no
# Any-typed arrays need converting afterwards.
res = Dict("lmclus" => Float64[], "lmclus_rec" => Float64[], "rec_merge" => Float64[])
for i=1:1000
    # Reset the global list of manifolds before every recursive run
    # (presumably filled by lmclus_rec — confirm against its definition).
    global manifolds = Manifold[];
    params = LMCLUS.Parameters(2);
    seed = rand(1:200000000);
    params.random_seed = seed;
    Ms = lmclus(data, params);
    #print("Original Cluster : " , Ms, "\n");
    # Index range follows the dataset size instead of a hard-coded 1:3500.
    lmclus_rec(data, params, collect(1:size(data,2)))
    #print("Rec Cluster : " , manifolds, "\n");
    mergeMan = mergeManifolds(manifolds, data, 0.01);
    #print("merge Cluster : " , mergeMan, "\n\n");
    pLabs = genPredLab(Ms, size(data_original,2));
    pLabs2 = genPredLab(manifolds, size(data_original,2));
    predLabs = genPredLab(mergeMan, size(data_original,2));
    push!(res["lmclus"],getNMI(freqtable(trueLab, pLabs)));
    push!(res["lmclus_rec"],getNMI(freqtable(trueLab, pLabs2)));
    push!(res["rec_merge"],getNMI(freqtable(trueLab, predLabs)));

end
toc();
A = res["lmclus"];
B = res["lmclus_rec"];
C = res["rec_merge"];
# Report mean ± standard deviation of the NMI over all runs, rounded to 2 digits.
print("\nPERFORMANCE RESULTS : \n");
print("----------------------------\n")
print("LMCLUS               : ", round(mean(A),2)," ± ",round(std(A),2),"\n");
print("LMCLUS Recursive     : ", round(mean(B),2)," ± ",round(std(B),2),"\n");
print("LMCLUS Rec and Merge : ", round(mean(C),2)," ± ",round(std(C),2),"\n");

elapsed time: 157.772356651 seconds

PERFORMANCE RESULTS : 
----------------------------
LMCLUS               : 0.81 ± 0.18
LMCLUS Recursive     : 0.86 ± 0.14
LMCLUS Rec and Merge : 0.86 ± 0.14


In [29]:
plotManifolds(data_original, M_r)

### 3-D Dataset with four 2-d linear manifold clusters 

In [201]:
# Load the dataset with four 2-d linear manifold clusters.
# Each column is one point; the last row holds the ground-truth cluster label.
data_original = readdlm("d:\\LMC_Datasets\\data_3_4_5_5_2222.csv", ',', header=false);
data = data_original[1:end-1, :];
trueLab = data_original[end, :];

# Rescale every coordinate row of `data` to [0, 1] in place
# (`data` is a copy, so `data_original` keeps the raw coordinates).
normalizeArr(data);

# Plot the raw points coloured by their ground-truth label.
trace,layout = plotManifolds(data_original);
plot(trace,layout)

In [375]:
# Single randomly seeded run on the loaded dataset, comparing three pipelines by NMI
# against the ground-truth labels:
#   1. plain LMCLUS, 2. recursive LMCLUS, 3. recursive LMCLUS followed by merging.
tic();
# Reset the global list of manifolds before the recursive run
# (presumably filled by lmclus_rec — confirm against its definition).
global manifolds = Manifold[];
params = LMCLUS.Parameters(2);
seed = rand(1:2000000);  # fixed seeds tried earlier: 482743, 728753
params.random_seed = seed;
Ms = lmclus(data, params);
#print("Original Cluster : " , Ms, "\n");
# Hand over the index of every column of `data` rather than a hard-coded 1:4000,
# so the cell stays correct when a dataset of a different size is loaded.
lmclus_rec(data, params, collect(1:size(data,2)))
#print("Rec Cluster : " , manifolds, "\n");
mergeMan = mergeManifolds(manifolds, data, 0.1);
#print("merge Cluster : " , mergeMan, "\n\n");
toc();
# Turn each list of manifolds into one predicted label per point.
pLabs = genPredLab(Ms, size(data_original,2));
pLabs2 = genPredLab(manifolds, size(data_original,2));
predLabs = genPredLab(mergeMan, size(data_original,2));
print("NMI original = ", getNMI(freqtable(trueLab, pLabs)), "\n")
print("NMI rec = ", getNMI(freqtable(trueLab, pLabs2)), "\n")
print("NMI merge = ", getNMI(freqtable(trueLab, predLabs)), "\n")

No change! exiting...
elapsed time: 1.857546272 seconds
NMI original = 0.77
NMI rec = 0.82
NMI merge = 0.82


In [203]:
# Benchmark on the loaded dataset: 1000 independently seeded runs of plain LMCLUS,
# recursive LMCLUS and recursive LMCLUS + merging (merge threshold 0), each scored
# by NMI against the ground-truth labels.
tic();
# Typed score vectors: values are stored as Float64 from the start, so no
# Any-typed arrays need converting afterwards.
res = Dict("lmclus" => Float64[], "lmclus_rec" => Float64[], "rec_merge" => Float64[])
for i=1:1000
    # Reset the global list of manifolds before every recursive run
    # (presumably filled by lmclus_rec — confirm against its definition).
    global manifolds = Manifold[];
    params = LMCLUS.Parameters(2);
    seed = rand(1:200000000);
    params.random_seed = seed;
    Ms = lmclus(data, params);
    #print("Original Cluster : " , Ms, "\n");
    # Index range follows the dataset size instead of a hard-coded 1:4000.
    lmclus_rec(data, params, collect(1:size(data,2)))
    #print("Rec Cluster : " , manifolds, "\n");
    mergeMan = mergeManifolds(manifolds, data, 0);
    #print("merge Cluster : " , mergeMan, "\n\n");
    pLabs = genPredLab(Ms, size(data_original,2));
    pLabs2 = genPredLab(manifolds, size(data_original,2));
    predLabs = genPredLab(mergeMan, size(data_original,2));
    push!(res["lmclus"],getNMI(freqtable(trueLab, pLabs)));
    push!(res["lmclus_rec"],getNMI(freqtable(trueLab, pLabs2)));
    push!(res["rec_merge"],getNMI(freqtable(trueLab, predLabs)));
end
toc();
A = res["lmclus"];
B = res["lmclus_rec"];
C = res["rec_merge"];
# Report mean ± standard deviation of the NMI over all runs, rounded to 2 digits.
print("\nPERFORMANCE RESULTS : \n");
print("----------------------------\n")
print("LMCLUS               : ", round(mean(A),2)," ± ",round(std(A),2),"\n");
print("LMCLUS Recursive     : ", round(mean(B),2)," ± ",round(std(B),2),"\n");
print("LMCLUS Rec and Merge : ", round(mean(C),2)," ± ",round(std(C),2),"\n");

elapsed time: 176.20434418 seconds

PERFORMANCE RESULTS : 
----------------------------
LMCLUS               : 0.75 ± 0.16
LMCLUS Recursive     : 0.84 ± 0.16
LMCLUS Rec and Merge : 0.84 ± 0.16


In [12]:
# NMI for LMCLUS_recursive
# Labels are derived from the global `manifolds` list, then scored against `trueLab`.
predLabs = genPredLab(manifolds, size(data_original,2));
getNMI(freqtable(trueLab, predLabs))

In [35]:
# NMI for LMCLUS_recursive + merging
# NOTE(review): this cell reads `mergMan`, whereas the other cells store the merged
# manifolds in `mergeMan` — confirm `mergMan` is defined and is the intended variable.
predLabs = genPredLab(mergMan, size(data_original,2));
getNMI(freqtable(trueLab, predLabs))

In [36]:
freqtable(trueLab, predLabs)

4×5 Named Array{Int64,2}
Dim1 ╲ Dim2 │  1.0   2.0   3.0   4.0   5.0
────────────┼─────────────────────────────
1.0         │  109    90   334   467     0
2.0         │    0   963     0     0    37
3.0         │  993     0     0     0     7
4.0         │    0     0     0     0  1000

In [109]:
Ms

4-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 1, size = 1660)
 Manifold (dim = 1, size = 1008)
 Manifold (dim = 1, size = 924) 
 Manifold (dim = 0, size = 408) 

In [59]:
manifolds

6-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 2, size = 1072)
 Manifold (dim = 2, size = 457) 
 Manifold (dim = 2, size = 131) 
 Manifold (dim = 2, size = 1008)
 Manifold (dim = 2, size = 924) 
 Manifold (dim = 2, size = 408) 

mergeManifolds (generic function with 1 method)

In [27]:
mergeManifolds(manifolds, data)

4-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 2, size = 1072)
 Manifold (dim = 2, size = 1008)
 Manifold (dim = 2, size = 924) 
 Manifold (dim = 2, size = 996) 

Clusters 2 and 6 merged!!
Clusters 2 and 5 merged!!
No change! exiting...


In [96]:
# Reproducible single run (fixed seed 63261) that prints the clusters found at each stage —
# plain LMCLUS, recursive LMCLUS, recursive + merging — followed by the NMI of each stage.
tic();
# Reset the global list of manifolds before the recursive run
# (presumably filled by lmclus_rec — confirm against its definition).
global manifolds = Manifold[];
params = LMCLUS.Parameters(2);
params.random_seed = 63261;
Ms = lmclus(data, params);
print("Original Cluster : " , Ms, "\n");
# Hand over the index of every column of `data` rather than a hard-coded 1:4000,
# so the cell stays correct when a dataset of a different size is loaded.
lmclus_rec(data, params, collect(1:size(data,2)))
print("Rec Cluster : " , manifolds, "\n");
mergeMan = mergeManifolds(manifolds, data);
print("merge Cluster : " , mergeMan, "\n\n");
toc();
# Turn each list of manifolds into one predicted label per point.
pLabs = genPredLab(Ms, size(data_original,2));
pLabs2 = genPredLab(manifolds, size(data_original,2));
predLabs = genPredLab(mergeMan, size(data_original,2));
print("NMI original = ", getNMI(freqtable(trueLab, pLabs)), "\n")
print("NMI rec = ", getNMI(freqtable(trueLab, pLabs2)), "\n")
print("NMI merge = ", getNMI(freqtable(trueLab, predLabs)), "\n")

Original Cluster : LMCLUS.Manifold[Manifold (dim = 1, size = 2991), Manifold (dim = 0, size = 1009)]
Rec Cluster : LMCLUS.Manifold[Manifold (dim = 2, size = 1075), Manifold (dim = 2, size = 488), Manifold (dim = 2, size = 925), Manifold (dim = 2, size = 428), Manifold (dim = 0, size = 75), Manifold (dim = 2, size = 1009)]
Clusters 1 and 5 merged!!
Clusters 1 and 3 merged!!
No change! exiting...
merge Cluster : LMCLUS.Manifold[Manifold (dim = 2, size = 925), Manifold (dim = 2, size = 1009), Manifold (dim = 2, size = 1150), Manifold (dim = 2, size = 916)]

elapsed time: 0.185012938 seconds
NMI original = 0.62
NMI rec = 0.87
NMI merge = 0.89


In [56]:
predLabs = genPredLab(mergeMan, size(data_original,2));
getNMI(freqtable(trueLab, predLabs))

In [105]:
rand(1:100000)

In [None]:
# Fit a PCA to the union of the points (columns of `data`) of clusters 2 and 3,
# with the variance-ratio setting pratio = 0.95.
M = fit(PCA, data[:,[manifolds[2].points; manifolds[3].points]],pratio=0.95)


In [16]:
predLabs = genPredLab(manifolds, size(data_original,2))
cDict

Dict{String,String} with 8 entries:
  "8" => "rgb(87,153,199)"
  "4" => "violet"
  "1" => "red"
  "5" => "black"
  "2" => "blue"
  "6" => "orange"
  "7" => "magenta"
  "3" => "green"

In [2]:
# Scratch experiment: keep doubling `n` until the value left in `ssd` exceeds the
# threshold `ssdt`, then report that value.
n = 5
ssd = 0   # defined up front so the comparison below never reads an undefined or stale variable
while true
    # The loop reassigns these top-level variables; saying so explicitly keeps the
    # behaviour independent of version-specific loop scoping rules.
    global n, ssd
    ssdt = 200;
    for i=1:n
        ssd = i*i;   # overwritten on every pass: only the last value, n*n, survives
    end
    
    if ssd > ssdt
        print("ssd is greator than ssdt and equal to ", ssd);
        break;
    end
    
    n = n * 2
end

ssd is greator than ssdt and equal to 400

In [11]:
# For every unordered pair of clusters in `manifolds`, fit a PCA (pratio = 0.95) to the
# union of their points and report the pairs whose fitted subspace has fewer dimensions
# than the input data.
#manCopy = deepcopy(manifolds);
for i=1:length(manifolds), j=i+1:length(manifolds)
    M = fit(PCA, data[:, vcat(manifolds[i].points, manifolds[j].points)]; pratio = 0.95);
    if MultivariateStats.outdim(M) < MultivariateStats.indim(M)
        print("Cluster $i and $j have pratio : $(principalratio(M))\n")
    end
end

Cluster 1 and 3 have pratio : 0.9913631876386377
Cluster 1 and 5 have pratio : 0.9720253843073957
Cluster 1 and 6 have pratio : 0.9831135758516818
Cluster 2 and 3 have pratio : 0.9952120524804308
Cluster 2 and 6 have pratio : 0.9899501977448013
Cluster 3 and 4 have pratio : 0.9840333901356274
Cluster 3 and 6 have pratio : 0.9782454383704895
Cluster 4 and 5 have pratio : 0.9936874833377533
Cluster 4 and 6 have pratio : 0.9587619160662391


In [78]:
# Try to merge clusters 2 and 6: fit a PCA (pratio = 0.95) to the union of their points and
# look for a separation of those points with respect to the fitted mean and basis.
M = fit(PCA, data[:,[manifolds[2].points; manifolds[6].points]],pratio=0.95)

newMan = Manifold[]

s = 
try
    # Select the COLUMNS belonging to both clusters, exactly as in the PCA fit above.
    # Writing `data[a; b]` (without `:,`) is not column indexing, so the call could
    # never succeed and the catch branch below ran regardless of the data.
    find_separation(data[:, [manifolds[2].points; manifolds[6].points]], M.mean, M.proj, params)
catch
    # NOTE(review): every error raised by find_separation is treated as
    # "no separation found" — confirm which exception signals that case.
    print("perfect merge...")
    # Only keep the merged cluster when the PCA actually reduced the dimension.
    if MultivariateStats.indim(M) > MultivariateStats.outdim(M)
        push!(newMan, Manifold(MultivariateStats.outdim(M), M.mean, M.proj, [manifolds[2].points; manifolds[6].points], LMCLUS.Separation()));
    end
end

1-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 2, size = 865)

perfect merge...

In [77]:
computeSSD([manifolds[2], manifolds[6]],data)

In [79]:
computeSSD(newMan, data)

In [74]:
# Add clusters 1, 4, 3 and 5 (in that order) to the list `newMan`.
append!(newMan, manifolds[[1, 4, 3, 5]]);

In [113]:
# Assemble a candidate cluster list: clusters 1, 4, 5 and 6 followed by `newMan`
# (clusters 2 and 3 are deliberately left out, see the disabled lines).
Mtemp = Manifold[];
append!(Mtemp, manifolds[[1, 4, 5, 6]]);
# push!(Mtemp, manifolds[2]);
# push!(Mtemp, manifolds[3]);
push!(Mtemp, newMan);

In [111]:
# Try to merge clusters 2, 3 and 5: fit a PCA (pratio = 0.95) to the union of their points
# and look for a separation of those points with respect to the fitted mean and basis.
#find_separation(data[:, [manifolds[2].points; manifolds[6].points]], manifolds[2].μ, manifolds[2].proj, params)
M = fit(PCA, data[:, [manifolds[2].points; manifolds[3].points; manifolds[5].points]], pratio=0.95);

s = 
try
    # Select the COLUMNS belonging to the three clusters, exactly as in the PCA fit above.
    # Writing `data[a; b; c]` (without `:,`) is not column indexing, so the call could
    # never succeed and "perfect merge..." was printed regardless of the data.
    find_separation(data[:, [manifolds[2].points; manifolds[3].points; manifolds[5].points]], M.mean, M.proj, params)
catch
    # NOTE(review): every error raised by find_separation is treated as
    # "no separation found" — confirm which exception signals that case.
    # In this branch `s` receives the return value of print, not a separation.
    print("perfect merge...")
end

# s1 = 
# try
#     find_separation(data[:, [manifolds[1].points; manifolds[5].points]], manifolds[5].μ, manifolds[5].proj, params)
# catch
#     print("perfect merge...")
# end

perfect merge...

In [115]:
M

PCA(indim = 3, outdim = 3, principalratio = 1.00000)

In [46]:
# Apply the separation `s` to all point indices 1:4000 using the PCA mean and basis in `M`;
# presumably returns the indices kept in the cluster and those removed — confirm against LMCLUS.
# NOTE(review): `s` comes from a try/catch cell; if its catch branch ran, `s` is not a
# separation object — verify before relying on this result.
cluster_points, removed_points = filter_separeted(Vector(1:4000), data, M.mean, M.proj, s);
#cluster_points1, removed_points1 = filter_separeted([manifolds[1].points; manifolds[5].points], data, manifolds[5].μ, manifolds[5].proj, s1);

In [112]:
# Build one manifold from the union of the points of clusters 2, 3 and 5, using the PCA mean
# and basis stored in `M` and an empty separation.
# NOTE(review): the dimension is hard-coded to 2 while `M.proj` is passed whole — confirm this
# matches the output dimension of the fitted PCA.
newMan = Manifold(2, M.mean, M.proj, [manifolds[2].points; manifolds[3].points; manifolds[5].points], LMCLUS.Separation());

In [12]:
# One predicted label per point from the global `manifolds` list, cross-tabulated against
# the ground-truth labels (rows: true label, columns: predicted label).
predLab = genPredLab(manifolds, size(data_original,2));
freqtable(trueLab, predLab)

4×6 Named Array{Int64,2}
Dim1 ╲ Dim2 │  1.0   2.0   3.0   4.0   5.0   6.0
────────────┼───────────────────────────────────
1.0         │   79   456   131     2     0   332
2.0         │  993     1     0     6     0     0
3.0         │    0     0     0     0   924    76
4.0         │    0     0     0  1000     0     0

In [11]:
plotManifolds(data_original, manifolds)



In [33]:
M_r = re_assign(manifolds, data, params)

4-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 2, size = 1000)
 Manifold (dim = 2, size = 1058)
 Manifold (dim = 2, size = 1072)
 Manifold (dim = 2, size = 870) 

In [34]:
plotManifolds(data_original, M_r)

In [35]:
# Run `re_assign2` (defined elsewhere in this notebook) on the recursively found manifolds.
# The cells below show that the resulting clusters can share points.
M_r2 = re_assign2(manifolds, data, params)

4-element Array{LMCLUS.Manifold,1}:
 Manifold (dim = 2, size = 1000)
 Manifold (dim = 2, size = 1144)
 Manifold (dim = 2, size = 1198)
 Manifold (dim = 2, size = 1006)

### Intersecting assignments
#### The first cluster shares points with clusters 2, 3 and 4.

In [36]:
# Count the points that cluster 1 of M_r2 has in common with each of the other clusters.
foreach([2, 3, 4]) do other
    common = intersect(M_r2[1].points, M_r2[other].points)
    print("Shared points with cluster ", other, " : ", length(common), "\n")
end

Shared points with cluster 2 : 86
Shared points with cluster 3 : 126
Shared points with cluster 4 : 6


In [37]:
# Coordinate of every point of cluster 1 along its first and second basis vector:
# the inner product of each data column with proj[:,k] (no mean-centering is applied),
# kept as a 1×n row. Mean and standard deviation of both are printed.
proj_clus1 = sum(data[1:3,M_r2[1].points] .* M_r2[1].proj[:,1], 1);
proj_clus2 = sum(data[1:3,M_r2[1].points] .* M_r2[1].proj[:,2], 1);
println("Mean index 1= ", mean(proj_clus1));
println("Std index 1= ", std(proj_clus1));
println("Mean index 2= ", mean(proj_clus2));
println("Std index 2= ", std(proj_clus2));

Mean index 1= 0.9188598719571001
Std index 1= 0.11005556081118063
Mean index 2= 0.5145720599953133
Std index 2= 0.09281059870463258


In [38]:
# Side-by-side histograms of the projections stored in proj_clus1 / proj_clus2.
layout = Layout(title="Projected distance histogram for first basis");
layout2 = Layout(;title="Projected distance histogram for second basis",bin_size=20);
h1 = histogram(;x=proj_clus1[1,:], opacity=0.75);
h2 = histogram(x=proj_clus2[1,:], opacity=0.75);
#layout = Layout(barmode="overlay")
p = hcat(plot(h1, layout), plot(h2, layout2))
# Hide the legend of the combined figure.
p.plot.layout["showlegend"] = false
p

#### The second cluster shares points with cluster 1 and 4.

In [39]:
# Count the points that cluster 2 of M_r2 has in common with each of the other clusters.
foreach([1, 3, 4]) do other
    common = intersect(M_r2[2].points, M_r2[other].points)
    print("Shared points with cluster ", other, " : ", length(common), "\n")
end

Shared points with cluster 1 : 86
Shared points with cluster 3 : 0
Shared points with cluster 4 : 58


In [40]:
# Coordinate of every point of cluster 2 along its first and second basis vector:
# the inner product of each data column with proj[:,k] (no mean-centering is applied),
# kept as a 1×n row. Mean and standard deviation of both are printed.
proj_clus1 = sum(data[1:3,M_r2[2].points] .* M_r2[2].proj[:,1], 1);
proj_clus2 = sum(data[1:3,M_r2[2].points] .* M_r2[2].proj[:,2], 1);
println("Mean index 1= ", mean(proj_clus1));
println("Std index 1= ", std(proj_clus1));
println("Mean index 2= ", mean(proj_clus2));
println("Std index 2= ", std(proj_clus2));

Mean index 1= 1.194018109676346
Std index 1= 0.17105481464602232
Mean index 2= 0.9162602756093647
Std index 2= 0.15136489737286477


In [41]:
# Side-by-side histograms of the projections stored in proj_clus1 / proj_clus2.
layout = Layout(title="Projected distance histogram for first basis");
layout2 = Layout(title="Projected distance histogram for second basis");
h1 = histogram(x=proj_clus1[1,:], opacity=0.75);
h2 = histogram(x=proj_clus2[1,:], opacity=0.75);
#layout = Layout(barmode="overlay")
p = hcat(plot(h1, layout), plot(h2, layout2))
# Hide the legend of the combined figure.
p.plot.layout["showlegend"] = false
p

#### The third cluster shares points with cluster 1 and 4

In [42]:
# Count the points that cluster 3 of M_r2 has in common with each of the other clusters.
foreach([1, 2, 4]) do other
    common = intersect(M_r2[3].points, M_r2[other].points)
    print("Shared points with cluster ", other, " : ", length(common), "\n")
end

Shared points with cluster 1 : 126
Shared points with cluster 2 : 0
Shared points with cluster 4 : 72


In [43]:
# Coordinate of every point of cluster 3 along its first and second basis vector:
# the inner product of each data column with proj[:,k] (no mean-centering is applied),
# kept as a 1×n row. Mean and standard deviation of both are printed.
proj_clus1 = sum(data[1:3,M_r2[3].points] .* M_r2[3].proj[:,1], 1);
proj_clus2 = sum(data[1:3,M_r2[3].points] .* M_r2[3].proj[:,2], 1);
println("Mean index 1= ", mean(proj_clus1));
println("Std index 1= ", std(proj_clus1));
println("Mean index 2= ", mean(proj_clus2));
println("Std index 2= ", std(proj_clus2));

Mean index 1= -0.36758504139891546
Std index 1= 0.3175204073037777
Mean index 2= 0.14748113129552587
Std index 2= 0.155453616151615


In [183]:
# Side-by-side histograms of the projections stored in proj_clus1 / proj_clus2.
layout = Layout(title="Projected distance histogram for first basis");
layout2 = Layout(;title="Projected distance histogram for second basis",bin_size=20);
h1 = histogram(;x=proj_clus1[1,:], opacity=0.75);
h2 = histogram(x=proj_clus2[1,:], opacity=0.75);
#layout = Layout(barmode="overlay")
p = hcat(plot(h1, layout), plot(h2, layout2))
# Hide the legend of the combined figure.
p.plot.layout["showlegend"] = false
p

#### The fourth cluster shares points with cluster 1,2 and 3.

In [44]:
# Count the points that cluster 4 of M_r2 has in common with each of the other clusters.
foreach([1, 2, 3]) do other
    common = intersect(M_r2[4].points, M_r2[other].points)
    print("Shared points with cluster ", other, " : ", length(common), "\n")
end

Shared points with cluster 1 : 6
Shared points with cluster 2 : 58
Shared points with cluster 3 : 72


In [45]:
# Coordinate of every point of cluster 4 along its first and second basis vector:
# the inner product of each data column with proj[:,k] (no mean-centering is applied),
# kept as a 1×n row. Mean and standard deviation of both are printed.
proj_clus1 = sum(data[1:3,M_r2[4].points] .* M_r2[4].proj[:,1], 1);
proj_clus2 = sum(data[1:3,M_r2[4].points] .* M_r2[4].proj[:,2], 1);
println("Mean index 1= ", mean(proj_clus1));
println("Std index 1= ", std(proj_clus1));
println("Mean index 2= ", mean(proj_clus2));
println("Std index 2= ", std(proj_clus2));

Mean index 1= -0.016354531746892557
Std index 1= 0.13375730177973016
Mean index 2= 0.7451107343467935
Std index 2= 0.11596705436985161


In [46]:
# Side-by-side histograms of the projections stored in proj_clus1 / proj_clus2.
layout = Layout(title="Projected distance histogram for first basis");
layout2 = Layout(;title="Projected distance histogram for second basis",bin_size=20);
h1 = histogram(;x=proj_clus1[1,:], opacity=0.75);
h2 = histogram(x=proj_clus2[1,:], opacity=0.75);
#layout = Layout(barmode="overlay")
p = hcat(plot(h1, layout), plot(h2, layout2))
# Hide the legend of the combined figure.
p.plot.layout["showlegend"] = false
p

In [51]:
using Clustering

# K-means baseline: cluster the points of the dataset into 4 groups and show them in 3-D,
# coloured by the k-means assignment.
# Each column is one point; the last row holds the ground-truth cluster label.
# NOTE(review): unlike the other dataset cells, normalizeArr(data) is not called here —
# confirm that clustering the unscaled coordinates is intended.
data_original = readdlm("d:\\LMC_Datasets\\data_3_4_5_30_2211.csv", ',', header=false);
data = data_original[1:end-1, :];
trueLab = data_original[end, :];

R = kmeans(data, 4);
kMeansPred = Clustering.assignments(R);

trace1 = scatter3d(;x=data_original[1,:], y=data_original[2,:], z=data_original[3,:], mode="markers", marker_size=0.5, marker_color=[cDict[string(Int(lbl))] for lbl in kMeansPred])
layout = Layout(;title="K-Means Clusters");
plot(trace1,layout)