In [1]:
using DataFrames
using CSV
## Read the city table, then stack its X and Y columns as the two rows of a
## Float64 coordinate matrix (one column per city).
Cities = CSV.read("cities.csv")
Data = Matrix{Float64}(vcat(Cities[:X]', Cities[:Y]'));

In [151]:
## Display the coordinate matrix (the cell's value is echoed as the output below)
Data

2×197769 Array{Float64,2}:
  316.837  4377.41   3454.16  4688.1  …  2615.3   4775.89  2994.23  1354.76
 2202.34    336.602  2820.05  2935.9     2267.98  3103.85  1931.76  3218.1 

In [4]:
## Small random instance for quick experiments: replaces Data with
## 2000 points drawn uniformly from [0,1) in each coordinate.
Data = rand(Float64, 2, 2000)

2×2000 Array{Float64,2}:
 0.775948  0.116337  0.534243  0.079839  …  0.285189  0.796065  0.565594
 0.127127  0.40738   0.744475  0.599656     0.262211  0.561397  0.314744

In [5]:
## Load the ACO solver, then define the arguments this notebook passes to it.
include("src//ParaSelectACO.jl")
## Range: four [lower upper] rows, handed to ParaSelectACO as its first argument.
## NOTE(review): presumably one search interval per ACO parameter; confirm
## the row order against ParaSelectACO.jl.
Range = [
    0.0  5.0
    0.0  5.0
    0.0  1.0
    0.0 30.0
]
## N_sample is passed as ParaSelectACO's second argument.
N_sample = 500;

## Inter-cluster distance helper used when ordering sub-clusters.
include("src//kNNRWICDist.jl")

kNNRWICDist

In [6]:
## k controls the splitting: a cluster C is divided into
## min(k, ceil(length(C)/k)) sub-clusters, so clusters of at most k points are not split.
k = 50
## N_d is the number of k-means attempts the loop step makes to separate a cluster's
## in-point from its out-point before it falls back to CutHalf2.
N_d = 10

10

In [7]:
## State shared by the initialization step and the loop step:
##   C               - point indices (columns of Data) of the cluster being split
##   Sub_Clusters    - clusters that still have to be processed
##   Bottom_Clusters - clusters that are not split any further
##   Path            - the clusters in visiting order
##   In_Out          - for each cluster in Path, [in-point, out-point] given as
##                     positions inside that cluster (not as global point indices)
##   T               - counter of the splits performed by the loop step
C = collect(1:size(Data,2))
Sub_Clusters = Array{Int,1}[C]
Bottom_Clusters = Array{Int,1}[]
Path = Array{Int,1}[C] ## the path of clusters
In_Out = Array{Int,1}[[1,1]] ## the in/out-point corresponding to each cluster in Path

using Clustering
T = 0

###########################################################################################

## (Initializing step)
## Split the full point set once with k-means and order the resulting sub-clusters with
## ParaSelectACO. The root cluster has in-point == out-point ([1,1]), so origin and goal
## sub-cluster coincide.
println("Initialization Step")
for init = 1 ## single pass; the loop body only groups the step
    Path = Array{Int,1}[]
    In_Out_copy = copy(In_Out)
    In_Out = Array{Int,1}[]

    In_Out_C = In_Out_copy[1]

    ## number of sub-clusters: at most k, and roughly k points per sub-cluster
    N_C = min(k, round(Int, length(C)/k, RoundUp))
    Sub_Clusters = setdiff(Sub_Clusters, [C])
    R = kmeans(Data[:,C], N_C) ## R = Result
    A = assignments(R) ## the assignments of the points

    C_subs = [C[A.==j] for j=1:N_C]
    C_in = C[In_Out_C[1]]
    C_out = C[In_Out_C[2]]
    ## O / G: index of the sub-cluster holding the in-point / out-point
    O = findfirst([(C_in in sub) for sub in C_subs])
    G = findfirst([(C_out in sub) for sub in C_subs])

    Dist_subs = zeros(N_C, N_C)
    ## every entry starts as (0,0); the off-diagonal entries are overwritten below
    Label_subs = fill((0,0), N_C, N_C)

    append!(Sub_Clusters, C_subs)
    ## The inner index is named m so it does not shadow the hyperparameter k.
    for j=1:N_C-1
        for m = j+1:N_C
            Sub_j = C_subs[j]
            Sub_m = C_subs[m]
            ## KNND[3] is used as the distance between the two sub-clusters; KNND[1] and
            ## KNND[2] are kept as the connecting positions inside Sub_j and Sub_m
            ## (presumably column indices of the matrices passed in; confirm in kNNRWICDist.jl)
            KNND = kNNRWICDist(Data[:,Sub_j], Data[:,Sub_m])
            Dist_subs[j,m] = KNND[3]
            Dist_subs[m,j] = KNND[3]
            Label_subs[j,m] = (KNND[1], KNND[2])
            Label_subs[m,j] = (KNND[2], KNND[1])
        end
    end

    PSACO = ParaSelectACO(Range, N_sample, Dist_subs, O, G; Na=200)
    P = PSACO["Best_Path"]
    ## The last entry of the best path is dropped (presumably it repeats the first one
    ## because origin and goal coincide; confirm in ParaSelectACO.jl).
    append!(Path, C_subs[P][1:end-1])
    ## first cluster: entered over the last edge of the path, left over the first edge
    push!(In_Out, [Label_subs[P[end-1], P[end]][2], Label_subs[O, P[2]][1]])
    for i = 2:length(P)-1
        push!(In_Out, [Label_subs[P[i-1], P[i]][2], Label_subs[P[i], P[i+1]][1]])
    end
    println("Initialization completed!!")
end

Initialization Step
Initialization completed!!


In [6]:
In_Out
Sub_Clusters
Path
using Plots
plotly()

## One series per cluster on the path, plus single-point series marking the
## cluster's in-point and out-point (In_Out stores positions inside the cluster).
scatter()
for i in eachindex(Path)
    P_i = Path[i]
    scatter!(Data[1, P_i], Data[2, P_i], label = "Cluster $i")
    q_in = P_i[In_Out[i][1]]
    scatter!([Data[1, q_in]], [Data[2, q_in]], label = "Cluster $i in-point")
    q_out = P_i[In_Out[i][2]]
    scatter!([Data[1, q_out]], [Data[2, q_out]], label = "Cluster $i out-point")
end
scatter!()

In [14]:
## Helpers used by the loop step's fallback when k-means cannot separate a cluster's
## in-point from its out-point.
include("src//CutHalf2.jl")
## Nearest supplies a substitute out-point when in-point and out-point coincide.
include("src//Nearest.jl")

Nearest

In [15]:
#######################################################################
## (Loop step)
## Refine the cluster path until no cluster is left in Sub_Clusters. In every sweep each
## cluster of the current Path is either kept (bottom cluster) or replaced by its
## sub-clusters, ordered by ParaSelectACO from the sub-cluster holding the in-point (O)
## to the one holding the out-point (G). In_Out is rebuilt in lockstep with Path and
## stores positions inside each cluster.
println("Loop Step")
while length(Sub_Clusters)!=0
    Path_copy = copy(Path)
    Path = Array{Int,1}[]
    In_Out_copy = copy(In_Out)
    In_Out = Array{Int,1}[]
    for i=1:length(Path_copy)
        C = Path_copy[i]
        In_Out_C = In_Out_copy[i]
        if C in Bottom_Clusters
            ## already final: carry over unchanged
            push!(Path, C)
            push!(In_Out, In_Out_C)
        else
            N_C = min(k, round(Int,length(C)/k, RoundUp))
            if N_C==1
                ## small enough (at most k points): becomes a bottom cluster
                push!(Path, C)
                push!(In_Out, In_Out_C)
                Sub_Clusters = setdiff(Sub_Clusters, [C])
                push!(Bottom_Clusters, C)
            else
                T = T+1
                println(T)
                N = 1 ## k-means attempt counter
                Sub_Clusters = setdiff(Sub_Clusters, [C])
                O = 0
                G = 0
                C_in = 0
                C_out = 0
                C_subs = Array{Int,1}[]

                ## If in-point and out-point are the same point, no clustering can put
                ## them into different sub-clusters, so every k-means attempt would be
                ## wasted. Skip the attempts and go straight to the CutHalf fallback.
                if In_Out_C[1]==In_Out_C[2]
                    C_in = C[In_Out_C[1]]
                    C_out = C_in
                    N = N_d+1
                end

                ## Try up to N_d times to get a k-means split that separates in and out.
                while N<=N_d
                    R = kmeans(Data[:,C], N_C) ## R = Result
                    A = assignments(R) ## the assignments of the points

                    C_subs = [C[A.==j] for j=1:N_C]
                    C_in = C[In_Out_C[1]]
                    C_out = C[In_Out_C[2]]
                    O = findfirst([(C_in in sub) for sub in C_subs])
                    G = findfirst([(C_out in sub) for sub in C_subs])
                    if O!=G
                        break
                    elseif (O==G)&&(length(C)==1)
                        N = N_d+1
                    else
                        N = N+1
                        println("Repeat = $(N-1)")
                    end
                end

                ## if k-means cannot separate C_in and C_out, implement the naive brute force CutHalf2
                if (N==N_d+1)&&(C_in!=C_out)
                    println("Execute CutHalf2")
                    N_C = 2
                    CH2 = CutHalf2(Data[:,C], In_Out_C[1], In_Out_C[2])
                    C_subs = [C[CH2[1]], C[CH2[2]]]
                    O = 1
                    G = 2
                elseif (N==N_d+1)&&(C_in==C_out)&&(length(C)!=1)
                    ## in == out: use the point returned by Nearest as the out-point
                    ## (evaluated once; assumes Nearest is deterministic)
                    println("Execute CutHalf")
                    N_C = 2
                    near = Nearest(Data[:,C], In_Out_C[1])
                    CH = CutHalf2(Data[:,C], In_Out_C[1], near)
                    C_subs = [C[CH[1]], C[CH[2]]]
                    O = 1
                    G = 2
                    C_out = C[near]
                end

                if O==G
                    ## could not be split: keep as a bottom cluster
                    push!(Bottom_Clusters, C)
                    push!(Path, C)
                    push!(In_Out, In_Out_C)
                else
                    append!(Sub_Clusters, C_subs)
                    Dist_subs = zeros(N_C, N_C)
                    ## every entry starts as (0,0); off-diagonal entries are overwritten below
                    Label_subs = fill((0,0), N_C, N_C)

                    ## The inner index is named m so it does not shadow the hyperparameter k.
                    for j=1:N_C-1
                        for m = j+1:N_C
                            Sub_j = C_subs[j]
                            Sub_m = C_subs[m]
                            ## KNND[3]: distance used between the sub-clusters;
                            ## KNND[1], KNND[2]: connecting positions inside Sub_j / Sub_m
                            KNND = kNNRWICDist(Data[:,Sub_j], Data[:,Sub_m])
                            Dist_subs[j,m] = KNND[3]
                            Dist_subs[m,j] = KNND[3]
                            Label_subs[j,m] = (KNND[1], KNND[2])
                            Label_subs[m,j] = (KNND[2], KNND[1])
                        end
                    end
                    PSACO = ParaSelectACO(Range, N_sample, Dist_subs, O, G; Na=200)
                    P = PSACO["Best_Path"]
                    append!(Path, C_subs[P])
                    ## first sub-cluster: entered at the parent's in-point
                    push!(In_Out, [findfirst(C_subs[O].==C_in), Label_subs[O, P[2]][1]])
                    ## interior sub-clusters: in/out come from the adjacent path edges
                    for s=2:length(P)-1
                        push!(In_Out, [Label_subs[P[s-1], P[s]][2], Label_subs[P[s], P[s+1]][1]])
                    end
                    ## last sub-cluster: left at the parent's out-point
                    push!(In_Out, [Label_subs[P[end-1], P[end]][2], findfirst(C_subs[G].==C_out)])
                end
            end
        end
    end
end

1
2
Repeat = 1
Repeat = 2
3
4
Repeat = 1
Repeat = 2
Repeat = 3
Repeat = 4
Repeat = 5
Repeat = 6
Repeat = 7
Repeat = 8
Repeat = 9
Repeat = 10
Execute CutHalf2
5
Repeat = 1
Repeat = 2
Repeat = 3
Repeat = 4
Repeat = 5
Repeat = 6
Repeat = 7
Repeat = 8
Repeat = 9
Repeat = 10
Execute CutHalf2
6
7
Repeat = 1
Repeat = 2
Repeat = 3
Repeat = 4
Repeat = 5
Repeat = 6
Repeat = 7
Repeat = 8
Repeat = 9
Repeat = 10
Execute CutHalf2
8
9
10
Repeat = 1
Repeat = 2
11
Repeat = 1
12
Repeat = 1
Repeat = 2
Repeat = 3
Repeat = 4
Repeat = 5
Repeat = 6
Repeat = 7
Repeat = 8
Repeat = 9
Repeat = 10
Execute CutHalf2
13
14
Repeat = 1
15
16
17


In [16]:
## Path and In_Out side by side (value discarded), then the cell's value:
## the sizes of the clusters on the path in ascending order.
hcat(Path, In_Out)
sort(length.(Path))

57-element Array{Int64,1}:
 16
 19
 20
 21
 22
 23
 23
 24
 25
 26
 26
 26
 27
  ⋮
 44
 44
 44
 44
 46
 46
 46
 48
 49
 49
 49
 50

In [18]:
using Plots
plotly()
## Plot a window of clusters (20 through 30) with their in-points and out-points.
## The upper bound is clamped to length(Path) so the cell also works when the path
## holds fewer than 30 clusters (the range is simply empty below 20).
scatter()
for i = 20:min(30, length(Path))
    P_i = Path[i]
    scatter!(Data[1, P_i], Data[2, P_i], label = "Cluster $i")
    ## In_Out[i] holds positions inside P_i, hence the double indexing
    scatter!([Data[1, P_i[In_Out[i][1]]]], [Data[2, P_i[In_Out[i][1]]]], label = "Cluster $i in-point")
    scatter!([Data[1, P_i[In_Out[i][2]]]], [Data[2, P_i[In_Out[i][2]]]], label = "Cluster $i out-point")
end
scatter!()

### Implement ACO without considering the 10-step constraint

In [19]:
using LinearAlgebra ## provides norm

## Solve each bottom cluster on its own: build the Euclidean distance matrix of the
## cluster's points, let ParaSelectACO find a path from the cluster's in-point (O) to
## its out-point (G), and append the visited points to Sol_naive in path order.
println("Naive ACO (not considering 10-step constratint)")
Sol_naive = Int[]
for i = 1:length(Path)
    println(i)
    P_i = Path[i]
    n_i = length(P_i)
    X_i = Data[:, P_i] ## coordinates of this cluster, copied once instead of per pair
    Dist_i = zeros(n_i, n_i)
    ## indices a, b are used so the hyperparameter k is not shadowed
    for a = 1:n_i-1
        for b = a+1:n_i
            Dist_i[a,b] = norm(X_i[:,a]-X_i[:,b])
            Dist_i[b,a] = Dist_i[a,b]
        end
    end
    O = In_Out[i][1]
    G = In_Out[i][2]
    PSACO = ParaSelectACO(Range, N_sample, Dist_i, O, G; Na=200)
    if O==G
        ## in == out: drop the last entry of the best path (presumably it repeats the
        ## start point; confirm in ParaSelectACO.jl)
        append!(Sol_naive, P_i[PSACO["Best_Path"]][1:end-1])
    else
        append!(Sol_naive, P_i[PSACO["Best_Path"]])
    end
end

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57


In [20]:
## Display the visiting order of all points (1-based column indices of Data)
Sol_naive

2000-element Array{Int64,1}:
 1050
  776
  382
    1
 1840
 1481
  202
  878
  273
  277
  986
  716
 1227
    ⋮
 1619
  329
  487
  949
    5
   27
  286
 1595
 1918
  768
 1219
 1071

In [21]:
## Rotate the tour so it starts at point 1 and repeat that point at the end to close
## the loop; subtracting 1 turns the 1-based indices into 0-based ids.
Polar_Index = findfirst(isequal(1), Sol_naive)
Z = vcat(Sol_naive[Polar_Index:end], Sol_naive[1:Polar_Index]) .- 1

2001-element Array{Int64,1}:
    0
 1839
 1480
  201
  877
  272
  276
  985
  715
 1226
 1296
  953
 1167
    ⋮
    4
   26
  285
 1594
 1917
  767
 1218
 1070
 1049
  775
  381
    0

In [89]:
## Wrap the tour in a one-column table named :Path and write it to a CSV whose
## file name records the k and N_d settings.
df = DataFrame(Path = Z);
CSV.write("Z_$(k)_$(N_d).csv", df)

"Z.csv"

In [28]:
using Plots
plotly()
## Back to 1-based indices so the tour can index the columns of Data.
P = Z .+ 1
## All points, the tour drawn through them, and the tour's starting point highlighted.
scatter(Data[1, :], Data[2, :], label = "Points", markersize = 1)
plot!(Data[1, P], Data[2, P], label = "Path")
scatter!([Data[1, first(P)]], [Data[2, first(P)]], label = "North Pole", markersize = 5)

In [2]:
## Run the script version of the pipeline. NOTE(review): judging by the log printed
## below it covers initialization, loop step, naive ACO and the CSV export; confirm
## against Code-Execute.jl.
include("Code-Execute.jl")

Initialization Step
Initialization completed!!
Loop Step
1
Repeat = 1
Repeat = 2
Repeat = 3
Repeat = 4
Repeat = 5
Repeat = 6
Repeat = 7
Repeat = 8
Repeat = 9
Repeat = 10
Execute CutHalf2
2
3
4
Repeat = 1
Repeat = 2
Repeat = 3
Repeat = 4
Repeat = 5
Repeat = 6
Repeat = 7
Repeat = 8
Repeat = 9
Repeat = 10
Execute CutHalf2
5
6
Repeat = 1
7
8
Repeat = 1
9
Repeat = 1
Repeat = 2
Repeat = 3
Path length = 29
Naive ACO (not considering 10-step constratint)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


"Z.csv"

### Implement ACO considering the 10-step constraint

In [8]:
using Plots
plotly()
## One scatter series per cluster on the path, labelled by its position in Path.
scatter()
for (i, members) in enumerate(Path)
    scatter!(Data[1, members], Data[2, members], label = "Cluster $i", markersize = 2)
end
scatter!()